diff --git a/.travis.yml b/.travis.yml index 059d2e51..b4bcae51 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,22 +5,27 @@ python: #- "3.2" #- "3.3" #- "3.4" -# command to install dependencies + before_install: - - sudo apt-get install gfortran libblas-dev liblapack-dev mpich2 libmpich2-dev python-dev python-numpy python-scipy python-matplotlib python-nose python-pip - - pip install pyDOE mpi4py + - "export DISPLAY=:99.0" + - "sh -e /etc/init.d/xvfb start" + - sudo apt-get install gfortran libblas-dev liblapack-dev mpich2 libmpich2-dev + - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh + - bash miniconda.sh -b -p $HOME/miniconda + - export PATH="$HOME/miniconda/bin:$PATH" + - conda update --yes conda + +install: + - conda install --yes python=$TRAVIS_PYTHON_VERSION pip numpy scipy nose + - pip install pyDOE mpi4py + - python setup.py install -# install package: - - python setup.py install -# script to run before running tests -before_script: - - mkdir -p shippable/testresults -# command to run tests script: - - nosetests --with-xunit --xunit-file=shippable/testresults/nosetests_mpi4py_serial.xml - - mpirun -n 2 nosetests --with-xunit --xunit-file=shippable/testresults/nosetests_mpi4py_parallel.xml - - pip uninstall -y mpi4py - - nosetests --with-xunit --xunit-file=shippable/testresults/nosetests_nompi4py.xml + - nosetests + - mpirun -n 2 nosetests + - pip uninstall -y mpi4py + - nosetests + # notification settings notifications: email: @@ -34,4 +39,4 @@ notifications: branches: only: - master - + - v2_master diff --git a/README.md b/README.md index 103f2b71..9f8d89dd 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,17 @@ You will need to run sphinx-apidoc AND reinstall BET anytime a new module or met Useful scripts are contained in ``examples/`` +Tests +----- + +To run tests in serial call:: + + nosetests tests + +To run tests in parallel call:: + + mpirun -np NPROC nosetets tests + Dependencies ------------ diff --git a/bet/Comm.py b/bet/Comm.py index 955799c0..4e603bed 100644 --- a/bet/Comm.py +++ b/bet/Comm.py @@ -1,4 +1,4 @@ -# Copyright (C) 2014-2015 The BET Development Team +# Copyright (C) 2014-2016 The BET Development Team """ This module provides a workaround for people without mpi4py installed @@ -18,7 +18,9 @@ def __init__(self): """ Initialization """ + #: size, 1 self.size = 1 + #: rank, 0 self.rank = 0 pass @@ -146,6 +148,11 @@ def Barrier(self): """ pass + def barrier(self): + """ + Does nothing in serial. + """ + pass class MPI_for_no_mpi4py(object): @@ -158,9 +165,13 @@ def __init__(self): """ Initialization """ + #: fake sum self.SUM = None + #: float type self.DOUBLE = float + #: int type self.INT = int + #: bool type self.BOOL = bool try: diff --git a/bet/__init__.py b/bet/__init__.py index c9fdf906..67ff894d 100644 --- a/bet/__init__.py +++ b/bet/__init__.py @@ -1,4 +1,4 @@ -# Copyright (C) 2014-2015 The BET Development Team +# Copyright (C) 2014-2016 The BET Development Team """ Butler, Estep, Tavener Method @@ -23,7 +23,10 @@ postProcess :mod:`~bet.postProcess` provides plotting tools and tools to sort samples by probabilities. +sample :mod:`~bet.sample` provides data structures to store sets of samples and + their associated arrays. 
+ """ __all__ = ['sampling', 'calculateP', 'postProcess', 'sensitivity', 'util', - 'Comm'] + 'Comm', 'sample'] diff --git a/bet/calculateP/calculateP.py b/bet/calculateP/calculateP.py index 0643d228..265f706b 100644 --- a/bet/calculateP/calculateP.py +++ b/bet/calculateP/calculateP.py @@ -1,4 +1,4 @@ -# Copyright (C) 2014-2015 The BET Development Team +# Copyright (C) 2014-2016 The BET Development Team r""" This module provides methods for calulating the probability measure @@ -6,275 +6,167 @@ * :mod:`~bet.calculateP.prob_emulated` provides a skeleton class and calculates the probability for a set of emulation points. -* :mod:`~bet.calculateP.calculateP.prob_samples_mc` estimates the volumes of - the voronoi cells using MC integration +* :mod:`~bet.calculateP.calculateP.prob_samples_mc` estimates the + probability based on pre-defined volumes. """ from bet.Comm import comm, MPI import numpy as np -import scipy.spatial as spatial import bet.util as util +import bet.sample as samp -def emulate_iid_lebesgue(lam_domain, num_l_emulate): +def emulate_iid_lebesgue(domain, num_l_emulate, globalize=False): """ Parition the parameter space using emulated samples into many voronoi cells. These samples are iid so that we can apply the standard MC assumuption/approximation - :param lam_domain: The domain for each parameter for the model. - :type lam_domain: :class:`~numpy.ndarray` of shape (ndim, 2) + :param domain: The domain for each parameter for the model. + :type domain: :class:`~numpy.ndarray` of shape (ndim, 2) :param num_l_emulate: The number of emulated samples. :type num_l_emulate: int - :rtype: :class:`~numpy.ndarray` of shape (num_l_emulate, ndim) + :rtype: :class:`~bet.sample.voronoi_sample_set` :returns: a set of samples for emulation """ - num_l_emulate = (num_l_emulate/comm.size) + \ - (comm.rank < num_l_emulate%comm.size) - lam_width = lam_domain[:, 1] - lam_domain[:, 0] + num_l_emulate = int((num_l_emulate/comm.size) + \ + (comm.rank < num_l_emulate%comm.size)) + lam_width = domain[:, 1] - domain[:, 0] lambda_emulate = lam_width*np.random.random((num_l_emulate, - lam_domain.shape[0]))+lam_domain[:, 0] - return lambda_emulate + domain.shape[0]))+domain[:, 0] -def prob_emulated(samples, data, rho_D_M, d_distr_samples, - lambda_emulate=None, d_Tree=None): + set_emulated = samp.voronoi_sample_set(dim=domain.shape[0]) + set_emulated._domain = domain + set_emulated._values_local = lambda_emulate + if globalize: + set_emulated.local_to_global() + return set_emulated + +def prob_emulated(discretization, globalize=True): r""" Calculates :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{emulate}})`, the probability assoicated with a set of voronoi cells defined by ``num_l_emulate`` iid samples :math:`(\lambda_{emulate})`. + This is added to the emulated input sample set object. - :param samples: The samples in parameter space for which the model was run. - :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim) - :param data: The data from running the model given the samples. 
- :type data: :class:`~numpy.ndarray` of size (num_samples, mdim) - :param rho_D_M: The simple function approximation of rho_D - :type rho_D_M: :class:`~numpy.ndarray` of shape (M,) - :param d_distr_samples: The samples in the data space that define a - parition of D to for the simple function approximation - :type d_distr_samples: :class:`~numpy.ndarray` of shape (M, mdim) - :param d_Tree: :class:`~scipy.spatial.KDTree` for d_distr_samples - :param lambda_emulate: Samples used to partition the parameter space - :type lambda_emulate: :class:`~numpy.ndarray` of shape (num_l_emulate, ndim) - :rtype: tuple - :returns: (P, lambda_emulate, io_ptr, emulate_ptr, lam_vol) - - """ - if len(samples.shape) == 1: - samples = np.expand_dims(samples, axis=1) - if len(data.shape) == 1: - data = np.expand_dims(data, axis=1) - if lambda_emulate is None: - lambda_emulate = samples - if len(d_distr_samples.shape) == 1: - d_distr_samples = np.expand_dims(d_distr_samples, axis=1) - if d_Tree is None: - d_Tree = spatial.KDTree(d_distr_samples) - - # Determine which inputs go to which M bins using the QoI - (_, io_ptr) = d_Tree.query(data) - - # Determine which emulated samples match with which model run samples - l_Tree = spatial.KDTree(samples) - (_, emulate_ptr) = l_Tree.query(lambda_emulate) - - # Calculate Probabilties - P = np.zeros((lambda_emulate.shape[0],)) - d_distr_emu_ptr = np.zeros(emulate_ptr.shape) - d_distr_emu_ptr = io_ptr[emulate_ptr] - for i in range(rho_D_M.shape[0]): - Itemp = np.equal(d_distr_emu_ptr, i) - Itemp_sum = np.sum(Itemp) - Itemp_sum = comm.allreduce(Itemp_sum, op=MPI.SUM) - if Itemp_sum > 0: - P[Itemp] = rho_D_M[i]/Itemp_sum - - return (P, lambda_emulate, io_ptr, emulate_ptr) - -def prob(samples, data, rho_D_M, d_distr_samples, d_Tree=None): - r""" - - Calculates :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples}})`, the - probability assoicated with a set of voronoi cells defined by the model - solves at :math:`(\lambda_{samples})` where the volumes of these voronoi - cells are assumed to be equal under the MC assumption. - - :param samples: The samples in parameter space for which the model was run. - :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim) - :param data: The data from running the model given the samples. - :type data: :class:`~numpy.ndarray` of size (num_samples, mdim) - :param rho_D_M: The simple function approximation of rho_D - :type rho_D_M: :class:`~numpy.ndarray` of shape (M,) - :param d_distr_samples: The samples in the data space that define a - parition of D to for the simple function approximation - :type d_distr_samples: :class:`~numpy.ndarray` of shape (M, mdim) - :param d_Tree: :class:`~scipy.spatial.KDTree` for d_distr_samples - :rtype: tuple of :class:`~numpy.ndarray` of sizes (num_samples,), - (num_samples,), (ndim, num_l_emulate), (num_samples,), (num_l_emulate,) - :returns: (P, lam_vol, io_ptr) where P is the - probability associated with samples, and lam_vol the volumes associated - with the samples, io_ptr a pointer from data to M bins. + :param discretization: An object containing the discretization information. + :type class:`bet.sample.discretization` + :param bool globalize: Makes local variables global. 
""" - if len(samples.shape) == 1: - samples = np.expand_dims(samples, axis=1) - if len(data.shape) == 1: - data = np.expand_dims(data, axis=1) - if len(d_distr_samples.shape) == 1: - d_distr_samples = np.expand_dims(d_distr_samples, axis=1) - if d_Tree is None: - d_Tree = spatial.KDTree(d_distr_samples) - - # Set up local arrays for parallelism - local_index = range(0+comm.rank, samples.shape[0], comm.size) - samples_local = samples[local_index, :] - data_local = data[local_index, :] - local_array = np.array(local_index, dtype='int64') - - # Determine which inputs go to which M bins using the QoI - (_, io_ptr) = d_Tree.query(data_local) - # Apply the standard MC approximation and - # calculate probabilities - P_local = np.zeros((samples_local.shape[0],)) - for i in range(rho_D_M.shape[0]): - Itemp = np.equal(io_ptr, i) - Itemp_sum = np.sum(Itemp) - Itemp_sum = comm.allreduce(Itemp_sum, op=MPI.SUM) - if Itemp_sum > 0: - P_local[Itemp] = rho_D_M[i]/Itemp_sum - P_global = util.get_global_values(P_local) - global_index = util.get_global_values(local_array) - P = np.zeros(P_global.shape) - P[global_index] = P_global[:] + # Check dimensions + discretization.check_nums() + op_num = discretization._output_probability_set.check_num() + discretization._emulated_input_sample_set.check_num() - lam_vol = (1.0/float(samples.shape[0]))*np.ones((samples.shape[0],)) + # Check for necessary properties + if discretization._io_ptr_local is None: + discretization.set_io_ptr(globalize=True) + if discretization._emulated_ii_ptr_local is None: + discretization.set_emulated_ii_ptr(globalize=False) - return (P, lam_vol, io_ptr) + # Calculate Probabilties + P = np.zeros((discretization._emulated_input_sample_set.\ + _values_local.shape[0],)) + d_distr_emu_ptr = discretization._io_ptr[discretization.\ + _emulated_ii_ptr_local] + for i in range(op_num): + if discretization._output_probability_set._probabilities[i] > 0.0: + Itemp = np.equal(d_distr_emu_ptr, i) + Itemp_sum = np.sum(Itemp) + Itemp_sum = comm.allreduce(Itemp_sum, op=MPI.SUM) + if Itemp_sum > 0: + P[Itemp] = discretization._output_probability_set.\ + _probabilities[i]/Itemp_sum + + discretization._emulated_input_sample_set._probabilities_local = P + if globalize: + discretization._emulated_input_sample_set.local_to_global() + pass -def prob_mc(samples, data, rho_D_M, d_distr_samples, - lambda_emulate=None, d_Tree=None): +def prob(discretization): r""" Calculates :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples}})`, the - probability assoicated with a set of voronoi cells defined by the model - solves at :math:`(\lambda_{samples})` where the volumes of these voronoi - cells are approximated using MC integration. - - :param samples: The samples in parameter space for which the model was run. - :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim) - :param data: The data from running the model given the samples. 
- :type data: :class:`~numpy.ndarray` of size (num_samples, mdim) - :param rho_D_M: The simple function approximation of rho_D - :type rho_D_M: :class:`~numpy.ndarray` of shape (M,) - :param d_distr_samples: The samples in the data space that define a - parition of D to for the simple function approximation - :type d_distr_samples: :class:`~numpy.ndarray` of shape (M, mdim) - :param d_Tree: :class:`~scipy.spatial.KDTree` for d_distr_samples - :param lambda_emulate: Samples used to estimate the volumes of the Voronoi - cells associated with ``samples`` + probability assoicated with a set of cells defined by the model + solves at :math:`(\lambda_{samples})` where the volumes of these + cells are provided. - :rtype: tuple of :class:`~numpy.ndarray` of sizes (num_samples,), - (num_samples,), (ndim, num_l_emulate), (num_samples,), (num_l_emulate,) - :returns: (P, lam_vol, lambda_emulate, io_ptr, emulate_ptr) where P is the - probability associated with samples, lam_vol the volumes associated - with the samples, io_ptr a pointer from data to M bins, and emulate_ptr - a pointer from emulated samples to samples (in parameter space) + :param discretization: An object containing the discretization information. + :type class:`bet.sample.discretization` + :param bool globalize: Makes local variables global. """ - if len(samples.shape) == 1: - samples = np.expand_dims(samples, axis=1) - if len(data.shape) == 1: - data = np.expand_dims(data, axis=1) - if lambda_emulate is None: - lambda_emulate = samples - if len(d_distr_samples.shape) == 1: - d_distr_samples = np.expand_dims(d_distr_samples, axis=1) - if d_Tree is None: - d_Tree = spatial.KDTree(d_distr_samples) - - # Determine which inputs go to which M bins using the QoI - (_, io_ptr) = d_Tree.query(data) - - # Determine which emulated samples match with which model run samples - l_Tree = spatial.KDTree(samples) - (_, emulate_ptr) = l_Tree.query(lambda_emulate) - lam_vol, lam_vol_local, local_index = estimate_volume(samples, - lambda_emulate) + # Check Dimensions + discretization.check_nums() + op_num = discretization._output_probability_set.check_num() - local_array = np.array(local_index, dtype='int64') - data_local = data[local_index, :] - samples_local = samples[local_index, :] - - - # Determine which inputs go to which M bins using the QoI - (_, io_ptr_local) = d_Tree.query(data_local) + # Check for necessary attributes + if discretization._io_ptr_local is None: + discretization.set_io_ptr(globalize=False) # Calculate Probabilities - P_local = np.zeros((samples_local.shape[0],)) - for i in range(rho_D_M.shape[0]): - Itemp = np.equal(io_ptr_local, i) - Itemp_sum = np.sum(lam_vol_local[Itemp]) - Itemp_sum = comm.allreduce(Itemp_sum, op=MPI.SUM) - if Itemp_sum > 0: - P_local[Itemp] = rho_D_M[i]*lam_vol_local[Itemp]/Itemp_sum - P_global = util.get_global_values(P_local) - global_index = util.get_global_values(local_array) - P = np.zeros(P_global.shape) - P[global_index] = P_global[:] - return (P, lam_vol, lambda_emulate, io_ptr, emulate_ptr) + if discretization._input_sample_set._values_local is None: + discretization._input_sample_set.global_to_local() + P_local = np.zeros((len(discretization._io_ptr_local),)) + for i in range(op_num): + if discretization._output_probability_set._probabilities[i] > 0.0: + Itemp = np.equal(discretization._io_ptr_local, i) + Itemp_sum = np.sum(discretization._input_sample_set.\ + _volumes_local[Itemp]) + Itemp_sum = comm.allreduce(Itemp_sum, op=MPI.SUM) + if Itemp_sum > 0: + P_local[Itemp] = 
discretization._output_probability_set.\ + _probabilities[i]*discretization._input_sample_set.\ + _volumes_local[Itemp]/Itemp_sum + + discretization._input_sample_set._probabilities = util.\ + get_global_values(P_local) + discretization._input_sample_set._probabilities_local = P_local -def estimate_volume(samples, lambda_emulate=None): +def prob_mc(discretization): r""" - Estimate the volume fraction of the Voronoi cells associated with - ``samples`` using ``lambda_emulate`` as samples for Monte Carlo - integration. Specifically we are estimating - :math:`\mu_\Lambda(\mathcal(V)_{i,N} \cap A)/\mu_\Lambda(\Lambda)`. - - :param samples: The samples in parameter space for which the model was run. - :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim) - :param lambda_emulate: Samples used to partition the parameter space - :type lambda_emulate: :class:`~numpy.ndarray` of shape (num_l_emulate, ndim) - - :rtype: tuple - :returns: (lam_vol, lam_vol_local, local_index) where ``lam_vol`` is the - global array of volume fractions, ``lam_vol_local`` is the local array - of volume fractions, and ``local_index`` a list of the global indices - for local arrays on this particular processor ``lam_vol_local = - lam_vol[local_index]`` - - """ - - if len(samples.shape) == 1: - samples = np.expand_dims(samples, axis=1) - if lambda_emulate is None: - lambda_emulate = samples - - # Determine which emulated samples match with which model run samples - l_Tree = spatial.KDTree(samples) - (_, emulate_ptr) = l_Tree.query(lambda_emulate) + Calculates :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples}})`, the + probability associated with a set of cells defined by the model + solves at :math:`(\lambda_{samples})` where the volumes are calculated + with the given emulated input points. - # Apply the standard MC approximation to determine the number of emulated - # samples per model run sample. This is for approximating - # \mu_Lambda(A_i \intersect b_j) - lam_vol = np.zeros((samples.shape[0],)) - for i in range(samples.shape[0]): - lam_vol[i] = np.sum(np.equal(emulate_ptr, i)) - clam_vol = np.copy(lam_vol) - comm.Allreduce([lam_vol, MPI.DOUBLE], [clam_vol, MPI.DOUBLE], op=MPI.SUM) - lam_vol = clam_vol - num_emulated = lambda_emulate.shape[0] - num_emulated = comm.allreduce(num_emulated, op=MPI.SUM) - lam_vol = lam_vol/(num_emulated) + :param discretization: An object containing the discretization information. + :type class:`bet.sample.discretization` + :param globalize: Makes local variables global. 
+ :type bool - # Set up local arrays for parallelism - local_index = range(0+comm.rank, samples.shape[0], comm.size) - lam_vol_local = lam_vol[local_index] + """ - return (lam_vol, lam_vol_local, local_index) + # Check Dimensions + num = discretization.check_nums() + discretization._output_probability_set.check_num() + if discretization._output_probability_set._values_local is None: + discretization._output_probability_set.global_to_local() + if discretization._emulated_input_sample_set._values_local is None: + discretization._emulated_input_sample_set.global_to_local() + + # Calculate Volumes + (_, emulate_ptr) = discretization._input_sample_set.query(discretization.\ + _emulated_input_sample_set._values_local) + vol = np.zeros((num,)) + for i in range(num): + vol[i] = np.sum(np.equal(emulate_ptr, i)) + cvol = np.copy(vol) + comm.Allreduce([vol, MPI.DOUBLE], [cvol, MPI.DOUBLE], op=MPI.SUM) + vol = cvol + num_l_emulate = discretization._emulated_input_sample_set.\ + _values_local.shape[0] + num_l_emulate = comm.allreduce(num_l_emulate, op=MPI.SUM) + vol = vol/float(num_l_emulate) + discretization._input_sample_set._volumes = vol + discretization._input_sample_set.global_to_local() + + return prob(discretization) - diff --git a/bet/calculateP/simpleFunP.py b/bet/calculateP/simpleFunP.py index 27afb5e3..c5eec536 100644 --- a/bet/calculateP/simpleFunP.py +++ b/bet/calculateP/simpleFunP.py @@ -1,33 +1,42 @@ -# Copyright (C) 2014-2015 The BET Development Team +# Copyright (C) 2014-2016 The BET Development Team """ -This module provides methods for creating simple funciton approximations to be -used by :mod:`~bet.calculateP.calculateP`. +This module provides methods for creating simple function approximations to be +used by :mod:`~bet.calculateP.calculateP`. These simple function approximations +are returned as `bet.sample.sample_set` objects. """ from bet.Comm import comm, MPI import numpy as np -import scipy.spatial as spatial import bet.calculateP.voronoiHistogram as vHist -import collections +import collections, logging import bet.util as util +import bet.sample as samp -def unif_unif(data, Q_ref, M=50, bin_ratio=0.2, num_d_emulate=1E6): +class wrong_argument_type(Exception): + """ + Exception for when the argument for data_set is not one of the acceptible + types. + """ + + +def uniform_partition_uniform_distribution_rectangle_size(data_set, Q_ref, + rect_size, M=50, + num_d_emulate=1E6): r""" Creates a simple function approximation of :math:`\rho_{\mathcal{D}}` where :math:`\rho_{\mathcal{D}}` is a uniform probability density on - a generalized rectangle centered at Q_ref. - The support of this density is defined by bin_ratio, which determines - the size of the generalized rectangle by scaling the circumscribing - generalized rectangle of :math:`\mathcal{D}`. - The simple function approximation is then defined by determining M + a generalized rectangle centered at ``Q_ref``. + The support of this density is defined by ``rect_size``, which determines + the size of the generalized rectangle. + The simple function approximation is then defined by determining ``M`` Voronoi cells (i.e., "bins") partitioning :math:`\mathcal{D}`. These - bins are only implicitly defined by M samples in :math:`\mathcal{D}`. - Finally, the probabilities of each of these bins is computed by - sampling from :math:`\rho{\mathcal{D}}` and using nearest neighbor - searches to bin these samples in the M implicitly defined bins. + bins are only implicitly defined by ``M`` samples in :math:`\mathcal{D}`. 
+ Finally, the probabilities of each of these bins is computed by + sampling from :math:`\rho{\mathcal{D}}` and using nearest neighbor + searches to bin these samples in the ``M`` implicitly defined bins. The result is the simple function approximation denoted by :math:`\rho_{\mathcal{D},M}`. - + Note that all computations in the measure-theoretic framework that follow from this are for the fixed simple function approximation :math:`\rho_{\mathcal{D},M}`. @@ -36,42 +45,58 @@ def unif_unif(data, Q_ref, M=50, bin_ratio=0.2, num_d_emulate=1E6): :math:`\rho_{\mathcal{D},M}` The choice of M is something of an "art" - play around with it and you can get reasonable results with a relatively small number here like 50. - :param bin_ratio: The ratio used to determine the width of the - uniform distributiion as ``bin_size = (data_max-data_min)*bin_ratio`` - :type bin_ratio: double or list() + :param rect_size: Determines the size of the support of the + uniform distribution on a generalized rectangle + :type rect_size: double or list() :param int num_d_emulate: Number of samples used to emulate using an MC - assumption - :param data: Array containing QoI data where the QoI is mdim - diminsional - :type data: :class:`~numpy.ndarray` of size (num_samples, mdim) + assumption + :param data_set: Sample set that the probability measure is defined for. + :type data_set: :class:`~bet.sample.discretization` + or :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray` :param Q_ref: :math:`Q(`\lambda_{reference})` :type Q_ref: :class:`~numpy.ndarray` of size (mdim,) - - :rtype: tuple - :returns: (rho_D_M, d_distr_samples, d_Tree) where ``rho_D_M`` is (M,) and - ``d_distr_samples`` are (M, mdim) :class:`~numpy.ndarray` and `d_Tree` is - the :class:`~scipy.spatial.KDTree` for d_distr_samples - + + :rtype: :class:`~bet.sample.voronoi_sample_set` + :returns: sample_set object defininng simple function approximation """ - data = util.fix_dimensions_data(data) - bin_size = (np.max(data, 0) - np.min(data, 0))*bin_ratio + if isinstance(data_set, samp.sample_set_base): + data_set.check_num() + dim = data_set._dim + values = data_set._values + elif isinstance(data_set, samp.discretization): + data_set.check_nums() + dim = data_set._output_sample_set._dim + values = data_set._output_sample_set._values + elif isinstance(data_set, np.ndarray): + data_set.shape[0] + dim = data_set.shape[1] + values = data_set + else: + msg = "The first argument must be of type bet.sample.sample_set, " + msg += "bet.sample.discretization or np.ndarray" + raise wrong_argument_type(msg) + if not isinstance(rect_size, collections.Iterable): + rect_size = rect_size * np.ones((dim,)) + if np.any(np.less_equal(rect_size, 0)): + msg = 'rect_size must be greater than 0' + raise wrong_argument_type(msg) r''' - Create M samples defining M Voronoi cells (i.e., "bins") in D used to + Create M samples defining M Voronoi cells (i.e., "bins") in D used to define the simple function approximation :math:`\rho_{\mathcal{D},M}`. - + This does not have to be random, but here we assume this to be the case. We can choose these samples deterministically but that fails to scale with dimension efficiently. - + Note that these M samples are chosen for the sole purpose of determining the bins used to create the approximation to :math:`rho_{\mathcal{D}}`. - + We call these M samples "d_distr_samples" because they are samples on the data space and the distr implies these samples are chosen to create the approximation to the probability measure (distribution) on D. 
- + Note that we create these samples in a set containing the hyperrectangle in order to get output cells with zero probability. If all of the d_dstr_samples were taken from within the support of @@ -79,13 +104,19 @@ def unif_unif(data, Q_ref, M=50, bin_ratio=0.2, num_d_emulate=1E6): probability. This would in turn imply that the support of :math:`\rho_{\Lambda}` is all of :math:`\Lambda`. ''' + if comm.rank == 0: - d_distr_samples = 1.5*bin_size*(np.random.random((M, - data.shape[1]))-0.5)+Q_ref + d_distr_samples = 1.5 * rect_size * (np.random.random((M, + dim)) - 0.5) + Q_ref else: - d_distr_samples = np.empty((M, data.shape[1])) + d_distr_samples = np.empty((M, dim)) comm.Bcast([d_distr_samples, MPI.DOUBLE], root=0) + # Initialize sample set object + s_set = samp.voronoi_sample_set(dim) + s_set.set_values(d_distr_samples) + s_set.set_kdtree() + r''' Compute probabilities in the M bins used to define :math:`\rho_{\mathcal{D},M}` by Monte Carlo approximations @@ -94,25 +125,25 @@ def unif_unif(data, Q_ref, M=50, bin_ratio=0.2, num_d_emulate=1E6): :math:`\rho_{\mathcal{D}}`. ''' # Generate the samples from :math:`\rho_{\mathcal{D}}` - num_d_emulate = int(num_d_emulate/comm.size)+1 - d_distr_emulate = bin_size*(np.random.random((num_d_emulate, - data.shape[1]))-0.5) + Q_ref + num_d_emulate = int(num_d_emulate / comm.size) + 1 + d_distr_emulate = rect_size * (np.random.random((num_d_emulate, + dim)) - 0.5) + Q_ref # Bin these samples using nearest neighbor searches - d_Tree = spatial.KDTree(d_distr_samples) - (_, k) = d_Tree.query(d_distr_emulate) + (_, k) = s_set.query(d_distr_emulate) + count_neighbors = np.zeros((M,), dtype=np.int) for i in range(M): count_neighbors[i] = np.sum(np.equal(k, i)) - # Use the binning to define :math:`\rho_{\mathcal{D},M}` ccount_neighbors = np.copy(count_neighbors) comm.Allreduce([count_neighbors, MPI.INT], [ccount_neighbors, MPI.INT], - op=MPI.SUM) + op=MPI.SUM) count_neighbors = ccount_neighbors rho_D_M = count_neighbors.astype(np.float64) / \ - float(num_d_emulate*comm.size) + float(num_d_emulate * comm.size) + s_set.set_probabilities(rho_D_M) ''' NOTE: The computation of q_distr_prob, q_distr_emulate, q_distr_samples @@ -121,30 +152,393 @@ def unif_unif(data, Q_ref, M=50, bin_ratio=0.2, num_d_emulate=1E6): can then be stored and accessed later by the algorithm using a completely different set of parameter samples and model solves. ''' - return (rho_D_M, d_distr_samples, d_Tree) + if isinstance(data_set, samp.discretization): + data_set._output_probability_set = s_set + return s_set -def normal_normal(Q_ref, M, std, num_d_emulate=1E6): +def uniform_partition_uniform_distribution_rectangle_scaled(data_set, Q_ref, + rect_scale=0.2, M=50, + num_d_emulate=1E6): + r""" + Creates a simple function approximation of :math:`\rho_{\mathcal{D}}` + where :math:`\rho_{\mathcal{D}}` is a uniform probability density on + a generalized rectangle centered at ``Q_ref``. + The support of this density is defined by ``rect_scale``, which determines + the size of the generalized rectangle by scaling the circumscribing + generalized rectangle of :math:`\mathcal{D}`. + The simple function approximation is then defined by determining ``M `` + Voronoi cells (i.e., "bins") partitioning :math:`\mathcal{D}`. These + bins are only implicitly defined by ``M`` samples in :math:`\mathcal{D}`. 
+ Finally, the probabilities of each of these bins is computed by + sampling from :math:`\rho{\mathcal{D}}` and using nearest neighbor + searches to bin these samples in the ``M`` implicitly defined bins. + The result is the simple function approximation denoted by + :math:`\rho_{\mathcal{D},M}`. + + Note that all computations in the measure-theoretic framework that + follow from this are for the fixed simple function approximation + :math:`\rho_{\mathcal{D},M}`. + + :param int M: Defines number M samples in D used to define + :math:`\rho_{\mathcal{D},M}` The choice of M is something of an "art" - + play around with it and you can get reasonable results with a + relatively small number here like 50. + :param rect_scale: The scale used to determine the support of the + uniform distribution as ``rect_size = (data_max-data_min)*rect_scale`` + :type rect_scale: double or list() + :param int num_d_emulate: Number of samples used to emulate using an MC + assumption + :param data_set: Sample set that the probability measure is defined for. + :type data_set: :class:`~bet.sample.discretization` or + :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray` + :param Q_ref: :math:`Q(`\lambda_{reference})` + :type Q_ref: :class:`~numpy.ndarray` of size (mdim,) + + :rtype: :class:`~bet.sample.voronoi_sample_set` + :returns: sample_set object defininng simple function approximation + """ + + if isinstance(data_set, samp.sample_set_base): + num = data_set.check_num() + dim = data_set._dim + values = data_set._values + elif isinstance(data_set, samp.discretization): + num = data_set.check_nums() + dim = data_set._output_sample_set._dim + values = data_set._output_sample_set._values + elif isinstance(data_set, np.ndarray): + num = data_set.shape[0] + dim = data_set.shape[1] + values = data_set + else: + msg = "The first argument must be of type bet.sample.sample_set, " + msg += "bet.sample.discretization or np.ndarray" + raise wrong_argument_type(msg) + + rect_size = (np.max(values, 0) - np.min(values, 0))*rect_scale + + return uniform_partition_uniform_distribution_rectangle_size(data_set, Q_ref, + rect_size, M, + num_d_emulate) + +def uniform_partition_uniform_distribution_rectangle_domain(data_set, rect_domain, + M=50, num_d_emulate=1E6): + r""" + Creates a simple function approximation of :math:`\rho_{\mathcal{D}}` + where :math:`\rho_{\mathcal{D}}` is a uniform probability density on + a generalized rectangle defined by ``rect_domain``. + The simple function approximation is then defined by determining ``M`` + Voronoi cells (i.e., "bins") partitioning :math:`\mathcal{D}`. These + bins are only implicitly defined by ``M ``samples in :math:`\mathcal{D}`. + Finally, the probabilities of each of these bins is computed by + sampling from :math:`\rho{\mathcal{D}}` and using nearest neighbor + searches to bin these samples in the ``M`` implicitly defined bins. + The result is the simple function approximation denoted by + :math:`\rho_{\mathcal{D},M}`. + + Note that all computations in the measure-theoretic framework that + follow from this are for the fixed simple function approximation + :math:`\rho_{\mathcal{D},M}`. + + :param int M: Defines number M samples in D used to define + :math:`\rho_{\mathcal{D},M}` The choice of M is something of an "art" - + play around with it and you can get reasonable results with a + relatively small number here like 50. 
+ :param rect_domain: The support of the density + :type rect_domain: double or list() + :param int num_d_emulate: Number of samples used to emulate using an MC + assumption + :param data_set: Sample set that the probability measure is defined for. + :type data_set: :class:`~bet.sample.discretization` or + :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray` + :param Q_ref: :math:`Q(`\lambda_{reference})` + :type Q_ref: :class:`~numpy.ndarray` of size (mdim,) + + + :rtype: :class:`~bet.sample.voronoi_sample_set` + :returns: sample_set object defininng simple function approximation + """ + + # make sure the shape of the data and the domain are correct + if isinstance(data_set, samp.sample_set_base): + data_set.check_num() + values = data_set._values + elif isinstance(data_set, samp.discretization): + num = data_set.check_nums() + dim = data_set._output_sample_set._dim + values = data_set._output_sample_set._values + elif isinstance(data_set, np.ndarray): + data_set.shape[0] + values = data_set + else: + msg = "The first argument must be of type bet.sample.sample_set, " + msg += "bet.sample.discretization or np.ndarray" + raise wrong_argument_type(msg) + + data = values + rect_domain = util.fix_dimensions_data(rect_domain, data.shape[1]) + domain_center = np.mean(rect_domain, 0) + domain_lengths = np.max(rect_domain, 0) - np.min(rect_domain, 0) + + + return uniform_partition_uniform_distribution_rectangle_size(data_set, + domain_center, domain_lengths, M, num_d_emulate) + + +def regular_partition_uniform_distribution_rectangle_size(data_set, Q_ref, rect_size, + center_pts_per_edge=1): r""" Creates a simple function approximation of :math:`\rho_{\mathcal{D},M}` - where :math:`\rho_{\mathcal{D},M}` is a multivariate normal probability - density centered at Q_ref with standard deviation std using M bins sampled - from the given normal distribution. + where :math:`\rho_{\mathcal{D},M}` is a uniform probability density + centered at ``Q_ref`` with ``rect_size`` of the width of a hyperrectangle. + + Since rho_D is a uniform distribution on a hyperrectanlge we should be able + to represent it exactly with ``M = 3^mdim`` or rather + ``len(d_distr_samples) == 3^mdim``. + + :param rect_size: The size used to determine the width of the uniform + distribution + :type rect_size: double or list() + :param int num_d_emulate: Number of samples used to emulate using an MC + assumption + :param data_set: Sample set that the probability measure is defined for. 
+ :type data_set: :class:`~bet.sample.discretization` or + :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray` + :param Q_ref: :math:`Q(\lambda_{reference})` + :type Q_ref: :class:`~numpy.ndarray` of size (mdim,) + :param list() center_pts_per_edge: number of center points per edge + and additional two points will be added to create the bounding layer + + :rtype: :class:`~bet.sample.voronoi_sample_set` + :returns: sample_set object defininng simple function approximation + + """ + + if isinstance(data_set, samp.sample_set_base): + data_set.check_num() + dim = data_set._dim + values = data_set._values + elif isinstance(data_set, samp.discretization): + data_set.check_nums() + dim = data_set._output_sample_set._dim + values = data_set._output_sample_set._values + elif isinstance(data_set, np.ndarray): + data_set.shape[0] + dim = data_set.shape[1] + values = data_set + else: + msg = "The first argument must be of type bet.sample.sample_set, " + msg += "bet.sample.discretization or np.ndarray" + raise wrong_argument_type(msg) + + data = values + + if not isinstance(center_pts_per_edge, collections.Iterable): + center_pts_per_edge = np.ones((dim,)) * center_pts_per_edge + else: + if not len(center_pts_per_edge) == dim: + center_pts_per_edge = np.ones((dim,)) + msg = 'center_pts_per_edge dimension mismatch.' + msg += 'Using 1 in each dimension.' + logging.warning(msg) + if np.any(np.less_equal(center_pts_per_edge, 0)): + msg = 'center_pts_per_edge must be greater than 0' + raise wrong_argument_type(msg) + if not isinstance(rect_size, collections.Iterable): + rect_size = rect_size * np.ones((dim,)) + if np.any(np.less_equal(rect_size, 0)): + msg = 'rect_size must be greater than 0' + raise wrong_argument_type(msg) + + sur_domain = np.array([np.min(data, 0), np.max(data, 0)]).transpose() + + points, _, rect_domain = vHist.center_and_layer1_points_binsize \ + (center_pts_per_edge, Q_ref, rect_size, sur_domain) + edges = vHist.edges_regular(center_pts_per_edge, rect_domain, sur_domain) + _, volumes, _ = vHist.histogramdd_volumes(edges, points) + s_set = vHist.simple_fun_uniform(points, volumes, rect_domain) + + if isinstance(data_set, samp.discretization): + data_set._output_probability_set = s_set + return s_set + + +def regular_partition_uniform_distribution_rectangle_domain(data_set, + rect_domain, center_pts_per_edge=1): + r""" + Creates a simple function appoximation of :math:`\rho_{\mathcal{D},M}` + where :math:`\rho{\mathcal{D}, M}` is a uniform probablity density over the + hyperrectangular domain specified by ``rect_domain``. + + Since :math:`\rho_\mathcal{D}` is a uniform distribution on a + hyperrectangle we should we able to represent it exactly with + :math:`M=3^{m}` where m is the dimension of the data space or rather + ``len(d_distr_samples) == 3**mdim``. + + :param data_set: Sample set that the probability measure is defined for. + :type data_set: :class:`~bet.sample.discretization` or + :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray` + :param rect_domain: The domain overwhich :math:`\rho_\mathcal{D}` is + uniform. 
+ :type rect_domain: :class:`numpy.ndarray` of shape (2, mdim) + :param list() center_pts_per_edge: number of center points per edge and + additional two points will be added to create the bounding layer + + :rtype: :class:`~bet.sample.voronoi_sample_set` + :returns: sample_set object defininng simple function approximation + + """ + # make sure the shape of the data and the domain are correct + if isinstance(data_set, samp.sample_set_base): + num = data_set.check_num() + dim = data_set._dim + values = data_set._values + elif isinstance(data_set, samp.discretization): + num = data_set.check_nums() + dim = data_set._output_sample_set._dim + values = data_set._output_sample_set._values + elif isinstance(data_set, np.ndarray): + num = data_set.shape[0] + dim = data_set.shape[1] + values = data_set + else: + msg = "The first argument must be of type bet.sample.sample_set, " + msg += "bet.sample.discretization or np.ndarray" + raise wrong_argument_type(msg) + + data = values + rect_domain = util.fix_dimensions_data(rect_domain, data.shape[1]) + domain_center = np.mean(rect_domain, 0) + domain_lengths = np.max(rect_domain, 0) - np.min(rect_domain, 0) + + return regular_partition_uniform_distribution_rectangle_size(data_set, + domain_center, domain_lengths, center_pts_per_edge) + +def regular_partition_uniform_distribution_rectangle_scaled(data_set, Q_ref, + rect_scale, center_pts_per_edge=1): + r""" + Creates a simple function approximation of :math:`\rho_{\mathcal{D},M}` + where :math:`\rho_{\mathcal{D},M}` is a uniform probability density + centered at ``Q_ref`` with ``rect_scale`` of the width + of D. + + Since rho_D is a uniform distribution on a hyperrectanlge we should be able + to represent it exactly with ``M = 3^mdim`` or rather + ``len(d_distr_samples) == 3^mdim``. + + :param data_set: Sample set that the probability measure is defined for. 
+ :type data_set: :class:`~bet.sample.discretization` or + :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray` + :param rect_scale: The scale used to determine the width of the + uniform distributiion as ``rect_size = (data_max-data_min)*rect_scale`` + :type rect_scale: double or list() + :param int num_d_emulate: Number of samples used to emulate using an MC + assumption + :param Q_ref: :math:`Q(\lambda_{reference})` + :type Q_ref: :class:`~numpy.ndarray` of size (mdim,) + :param list() center_pts_per_edge: number of center points per edge and + additional two points will be added to create the bounding layer + + :rtype: :class:`~bet.sample.voronoi_sample_set` + :returns: sample_set object defininng simple function approximation + + """ + if isinstance(data_set, samp.sample_set_base): + num = data_set.check_num() + dim = data_set._dim + values = data_set._values + elif isinstance(data_set, samp.discretization): + num = data_set.check_nums() + dim = data_set._output_sample_set._dim + values = data_set._output_sample_set._values + elif isinstance(data_set, np.ndarray): + num = data_set.shape[0] + dim = data_set.shape[1] + values = data_set + else: + msg = "The first argument must be of type bet.sample.sample_set, " + msg += "bet.sample.discretization or np.ndarray" + raise wrong_argument_type(msg) + + data = values + + if not isinstance(rect_scale, collections.Iterable): + rect_scale = rect_scale*np.ones((dim, )) + + rect_size = (np.max(data, 0) - np.min(data, 0))*rect_scale + return regular_partition_uniform_distribution_rectangle_size(data_set, Q_ref, + rect_size, center_pts_per_edge) + +def uniform_partition_uniform_distribution_data_samples(data_set): + r""" + Creates a simple function approximation of :math:`\rho_{\mathcal{D},M}` + where :math:`\rho_{\mathcal{D},M}` is a uniform probability density over + the entire ``data_domain``. Here the ``data_domain`` is the union of + voronoi cells defined by ``data``. In other words we assign each sample the + same probability, so ``M = len(data)`` or rather ``len(d_distr_samples) == + len(data)``. The purpose of this method is to approximate uniform + distributions over irregularly shaped domains. + :param data_set: Sample set that the probability measure is defined for. 
+ :type data_set: :class:`~bet.sample.discretization` + or :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray` + :param list() center_pts_per_edge: number of center points per edge and + additional two points will be added to create the bounding layer + + :rtype: :class:`~bet.sample.voronoi_sample_set` + :returns: sample_set object defininng simple function approximation + """ + if isinstance(data_set, samp.sample_set_base): + num = data_set.check_num() + dim = data_set._dim + values = data_set._values + s_set = data_set.copy() + elif isinstance(data_set, samp.discretization): + num = data_set.check_nums() + dim = data_set._output_sample_set._dim + values = data_set._output_sample_set._values + s_set = data_set._output_sample_set.copy() + elif isinstance(data_set, np.ndarray): + num = data_set.shape[0] + dim = data_set.shape[1] + values = data_set + s_set = samp.sample_set(dim=dim) + s_set.set_values(values) + else: + msg = "The first argument must be of type bet.sample.sample_set, " + msg += "bet.sample.discretization or np.ndarray" + raise wrong_argument_type(msg) + + s_set.set_probabilities(np.ones((num,), dtype=np.float)/num) + + if isinstance(data_set, samp.discretization): + data_set._output_probability_set = s_set + return s_set + + +def normal_partition_normal_distribution(data_set, Q_ref, std, M, num_d_emulate=1E6): + r""" + Creates a simple function approximation of :math:`\rho_{\mathcal{D},M}` + where :math:`\rho_{\mathcal{D},M}` is a multivariate normal probability + density centered at ``Q_ref`` with standard deviation ``std`` using + ``M`` bins sampled from the given normal distribution. + + :param data_set: Sample set that the probability measure is defined for. + :type data_set: :class:`~bet.sample.discretization` or + :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray` :param int M: Defines number M samples in D used to define :math:`\rho_{\mathcal{D},M}` The choice of M is something of an "art" - play around with it and you can get reasonable results with a - relatively small number here like 50. + relatively small number here like 50. 
:param int num_d_emulate: Number of samples used to emulate using an MC - assumption + assumption :param Q_ref: :math:`Q(\lambda_{reference})` :type Q_ref: :class:`~numpy.ndarray` of size (mdim,) :param std: The standard deviation of each QoI :type std: :class:`~numpy.ndarray` of size (mdim,) - - :rtype: tuple - :returns: (rho_D_M, d_distr_samples, d_Tree) where ``rho_D_M`` is (M,) and - ``d_distr_samples`` are (M, mdim) :class:`~numpy.ndarray` and `d_Tree` is - the :class:`~scipy.spatial.KDTree` for d_distr_samples + + :rtype: :class:`~bet.sample.voronoi_sample_set` + :returns: sample_set object defining simple function approximation """ import scipy.stats as stats @@ -156,7 +550,7 @@ def normal_normal(Q_ref, M, std, num_d_emulate=1E6): if not isinstance(std, collections.Iterable): std = np.array([std]) - covariance = std**2 + covariance = std ** 2 d_distr_samples = np.zeros((M, len(Q_ref))) print "d_distr_samples.shape", d_distr_samples.shape @@ -165,268 +559,238 @@ def normal_normal(Q_ref, M, std, num_d_emulate=1E6): if comm.rank == 0: for i in range(len(Q_ref)): - d_distr_samples[:, i] = np.random.normal(Q_ref[i], std[i], M) + d_distr_samples[:, i] = np.random.normal(Q_ref[i], std[i], M) comm.Bcast([d_distr_samples, MPI.DOUBLE], root=0) - + # Initialize sample set object + s_set = samp.voronoi_sample_set(len(Q_ref)) + s_set.set_values(d_distr_samples) + s_set.set_kdtree() + r'''Now compute probabilities for :math:`\rho_{\mathcal{D},M}` by sampling from rho_D First generate samples of rho_D - I sometimes call this emulation''' - num_d_emulate = int(num_d_emulate/comm.size)+1 + num_d_emulate = int(num_d_emulate / comm.size) + 1 d_distr_emulate = np.zeros((num_d_emulate, len(Q_ref))) for i in range(len(Q_ref)): d_distr_emulate[:, i] = np.random.normal(Q_ref[i], std[i], - num_d_emulate) + num_d_emulate) - # Now bin samples of rho_D in the M bins of D to compute rho_{D, M} + # Now bin samples of rho_D in the M bins of D to compute rho_{D, M} if len(d_distr_samples.shape) == 1: d_distr_samples = np.expand_dims(d_distr_samples, axis=1) - d_Tree = spatial.KDTree(d_distr_samples) - (_, k) = d_Tree.query(d_distr_emulate) + (_, k) = s_set.query(d_distr_emulate) count_neighbors = np.zeros((M,), dtype=np.int) volumes = np.zeros((M,)) for i in range(M): Itemp = np.equal(k, i) count_neighbors[i] = np.sum(Itemp) - volumes[i] = np.sum(1.0/stats.multivariate_normal.pdf\ - (d_distr_emulate[Itemp, :], Q_ref, covariance)) + volumes[i] = np.sum(1.0 / stats.multivariate_normal.pdf \ + (d_distr_emulate[Itemp, :], Q_ref, covariance)) # Now define probability of the d_distr_samples # This together with d_distr_samples defines :math:`\rho_{\mathcal{D},M}` ccount_neighbors = np.copy(count_neighbors) comm.Allreduce([count_neighbors, MPI.INT], [ccount_neighbors, MPI.INT], - op=MPI.SUM) + op=MPI.SUM) count_neighbors = ccount_neighbors cvolumes = np.copy(volumes) comm.Allreduce([volumes, MPI.DOUBLE], [cvolumes, MPI.DOUBLE], op=MPI.SUM) volumes = cvolumes - rho_D_M = count_neighbors.astype(np.float64)*volumes - rho_D_M = rho_D_M/np.sum(rho_D_M) - + rho_D_M = count_neighbors.astype(np.float64) * volumes + rho_D_M = rho_D_M / np.sum(rho_D_M) + s_set.set_probabilities(rho_D_M) + s_set.set_volumes(volumes) + # NOTE: The computation of q_distr_prob, q_distr_emulate, q_distr_samples # above, while informed by the sampling of the map Q, do not require # solving the model EVER! This can be done "offline" so to speak. 
- return (rho_D_M, d_distr_samples, d_Tree) + if isinstance(data_set, samp.discretization): + data_set._output_probability_set = s_set + return s_set + -def unif_normal(Q_ref, M, std, num_d_emulate=1E6): +def uniform_partition_normal_distribution(data_set, Q_ref, std, M, num_d_emulate=1E6): r""" Creates a simple function approximation of :math:`\rho_{\mathcal{D},M}` where :math:`\rho_{\mathcal{D},M}` is a multivariate normal probability - density centered at Q_ref with standard deviation std using M bins sampled - from a uniform distribution with a size 4 standard deviations in each - direction. + density centered at ``Q_ref`` with standard deviation ``std`` using + ``M`` bins sampled from a uniform distribution with a size 4 standard + deviations in each direction. + :param data_set: Sample set that the probability measure is defined for. + :type data_set: :class:`~bet.sample.discretization` or + :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray` :param int M: Defines number M samples in D used to define :math:`\rho_{\mathcal{D},M}` The choice of M is something of an "art" - play around with it and you can get reasonable results with a relatively small number here like 50. :param int num_d_emulate: Number of samples used to emulate using an MC - assumption + assumption :param Q_ref: :math:`Q(\lambda_{reference})` :type Q_ref: :class:`~numpy.ndarray` of size (mdim,) :param std: The standard deviation of each QoI :type std: :class:`~numpy.ndarray` of size (mdim,) - - :rtype: tuple - :returns: (rho_D_M, d_distr_samples, d_Tree) where ``rho_D_M`` is (M,) and - ``d_distr_samples`` are (M, mdim) :class:`~numpy.ndarray` and `d_Tree` is - the :class:`~scipy.spatial.KDTree` for d_distr_samples + + :rtype: :class:`~bet.sample.voronoi_sample_set` + :returns: sample_set object defininng simple function approximation """ - r'''Create M smaples defining M bins in D used to define + r'''Create M samples defining M bins in D used to define :math:`\rho_{\mathcal{D},M}` rho_D is assumed to be a multi-variate normal distribution with mean Q_ref and standard deviation std.''' + if not isinstance(Q_ref, collections.Iterable): + Q_ref = np.array([Q_ref]) + if not isinstance(std, collections.Iterable): + std = np.array([std]) - bin_size = 4.0*std + bin_size = 4.0 * std d_distr_samples = np.zeros((M, len(Q_ref))) if comm.rank == 0: - d_distr_samples = bin_size*(np.random.random((M, - len(Q_ref)))-0.5)+Q_ref + d_distr_samples = bin_size * (np.random.random((M, + len(Q_ref))) - 0.5) + Q_ref comm.Bcast([d_distr_samples, MPI.DOUBLE], root=0) - + # Initialize sample set object + s_set = samp.voronoi_sample_set(len(Q_ref)) + s_set.set_values(d_distr_samples) + s_set.set_kdtree() + r'''Now compute probabilities for :math:`\rho_{\mathcal{D},M}` by sampling from rho_D First generate samples of rho_D - I sometimes call this - emulation''' - num_d_emulate = int(num_d_emulate/comm.size)+1 + emulation''' + num_d_emulate = int(num_d_emulate / comm.size) + 1 d_distr_emulate = np.zeros((num_d_emulate, len(Q_ref))) for i in range(len(Q_ref)): - d_distr_emulate[:, i] = np.random.normal(Q_ref[i], std[i], - num_d_emulate) + d_distr_emulate[:, i] = np.random.normal(Q_ref[i], std[i], + num_d_emulate) - # Now bin samples of rho_D in the M bins of D to compute rho_{D, M} + # Now bin samples of rho_D in the M bins of D to compute rho_{D, M} if len(d_distr_samples.shape) == 1: d_distr_samples = np.expand_dims(d_distr_samples, axis=1) - d_Tree = spatial.KDTree(d_distr_samples) - (_, k) = d_Tree.query(d_distr_emulate) + (_, k) = 
s_set.query(d_distr_emulate) count_neighbors = np.zeros((M,), dtype=np.int) - #volumes = np.zeros((M,)) + # volumes = np.zeros((M,)) for i in range(M): Itemp = np.equal(k, i) count_neighbors[i] = np.sum(Itemp) - + r'''Now define probability of the d_distr_samples This together with d_distr_samples defines :math:`\rho_{\mathcal{D},M}`''' ccount_neighbors = np.copy(count_neighbors) comm.Allreduce([count_neighbors, MPI.INT], [ccount_neighbors, MPI.INT], - op=MPI.SUM) + op=MPI.SUM) count_neighbors = ccount_neighbors - rho_D_M = count_neighbors.astype(np.float64)/float(comm.size*num_d_emulate) - + rho_D_M = count_neighbors.astype(np.float64) / float(comm.size * num_d_emulate) + s_set.set_probabilities(rho_D_M) # NOTE: The computation of q_distr_prob, q_distr_emulate, q_distr_samples # above, while informed by the sampling of the map Q, do not require # solving the model EVER! This can be done "offline" so to speak. - return (rho_D_M, d_distr_samples, d_Tree) + if isinstance(data_set, samp.discretization): + data_set._output_probability_set = s_set + return s_set -def uniform_hyperrectangle_user(data, domain, center_pts_per_edge=1): +def user_partition_user_distribution(data_set, data_partition_set, + data_distribution_set): r""" - Creates a simple funciton appoximation of :math:`\rho_{\mathcal{D},M}` - where :math:`\rho{\mathcal{D}, M}` is a uniform probablity density over the - hyperrectangular domain specified by domain. - - Since :math:`\rho_\mathcal{D}` is a uniform distribution on a - hyperrectangle we should we able to represent it exactly with - :math:`M=3^{m}` where m is the dimension of the data space or rather - ``len(d_distr_samples) == 3**mdim``. - - :param data: Array containing QoI data where the QoI is mdim diminsional - :type data: :class:`~numpy.ndarray` of size (num_samples, mdim) - :param domain: The domain overwhich :math:`\rho_\mathcal{D}` is - uniform. - :type domain: :class:`numpy.ndarray` of shape (2, mdim) - :param list() center_pts_per_edge: number of center points per edge and - additional two points will be added to create the bounding layer - - :rtype: tuple - :returns: (rho_D_M, d_distr_samples, d_Tree) where ``rho_D_M`` is (M,) and - ``d_distr_samples`` are (M, mdim) :class:`~numpy.ndarray` and `d_Tree` - is the :class:`~scipy.spatial.KDTree` for d_distr_samples - - """ - # make sure the shape of the data and the domain are correct - data = util.fix_dimensions_data(data) - domain = util.fix_dimensions_data(domain, data.shape[1]) - domain_center = np.mean(domain, 0) - domain_lengths = np.max(domain, 0) - np.min(domain, 0) - - return uniform_hyperrectangle_binsize(data, domain_center, domain_lengths, - center_pts_per_edge) - -def uniform_hyperrectangle_binsize(data, Q_ref, bin_size, - center_pts_per_edge=1): - r""" - Creates a simple function approximation of :math:`\rho_{\mathcal{D},M}` - where :math:`\rho_{\mathcal{D},M}` is a uniform probability density - centered at Q_ref with bin_size of the width of D. - - Since rho_D is a uniform distribution on a hyperrectanlge we should be able - to represent it exactly with ``M = 3^mdim`` or rather - ``len(d_distr_samples) == 3^mdim``. 
- - :param bin_size: The size used to determine the width of the uniform - distribution - :type bin_size: double or list() - :param int num_d_emulate: Number of samples used to emulate using an MC - assumption - :param data: Array containing QoI data where the QoI is mdim diminsional - :type data: :class:`~numpy.ndarray` of size (num_samples, mdim) - :param Q_ref: :math:`Q(\lambda_{reference})` - :type Q_ref: :class:`~numpy.ndarray` of size (mdim,) - :param list() center_pts_per_edge: number of center points per edge - and additional two points will be added to create the bounding layer - - :rtype: tuple - :returns: (rho_D_M, d_distr_samples, d_Tree) where - ``rho_D_M`` is (M,) and ``d_distr_samples`` are (M, mdim) - :class:`~numpy.ndarray` and `d_Tree` is the :class:`~scipy.spatial.KDTree` - for d_distr_samples - + Creates a user defined simple function approximation of a user + defined distribution. The simple function discretization is + specified in the ``data_partition_set``, and the set of i.i.d. + samples from the distribution is specified in the + ``data_distribution_set``. + + :param data_set: Sample set that the probability measure is defined for. + :type data_set: :class:`~bet.sample.discretization` or + :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray` + :param data_partition_set: Sample set defining the discretization + of the data space into Voronoi cells for which a simple function + is defined upon. + :type data_partition_set: :class:`~bet.sample.discretization` or + :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray` + :param data_distribution_set: Sample set containing the i.i.d. samples + from the distribution on the data space that are binned within the + Voronoi cells implicitly defined by the data_discretization_set. + :type data_distribution_set: :class:`~bet.sample.discretization` or + :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray` + + :rtype: :class:`~bet.sample.voronoi_sample_set` + :returns: sample_set object defininng simple function approximation """ - data = util.fix_dimensions_data(data) - - if not isinstance(center_pts_per_edge, collections.Iterable): - center_pts_per_edge = np.ones((data.shape[1],)) * center_pts_per_edge + if isinstance(data_set, samp.sample_set_base): + s_set = data_set.copy() + dim = s_set._dim + elif isinstance(data_set, samp.discretization): + s_set = data_set._output_sample_set.copy() + dim = s_set._dim + elif isinstance(data_set, np.ndarray): + dim = data_set.shape[1] + values = data_set + s_set = samp.sample_set(dim=dim) + s_set.set_values(values) else: - if not len(center_pts_per_edge) == data.shape[1]: - center_pts_per_edge = np.ones((data.shape[1],)) - print 'Warning: center_pts_per_edge dimension mismatch.' - print 'Using 1 in each dimension.' 
- if np.any(np.less(center_pts_per_edge, 0)): - print 'Warning: center_pts_per_edge must be greater than 0' - if not isinstance(bin_size, collections.Iterable): - bin_size = bin_size*np.ones((data.shape[1],)) - if np.any(np.less(bin_size, 0)): - print 'Warning: center_pts_per_edge must be greater than 0' - - sur_domain = np.array([np.min(data, 0), np.max(data, 0)]).transpose() - - points, _, rect_domain = vHist.center_and_layer1_points_binsize\ - (center_pts_per_edge, Q_ref, bin_size, sur_domain) - edges = vHist.edges_regular(center_pts_per_edge, rect_domain, sur_domain) - _, volumes, _ = vHist.histogramdd_volumes(edges, points) - return vHist.simple_fun_uniform(points, volumes, rect_domain) - -def uniform_hyperrectangle(data, Q_ref, bin_ratio, center_pts_per_edge=1): - r""" - Creates a simple function approximation of :math:`\rho_{\mathcal{D},M}` - where :math:`\rho_{\mathcal{D},M}` is a uniform probability density - centered at Q_ref with bin_ratio of the width - of D. - - Since rho_D is a uniform distribution on a hyperrectanlge we should be able - to represent it exactly with ``M = 3^mdim`` or rather - ``len(d_distr_samples) == 3^mdim``. - - :param bin_ratio: The ratio used to determine the width of the - uniform distributiion as ``bin_size = (data_max-data_min)*bin_ratio`` - :type bin_ratio: double or list() - :param int num_d_emulate: Number of samples used to emulate using an MC - assumption - :param data: Array containing QoI data where the QoI is mdim diminsional - :type data: :class:`~numpy.ndarray` of size (num_samples, mdim) - :param Q_ref: :math:`Q(\lambda_{reference})` - :type Q_ref: :class:`~numpy.ndarray` of size (mdim,) - :param list() center_pts_per_edge: number of center points per edge and - additional two points will be added to create the bounding layer - - :rtype: tuple - :returns: (rho_D_M, d_distr_samples, d_Tree) where ``rho_D_M`` is (M,) and - ``d_distr_samples`` are (M, mdim) :class:`~numpy.ndarray` and `d_Tree` - is the :class:`~scipy.spatial.KDTree` for d_distr_samples + msg = "The first argument must be of type bet.sample.sample_set, " + msg += "bet.sample.discretization or np.ndarray" + raise wrong_argument_type(msg) + + if isinstance(data_partition_set, samp.sample_set_base): + M = data_partition_set.check_num() + d_distr_samples = data_partition_set._values + dim_simpleFun = d_distr_samples.shape[1] + elif isinstance(data_partition_set, samp.discretization): + M = data_partition_set.check_nums() + d_distr_samples = data_partition_set._output_sample_set._values + dim_simpleFun = d_distr_samples.shape[1] + elif isinstance(data_partition_set, np.ndarray): + M = data_partition_set.shape[0] + dim_simpleFun = data_partition_set.shape[1] + d_distr_samples = data_partition_set + else: + msg = "The second argument must be of type bet.sample.sample_set, " + msg += "bet.sample.discretization or np.ndarray" + raise wrong_argument_type(msg) + + if isinstance(data_distribution_set, samp.sample_set_base): + d_distr_emulate = data_distribution_set._values + dim_MonteCarlo = d_distr_emulate.shape[1] + num_d_emulate = data_distribution_set.check_num() + elif isinstance(data_distribution_set, samp.discretization): + d_distr_emulate = data_distribution_set._output_sample_set._values + dim_MonteCarlo = d_distr_emulate.shape[1] + num_d_emulate = data_distribution_set.check_nums() + elif isinstance(data_distribution_set, np.ndarray): + num_d_emulate = data_distribution_set.shape[0] + dim_MonteCarlo = data_distribution_set.shape[1] + d_distr_emulate = data_distribution_set + 
else: + msg = "The second argument must be of type bet.sample.sample_set, " + msg += "bet.sample.discretization or np.ndarray" + raise wrong_argument_type(msg) - """ - data = util.fix_dimensions_data(data) + if np.not_equal(dim_MonteCarlo, dim) or np.not_equal(dim_simpleFun, dim): + msg = "The argument types have conflicting dimensions" + raise wrong_argument_type(msg) - if not isinstance(bin_ratio, collections.Iterable): - bin_ratio = bin_ratio*np.ones((data.shape[1], )) + # Initialize sample set object + s_set = samp.sample_set(dim) + s_set.set_values(d_distr_samples) + s_set.set_kdtree() - bin_size = (np.max(data, 0) - np.min(data, 0))*bin_ratio - return uniform_hyperrectangle_binsize(data, Q_ref, bin_size, - center_pts_per_edge) + (_, k) = s_set.query(d_distr_emulate) -def uniform_data(data): - r""" - Creates a simple function approximation of :math:`\rho_{\mathcal{D},M}` - where :math:`\rho_{\mathcal{D},M}` is a uniform probability density over - the entire ``data_domain``. Here the ``data_domain`` is the union of - voronoi cells defined by ``data``. In other words we assign each sample the - same probability, so ``M = len(data)`` or rather ``len(d_distr_samples) == - len(data)``. The purpose of this method is to approximate uniform - distributions over irregularly shaped domains. - - :param data: Array containing QoI data where the QoI is mdim diminsional - :type data: :class:`~numpy.ndarray` of size (num_samples, mdim) - :param list() center_pts_per_edge: number of center points per edge and - additional two points will be added to create the bounding layer + count_neighbors = np.zeros((M,), dtype=np.int) + for i in range(M): + count_neighbors[i] = np.sum(np.equal(k, i)) - :rtype: tuple - :returns: (rho_D_M, d_distr_samples, d_Tree) where ``rho_D_M`` is (M,) and - ``d_distr_samples`` are (M, mdim) :class:`~numpy.ndarray` and `d_Tree` - is the :class:`~scipy.spatial.KDTree` for d_distr_samples - """ - data = util.fix_dimensions_data(data) + # Use the binning to define :math:`\rho_{\mathcal{D},M}` + ccount_neighbors = np.copy(count_neighbors) + comm.Allreduce([count_neighbors, MPI.INT], [ccount_neighbors, MPI.INT], + op=MPI.SUM) + count_neighbors = ccount_neighbors + rho_D_M = count_neighbors.astype(np.float64) / \ + float(num_d_emulate * comm.size) + s_set.set_probabilities(rho_D_M) - d_distr_prob = np.ones((data.shape[0],), dtype=np.float)/data.shape[0] - d_Tree = spatial.KDTree(data) - return (d_distr_prob, data, d_Tree) + if isinstance(data_set, samp.discretization): + data_set._output_probability_set = s_set + return s_set diff --git a/bet/calculateP/voronoiHistogram.py b/bet/calculateP/voronoiHistogram.py index 3900e393..864f8181 100644 --- a/bet/calculateP/voronoiHistogram.py +++ b/bet/calculateP/voronoiHistogram.py @@ -1,4 +1,4 @@ -# Copyright (C) 2014-2015 The BET Development Team +# Copyright (C) 2014-2016 The BET Development Team # -*- coding: utf-8 -*- """ @@ -7,9 +7,10 @@ volumes of these cells. """ +import logging import numpy as np -from scipy import spatial import bet.util as util +import bet.sample as samp def center_and_layer1_points_binsize(center_pts_per_edge, center, r_size, sur_domain): @@ -50,7 +51,7 @@ def center_and_layer1_points_binsize(center_pts_per_edge, center, r_size, if np.any(np.greater(r_size, rect_width)): msg = "The hyperrectangle defined by this size extends outside the " msg += "original domain." 
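For orientation (not part of the patch itself): a minimal usage sketch of the sample-set based routine rewritten above, assuming it lives in ``bet.calculateP.simpleFunP`` and using hypothetical arrays in place of real model output::

    import numpy as np
    import bet.calculateP.simpleFunP as simpleFunP  # assumed module path

    # hypothetical 2-D QoI values, partition points, and distribution samples
    data = np.random.random((1000, 2))
    partition_points = np.random.random((10, 2))
    distribution_samples = np.random.random((5000, 2))

    # returns a sample set whose probabilities define rho_{D, M}
    s_set = simpleFunP.user_partition_user_distribution(data,
                                                        partition_points,
                                                        distribution_samples)
    print(s_set.get_probabilities().sum())  # sums to 1 in a serial run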
- print msg + logging.warning(msg) # determine the locations of the points for the 1st bounding layer layer1_left = rect_domain[:, 0]-rect_width/(2*center_pts_per_edge) @@ -101,7 +102,7 @@ def center_and_layer1_points(center_pts_per_edge, center, r_ratio, sur_domain): if np.all(np.greater(r_ratio, 1)): msg = "The hyperrectangle defined by this ratio is larger than the" msg += " original domain." - print msg + logging.warning(msg) # determine r_size from the width of the surrounding domain r_size = r_ratio*(sur_domain[:, 1]-sur_domain[:, 0]) @@ -141,11 +142,11 @@ def edges_regular(center_pts_per_edge, rect_domain, sur_domain): if np.any(np.greater_equal(sur_domain[:, 0], rect_domain[:, 0])): msg = "The hyperrectangle defined by this size is larger than the" msg += " original domain." - print msg + logging.warning(msg) elif np.any(np.less_equal(sur_domain[:, 1], rect_domain[:, 1])): msg = "The hyperrectangle defined by this size is larger than the" msg += " original domain." - print msg + logging.warning(msg) rect_edges = list() rect_and_sur_edges = list() @@ -242,10 +243,8 @@ def simple_fun_uniform(points, volumes, rect_domain): hyperrectangle of uniform probability :type rect_domain: :class:`numpy.ndarray` of shape (mdim, 2) - :rtype: tuple - :returns: (rho_D_M, points, d_Tree) where ``rho_D_M`` and - ``points`` are (mdim, M) :class:`~numpy.ndarray` and - `d_Tree` is the :class:`~scipy.spatial.KDTree` for points + :rtype: :class:`~bet.sample.voronoi_sample_set` + :returns: sample_set object defininng simple function approximation """ util.fix_dimensions_data(points) @@ -256,6 +255,8 @@ def simple_fun_uniform(points, volumes, rect_domain): rho_D_M = np.zeros(volumes.shape) # normalize on Lambda not D rho_D_M[inside] = volumes[inside]/np.sum(volumes[inside]) - d_Tree = spatial.KDTree(points) - return (rho_D_M, points, d_Tree) + s_set = samp.voronoi_sample_set(dim=points.shape[1]) + s_set.set_values(points) + s_set.set_probabilities(rho_D_M) + return s_set diff --git a/bet/postProcess/plotDomains.py b/bet/postProcess/plotDomains.py index 84ef4d9b..d8222021 100644 --- a/bet/postProcess/plotDomains.py +++ b/bet/postProcess/plotDomains.py @@ -1,4 +1,4 @@ -# Copyright (C) 2014-2015 The BET Development Team +# Copyright (C) 2014-2016 The BET Development Team """ This module provides methods used to plot two-dimensional domains and/or @@ -9,13 +9,14 @@ import matplotlib.tri as tri import numpy as np import matplotlib.pyplot as plt -#plt.rc('text', usetex=True) -#plt.rc('font', family='serif') +# plt.rc('text', usetex=True) +# plt.rc('font', family='serif') from matplotlib.lines import Line2D from itertools import combinations from mpl_toolkits.mplot3d import Axes3D import bet.util as util import os +import bet.sample as sample markers = [] for m in Line2D.markers: @@ -27,22 +28,35 @@ colors = ('b', 'g', 'r', 'c', 'm', 'y', 'k') -def scatter_2D(samples, sample_nos=None, color=None, p_ref=None, save=True, - interactive=False, xlabel='x', ylabel='y', - filename='scatter2d'): + +class dim_not_matching(Exception): """ - Creates a two-dimensional scatter plot of ``samples`` colored by ``color`` - (usually an array of pointwise probability density values). A reference - ``sample`` (``p_ref``) can be chosen by the user. This reference ``sample`` - will be plotted as a mauve circle twice the size of the other markers. - - :param samples: Samples to plot. These are the locations in the x-axis and - y-axis. 
- :type samples: :class:`numpy.ndarray` - :param list sample_nos: indicies of the ``samples`` to plot - :param color: values to color the ``samples`` by + Exception for when the dimension is inconsistent. + """ + + +class bad_object(Exception): + """ + Exception for when the wrong type of object is used. + """ + + +def scatter_2D(sample_obj, sample_nos=None, color=None, p_ref=None, save=True, + interactive=False, xlabel='x', ylabel='y', + filename='scatter2d'): + r""" + Creates a two-dimensional scatter plot of the samples within the sample + object colored by ``color`` (usually an array of pointwise probability + density values). A reference sample (``p_ref``) can be chosen by the user. + This reference sample will be plotted as a mauve circle twice the size of + the other markers. + + :param sample_obj: contains samples to create scatter plot + :type sample_obj: :class:`~bet.sample.sample_set_base` + :param list sample_nos: indicies of the samples to plot + :param color: values to color the samples by :type color: :class:`numpy.ndarray` - :param p_ref: reference parameter(``sample``) value + :param p_ref: reference parameter value :type p_ref: :class:`numpy.ndarray` of shape (ndim,) :param bool save: flag whether or not to save the figure :param bool interactive: flag whether or not to show the figure @@ -51,22 +65,29 @@ def scatter_2D(samples, sample_nos=None, color=None, p_ref=None, save=True, :param string filename: filename to save the figure as """ + if not isinstance(sample_obj, sample.sample_set_base): + raise bad_object("Improper sample object") + # check dimension of data to plot + if sample_obj.get_dim() != 2: + raise dim_not_matching("Cannot create 2D plot of non-2D sample " + "object") + # plot all of the samples by default if sample_nos is None: - sample_nos = np.arange(samples.shape[0]) + sample_nos = np.arange(sample_obj.get_values().shape[0]) # color all of the samples uniformly by default and set the default # to the default colormap of matplotlib if color is None: - color = np.ones((samples.shape[0],)) + color = np.ones((sample_obj.get_values().shape[0],)) cmap = None else: cmap = plt.cm.PuBu markersize = 75 color = color[sample_nos] # create the scatter plot for the samples specified by sample_nos - plt.scatter(samples[sample_nos, 0], samples[sample_nos, 1], c=color, - s=markersize, - alpha=.75, linewidth=.1, cmap=cmap) + plt.scatter(sample_obj.get_values()[sample_nos, 0], + sample_obj.get_values()[sample_nos, 1], + c=color, s=markersize, alpha=.75, linewidth=.1, cmap=cmap) # add a colorbar and label for the colorbar usually we just assume the # samples are colored by the pointwise probability density on the data # space @@ -74,35 +95,35 @@ def scatter_2D(samples, sample_nos=None, color=None, p_ref=None, save=True, cbar.set_label(r'$\rho_\mathcal{D}(q)$') # if there is a reference value plot it with a notiable marker if p_ref is not None: - plt.scatter(p_ref[0], p_ref[1], c='m', s=2*markersize) + plt.scatter(p_ref[0], p_ref[1], c='m', s=2 * markersize) if save: plt.autoscale(tight=True) plt.xlabel(xlabel) plt.ylabel(ylabel) plt.savefig(filename, bbox_inches='tight', transparent=True, - pad_inches=0) + pad_inches=0) if interactive: plt.show() else: plt.close() -def scatter_3D(samples, sample_nos=None, color=None, p_ref=None, save=True, + +def scatter_3D(sample_obj, sample_nos=None, color=None, p_ref=None, save=True, interactive=False, xlabel='x', ylabel='y', zlabel='z', filename="scatter3d"): - """ - Creates a three-dimensional scatter plot of ``samples`` colored by - 
``color`` (usually an array of pointwise probability density values). A - reference ``sample`` (``p_ref``) can be chosen by the user. This reference - ``sample`` will be plotted as a mauve circle twice the size of the other - markers. - - :param samples: Samples to plot. These are the locations in the x-axis, - y-axis, and z-axis. - :type samples: :class:`numpy.ndarray` - :param list sample_nos: indicies of the ``samples`` to plot - :param color: values to color the ``samples`` by + r""" + Creates a three-dimensional scatter plot of samples within the sample + object colored by ``color`` (usually an array of pointwise probability + density values). A reference sample (``p_ref``) can be chosen by the user. + This reference sample will be plotted as a mauve circle twice the size of + the other markers. + + :param sample_obj: Object containing the samples to plot + :type sample_obj: :class:`~bet.sample.sample_set_base` + :param list sample_nos: indicies of the samples to plot + :param color: values to color the samples by :type color: :class:`numpy.ndarray` - :param p_ref: reference parameter(``sample``) value + :param p_ref: reference parameter value :type p_ref: :class:`numpy.ndarray` of shape (ndim,) :param bool save: flag whether or not to save the figure :param bool interactive: flag whether or not to show the figure @@ -112,14 +133,20 @@ def scatter_3D(samples, sample_nos=None, color=None, p_ref=None, save=True, :param string filename: filename to save the figure as """ - + if not isinstance(sample_obj, sample.sample_set_base): + raise bad_object("Improper sample object") + # check dimension of data to plot + if sample_obj.get_dim() != 3: + raise dim_not_matching("Cannot create 3D plot of non-3D sample " + "object") + # plot all of the samples by default if sample_nos is None: - sample_nos = np.arange(samples.shape[0]) + sample_nos = np.arange(sample_obj.get_values().shape[0]) # color all of the samples uniformly by default and set the default # to the default colormap of matplotlib if color is None: - color = np.ones((samples.shape[0],)) + color = np.ones((sample_obj.get_values().shape[0],)) cmap = None else: cmap = plt.cm.PuBu @@ -128,42 +155,43 @@ def scatter_3D(samples, sample_nos=None, color=None, p_ref=None, save=True, # create the scatter plot for the samples specified by sample_nos fig = plt.figure() ax = fig.add_subplot(111, projection='3d') - p = ax.scatter(samples[sample_nos, 0], samples[sample_nos, 1], - samples[sample_nos, 2], alpha=.75, linewidth=.1, c=color, - s=markersize, - cmap=cmap) + p = ax.scatter(sample_obj.get_values()[sample_nos, 0], + sample_obj.get_values()[sample_nos, 1], + sample_obj.get_values()[sample_nos, 2], + alpha=.75, linewidth=.1, c=color, s=markersize, cmap=cmap) # add a colorbar and label for the colorbar usually we just assume the # samples are colored by the pointwise probability density on the data # space cbar = fig.colorbar(p) - cbar.set_label(r'$\rho_\mathcal{D}(q)$') + cbar.set_label(r'$\rho_\mathcal{D}(q)$') # if there is a reference value plot it with a notiable marker if p_ref is not None: - ax.scatter(p_ref[0], p_ref[1], p_ref[2], c='m', s=2*markersize) + ax.scatter(p_ref[0], p_ref[1], p_ref[2], c='m', s=2 * markersize) ax.autoscale(tight=True) ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) ax.set_zlabel(zlabel) if save: plt.savefig(filename, bbox_inches='tight', transparent=True, - pad_inches=0) + pad_inches=0) if interactive: plt.show() else: plt.close() - -def show_param(samples, data, rho_D=None, p_ref=None, sample_nos=None, - save=True, 
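As a rough usage sketch (not from the patch), the reworked ``scatter_2D`` now takes a sample set instead of a raw array; the samples and coloring values below are placeholders::

    import numpy as np
    import bet.sample as sample
    import bet.postProcess.plotDomains as plotDomains

    # hypothetical 2-D samples and pointwise density values to color them by
    s_set = sample.sample_set(2)
    s_set.set_values(np.random.random((500, 2)))
    color = np.random.random(500)

    plotDomains.scatter_2D(s_set, color=color, xlabel=r'$\lambda_1$',
                           ylabel=r'$\lambda_2$', filename='scatter2d_demo')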
interactive=False, lnums=None, showdim=None): + + +def show_param(sample_disc, rho_D=None, p_ref=None, sample_nos=None, + save=True, interactive=False, lnums=None, showdim=None): r""" - Create scatter plots of ``samples`` colored by ``color`` (usually - an array of pointwise probability density values). A reference ``sample`` - (``p_ref``) can be chosen by the user. This reference ``sample`` will be - plotted as a mauve circle twice the size of the other markers. - - :param samples: Samples to plot - :type samples: :class:`numpy.ndarray` - :param data: Data value(s) associated with ``samples`` - :type data: :class:`numpy.ndarray` + Create scatter plots of samples within the sample object colored by + ``color`` (usually an array of pointwise probability density values). A + reference sample (``p_ref``) can be chosen by the user. This reference + sample will be plotted as a mauve circle twice the size of the other + markers. + + :param sample_disc: Object containing the samples to plot + :type sample_disc: :class:`~bet.sample.discretization` + or :class:`~bet.sample.sample_set_base` :param list sample_nos: sample numbers to plot :param rho_D: probability density function on D :type rho_D: callable function that takes a :class:`np.array` and returns a @@ -183,54 +211,67 @@ def show_param(samples, data, rho_D=None, p_ref=None, sample_nos=None, # If there is density function given determine the pointwise probability # values of each sample based on the value in the data space. Otherwise, # color the samples in numerical order. - if rho_D is not None and data is not None: - rD = rho_D(data) + if isinstance(sample_disc, sample.discretization) and rho_D is not None: + rD = rho_D(sample_disc._output_sample_set.get_values()) + sample_obj = sample_disc._input_sample_set else: - rD = np.ones(samples.shape[0]) + if isinstance(sample_disc, sample.discretization): + sample_obj = sample_disc._input_sample_set + elif isinstance(sample_disc, sample.sample_set_base): + sample_obj = sample_disc + else: + raise bad_object("Improper sample object") + rD = np.ones(sample_obj.get_values().shape[0]) # If no specific coordinate numbers are given for the parameter coordinates # (e.g. i, where \lambda_i is a coordinate in the parameter space), then # set them to be the the counting numbers. if lnums is None: - lnums = 1+np.array(range(samples.shape[1])) + lnums = 1 + np.array(range(sample_obj.get_values().shape[1])) # Create the labels based on the user selected parameter coordinates - xlabel = r'$\lambda_{'+str(lnums[0])+'}$' - ylabel = r'$\lambda_{'+str(lnums[1])+'}$' + xlabel = r'$\lambda_{' + str(lnums[0]) + '}$' + ylabel = r'$\lambda_{' + str(lnums[1]) + '}$' savename = 'param_samples_cs.eps' # Plot 2 or 3 dimensional scatter plots of the samples colored by rD. 
- if samples.shape[1] == 2: - scatter_2D(samples, sample_nos, rD, p_ref, save, interactive, xlabel, - ylabel, savename) - elif samples.shape[1] == 3: - zlabel = r'$\lambda_{'+str(lnums[2])+'}$' - scatter_3D(samples, sample_nos, rD, p_ref, save, interactive, xlabel, - ylabel, zlabel, savename) - elif samples.shape[1] > 2 and showdim == 2: + if sample_obj.get_dim() == 2: + scatter_2D(sample_obj, sample_nos, rD, p_ref, save, + interactive, xlabel, ylabel, savename) + elif sample_obj.get_dim() == 3: + zlabel = r'$\lambda_{' + str(lnums[2]) + '}$' + scatter_3D(sample_obj, sample_nos, rD, p_ref, save, + interactive, xlabel, ylabel, zlabel, savename) + elif sample_obj.get_dim() > 2 and showdim == 2: + temp_obj = sample.sample_set(2) for x, y in combinations(lnums, 2): - xlabel = r'$\lambda_{'+str(x)+'}$' - ylabel = r'$\lambda_{'+str(y)+'}$' - savename = 'param_samples_l'+str(x)+'l'+str(y)+'_cs.eps' - scatter_2D(samples[:, [x-1, y-1]], sample_nos, rD, p_ref, save, - interactive, xlabel, ylabel, savename) - elif samples.shape[1] > 3 and showdim == 3: + xlabel = r'$\lambda_{' + str(x) + '}$' + ylabel = r'$\lambda_{' + str(y) + '}$' + savename = 'param_samples_l' + str(x) + 'l' + str(y) + '_cs.eps' + temp_obj.set_values(sample_obj.get_values()[:, [x - 1, y - 1]]) + scatter_2D(temp_obj, sample_nos, rD, p_ref, save, + interactive, xlabel, ylabel, savename) + elif sample_obj.get_dim() > 3 and showdim == 3: + temp_obj = sample.sample_set(3) for x, y, z in combinations(lnums, 3): - xlabel = r'$\lambda_{'+str(x)+'}$' - ylabel = r'$\lambda_{'+str(y)+'}$' - zlabel = r'$\lambda_{'+str(z)+'}$' - savename = 'param_samples_l'+str(x)+'l'+str(y)+'l'+str(z)+'_cs.eps' - scatter_3D(samples[:, [x-1, y-1, z-1]], sample_nos, rD, p_ref, save, - interactive, xlabel, ylabel, zlabel, savename) - -def show_data(data, rho_D=None, Q_ref=None, sample_nos=None, - save=True, interactive=False, Q_nums=None, showdim=None): + xlabel = r'$\lambda_{' + str(x) + '}$' + ylabel = r'$\lambda_{' + str(y) + '}$' + zlabel = r'$\lambda_{' + str(z) + '}$' + savename = 'param_samples_l' + str(x) + 'l' + str(y) + 'l' + str(z) + \ + '_cs.eps' + temp_obj.set_values(sample_obj.get_values()[:, [x - 1, y - 1, z - 1]]) + scatter_3D(temp_obj, sample_nos, rD, p_ref, save, + interactive, xlabel, ylabel, zlabel, savename) + + +def show_data(sample_obj, rho_D=None, Q_ref=None, sample_nos=None, + save=True, interactive=False, Q_nums=None, showdim=None): r""" - Create scatter plots of ``data`` colored by ``color`` (usually - an array of pointwise probability density values). A reference ``data`` - point (``Q_ref``) can be chosen by the user. This reference ``data`` will - be plotted as a mauve circle twice the size of the other markers. - - :param data: Data (the data associated with a given set of samples in the - data space) - :type data: :class:`numpy.ndarray` + Create scatter plots of data within the sample_obj colored by ``color`` + (usually an array of pointwise probability density values). + A reference datum point (``Q_ref``) can be chosen by the user. + This reference datum is plotted as a mauve circle twice the size of + the other markers. 
+ + :param sample_obj: Object containing the samples to plot + :type sample_obj: :class:`~bet.sample.sample_set_base` :param list sample_nos: sample numbers to plot :param rho_D: probability density on D :type rho_D: callable function that takes a :class:`np.array` and returns a @@ -246,73 +287,83 @@ def show_data(data, rho_D=None, Q_ref=None, sample_nos=None, pairwise or tripletwise data sample scatter plots in 2 or 3 dimensions - """ + """ + if isinstance(sample_obj, sample.discretization): + sample_obj = sample_obj._output_sample_set + # If there is density function given determine the pointwise probability # values of each sample based on the value in the data space. Otherwise, # color the samples in numerical order. if rho_D != None: - rD = rho_D(data) + rD = rho_D(sample_obj.get_values()) else: - rD = np.ones(data.shape[0]) + rD = np.ones(sample_obj.get_values().shape[0]) # If no specific coordinate numbers are given for the data coordinates # (e.g. i, where \q_i is a coordinate in the data space), then # set them to be the the counting numbers. if Q_nums is None: - Q_nums = range(data.shape[1]) + Q_nums = range(sample_obj.get_dim()) # Create the labels based on the user selected data coordinates - xlabel = r'$q_{'+str(Q_nums[0]+1)+'}$' - ylabel = r'$q_{'+str(Q_nums[1]+1)+'}$' + xlabel = r'$q_{' + str(Q_nums[0] + 1) + '}$' + ylabel = r'$q_{' + str(Q_nums[1] + 1) + '}$' savename = 'data_samples_cs.eps' # Plot 2 or 3 dimensional scatter plots of the data colored by rD. - if data.shape[1] == 2: + if sample_obj.get_dim() == 2: q_ref = None if isinstance(Q_ref, np.ndarray): q_ref = Q_ref[Q_nums[:2]] - scatter_2D(data, sample_nos, rD, q_ref, save, interactive, xlabel, - ylabel, savename) - elif data.shape[1] == 3: - zlabel = r'$q_{'+str(Q_nums[2]+1)+'}$' + scatter_2D(sample_obj, sample_nos, rD, q_ref, save, interactive, xlabel, + ylabel, savename) + elif sample_obj.get_dim() == 3: + zlabel = r'$q_{' + str(Q_nums[2] + 1) + '}$' if isinstance(Q_ref, np.ndarray): q_ref = Q_ref[Q_nums[:3]] - scatter_3D(data, sample_nos, rD, q_ref, save, interactive, xlabel, - ylabel, zlabel, savename) - elif data.shape[1] > 2 and showdim == 2: + scatter_3D(sample_obj, sample_nos, rD, q_ref, save, interactive, xlabel, + ylabel, zlabel, savename) + elif sample_obj.get_dim() > 2 and showdim == 2: for x, y in combinations(Q_nums, 2): - xlabel = r'$q_{'+str(x+1)+'}$' - ylabel = r'$q_{'+str(y+1)+'}$' - savename = 'data_samples_q'+str(x+1)+'q'+str(y+1)+'_cs.eps' + xlabel = r'$q_{' + str(x + 1) + '}$' + ylabel = r'$q_{' + str(y + 1) + '}$' + savename = 'data_samples_q' + str(x + 1) + 'q' + str(y + 1) + '_cs.eps' q_ref = None if isinstance(Q_ref, np.ndarray): q_ref = Q_ref[[x, y]] - scatter_2D(data[:, [x, y]], sample_nos, rD, q_ref, save, - interactive, xlabel, ylabel, savename) - elif data.shape[1] > 3 and showdim == 3: + + sample_obj_temp = sample.sample_set(2) + sample_obj_temp.set_values(sample_obj.get_values()[:, [x, y]]) + + scatter_2D(sample_obj_temp, sample_nos, rD, q_ref, save, + interactive, xlabel, ylabel, savename) + elif sample_obj.get_dim() > 3 and showdim == 3: for x, y, z in combinations(Q_nums, 3): - xlabel = r'$q_{'+str(x+1)+'}$' - ylabel = r'$q_{'+str(y+1)+'}$' - zlabel = r'$q_{'+str(z+1)+'}$' + xlabel = r'$q_{' + str(x + 1) + '}$' + ylabel = r'$q_{' + str(y + 1) + '}$' + zlabel = r'$q_{' + str(z + 1) + '}$' q_ref = None if isinstance(Q_ref, np.ndarray): q_ref = Q_ref[[x, y, z]] - savename = 'data_samples_q'+str(x+1)+'q'+str(y+1)+'q'\ - +str(z+1)+'_cs.eps' - scatter_3D(data[:, [x, y, z]], sample_nos, 
rD, q_ref, save, - interactive, xlabel, ylabel, zlabel, savename) - -def show_data_domain_multi(samples, data, Q_ref=None, Q_nums=None, - img_folder='figs/', ref_markers=None, - ref_colors=None, showdim=None): + savename = 'data_samples_q' + str(x + 1) + 'q' + str(y + 1) + 'q' \ + + str(z + 1) + '_cs.eps' + + sample_obj_temp = sample.sample_set(3) + sample_obj_temp.set_values(sample_obj.get_values()[:, [x, y, z]]) + + scatter_3D(sample_obj_temp, sample_nos, rD, q_ref, save, + interactive, xlabel, ylabel, zlabel, savename) + + +def show_data_domain_multi(sample_disc, Q_ref=None, Q_nums=None, + img_folder='figs/', ref_markers=None, + ref_colors=None, showdim=None): r""" Plots 2-D projections of the data domain D using a triangulation based on the first two coordinates (parameters) of the generating samples where :math:`Q={q_1, q_i}` for ``i=Q_nums``, with a marker for various - :math:`Q_{ref}`. + :math:`Q_{ref}`. - :param samples: Samples to plot - :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim). Only - uses the first two dimensions. - :param data: Data associated with ``samples`` - :type data: :class:`numpy.ndarray` + :param sample_disc: Object containing the samples to plot + :type sample_disc: :class:`~bet.sample.discretization` or + :class:`~bet.sample.sample_set_base` :param Q_ref: reference data value :type Q_ref: :class:`numpy.ndarray` of shape (M, mdim) :param list Q_nums: dimensions of the QoI to plot @@ -324,16 +375,22 @@ def show_data_domain_multi(samples, data, Q_ref=None, Q_nums=None, :type showdim: int or string """ + if not isinstance(sample_disc, sample.discretization): + raise bad_object("Improper sample object") + # Set the default marker and colors if ref_markers == None: ref_markers = markers if ref_colors == None: ref_colors = colors + + data_obj = sample_disc._output_sample_set + sample_obj = sample_disc._input_sample_set # If no specific coordinate numbers are given for the data coordinates # (e.g. i, where \q_i is a coordinate in the data space), then # set them to be the the counting numbers. if Q_nums is None: - Q_nums = range(data.shape[1]) + Q_nums = range(data_obj.get_dim()) # If no specific coordinate number of choice is given set to be the first # coordinate direction. 
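A hedged sketch of calling the two viewing helpers reworked above, ``show_param`` and ``show_data``, with a discretization object; the samples and the density ``rho_D`` are stand-ins, not values from the patch::

    import numpy as np
    import bet.sample as sample
    import bet.postProcess.plotDomains as plotDomains

    # hypothetical 2-D inputs and 2-D QoI values collected in a discretization
    input_set = sample.sample_set(2)
    input_set.set_values(np.random.random((200, 2)))
    output_set = sample.sample_set(2)
    output_set.set_values(np.random.random((200, 2)))
    disc = sample.discretization(input_set, output_set)

    # stand-in for a density on the data space
    rho_D = lambda data: np.ones(data.shape[0])

    plotDomains.show_param(disc, rho_D=rho_D)  # inputs colored by rho_D(Q)
    plotDomains.show_data(disc, rho_D=rho_D)   # QoI values colored by rho_D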
if showdim == None: @@ -345,55 +402,67 @@ def show_data_domain_multi(samples, data, Q_ref=None, Q_nums=None, # Make sure the shape of Q_ref is correct if Q_ref is not None: - Q_ref = util.fix_dimensions_data(Q_ref, data.shape[1]) + Q_ref = util.fix_dimensions_data(Q_ref, data_obj.get_dim()) # Create the triangulization to use to define the topology of the samples # in the data space from the first two parameters in the parameter space - triangulation = tri.Triangulation(samples[:, 0], samples[:, 1]) + triangulation = tri.Triangulation(sample_obj.get_values()[:, 0], + sample_obj.get_values()[:, 1]) triangles = triangulation.triangles # Create plots of the showdim^th QoI (q_{showdim}) with all other QoI (q_i) if isinstance(showdim, int): for i in Q_nums: - xlabel = r'$q_{'+str(showdim+1)+r'}$' - ylabel = r'$q_{'+str(i+1)+r'}$' - - filenames = [img_folder+'domain_q'+str(showdim+1)+'_q'+\ - str(i+1)+'.eps', img_folder+'q'+str(showdim+1)+\ - '_q'+str(i+1)+'_domain_Q_cs.eps'] - if Q_ref is not None: - show_data_domain_2D(samples, data[:, [showdim, i]], Q_ref[:, - [showdim, i]], ref_markers, ref_colors, xlabel=xlabel, - ylabel=ylabel, triangles=triangles, save=True, - interactive=False, filenames=filenames) + xlabel = r'$q_{' + str(showdim + 1) + r'}$' + ylabel = r'$q_{' + str(i + 1) + r'}$' + + filenames = [img_folder + 'domain_q' + str(showdim + 1) + '_q' + \ + str(i + 1) + '.eps', img_folder + 'q' + str(showdim + 1) + \ + '_q' + str(i + 1) + '_domain_Q_cs.eps'] + + data_obj_temp = sample.sample_set(2) + data_obj_temp.set_values(data_obj.get_values()[:, [showdim, i]]) + sample_disc_temp = sample.discretization(sample_obj, data_obj_temp) + + if Q_ref is not None: + show_data_domain_2D(sample_disc_temp, Q_ref[:, [showdim, i]], + ref_markers, ref_colors, xlabel=xlabel, + ylabel=ylabel, triangles=triangles, save=True, + interactive=False, filenames=filenames) + else: - show_data_domain_2D(samples, data[:, [showdim, i]], None, - ref_markers, ref_colors, xlabel=xlabel, ylabel=ylabel, - triangles=triangles, save=True, interactive=False, - filenames=filenames) - # Create plots of all combinations of QoI in 2D + show_data_domain_2D(sample_disc_temp, None, + ref_markers, ref_colors, xlabel=xlabel, ylabel=ylabel, + triangles=triangles, save=True, interactive=False, + filenames=filenames) + # Create plots of all combinations of QoI in 2D elif showdim == 'all' or showdim == 'ALL': for x, y in combinations(Q_nums, 2): - xlabel = r'$q_{'+str(x+1)+r'}$' - ylabel = r'$q_{'+str(y+1)+r'}$' + xlabel = r'$q_{' + str(x + 1) + r'}$' + ylabel = r'$q_{' + str(y + 1) + r'}$' + + filenames = [img_folder + 'domain_q' + str(x + 1) + '_q' + str(y + 1) + '.eps', + img_folder + 'q' + str(x + 1) + '_q' + str(y + 1) + '_domain_Q_cs.eps'] + + data_obj_temp = sample.sample_set(2) + data_obj_temp.set_values(data_obj.get_values()[:, [x, y]]) + sample_disc_temp = sample.discretization(sample_obj, data_obj_temp) - filenames = [img_folder+'domain_q'+str(x+1)+'_q'+str(y+1)+'.eps', - img_folder+'q'+str(x+1)+'_q'+str(y+1)+'_domain_Q_cs.eps'] - if Q_ref is not None: - show_data_domain_2D(samples, data[:, [x, y]], Q_ref[:, [x, y]], - ref_markers, ref_colors, xlabel=xlabel, ylabel=ylabel, - triangles=triangles, save=True, interactive=False, - filenames=filenames) + show_data_domain_2D(sample_disc_temp, Q_ref[:, [x, y]], + ref_markers, ref_colors, xlabel=xlabel, ylabel=ylabel, + triangles=triangles, save=True, interactive=False, + filenames=filenames) else: - show_data_domain_2D(samples, data[:, [x, y]], None, - ref_markers, ref_colors, 
xlabel=xlabel, ylabel=ylabel, - triangles=triangles, save=True, interactive=False, - filenames=filenames) - -def show_data_domain_2D(samples, data, Q_ref=None, ref_markers=None, - ref_colors=None, xlabel=r'$q_1$', ylabel=r'$q_2$', - triangles=None, save=True, interactive=False, filenames=None): + show_data_domain_2D(sample_disc_temp, None, + ref_markers, ref_colors, xlabel=xlabel, ylabel=ylabel, + triangles=triangles, save=True, interactive=False, + filenames=filenames) + + +def show_data_domain_2D(sample_disc, Q_ref=None, ref_markers=None, + ref_colors=None, xlabel=r'$q_1$', ylabel=r'$q_2$', + triangles=None, save=True, interactive=False, filenames=None): r""" Plots 2-D a single data domain D using a triangulation based on the first two coordinates (parameters) of the generating samples where :math:`Q={q_1, @@ -401,11 +470,9 @@ def show_data_domain_2D(samples, data, Q_ref=None, ref_markers=None, that the first dimension of data is :math:`q_1`. - :param samples: Samples to plot - :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim). Only - uses the first two dimensions. - :param data: Data associated with ``samples`` - :type data: :class:`numpy.ndarray` + :param sample_disc: Object containing the samples to plot + :type sample_disc: :class:`~bet.sample.discretization` + or :class:`~bet.sample.sample_set_base` :param Q_ref: reference data value :type Q_ref: :class:`numpy.ndarray` of shape (M, 2) :param list ref_markers: list of marker types for :math:`Q_{ref}` @@ -419,6 +486,12 @@ def show_data_domain_2D(samples, data, Q_ref=None, ref_markers=None, :param list filenames: file names for the unmarked and marked domain plots """ + if not isinstance(sample_disc, sample.discretization): + raise bad_object("Improper sample object") + + data_obj = sample_disc._output_sample_set + sample_obj = sample_disc._input_sample_set + # Set the default marker and colors if ref_markers == None: ref_markers = markers @@ -428,45 +501,48 @@ def show_data_domain_2D(samples, data, Q_ref=None, ref_markers=None, # (e.g. i, where \q_i is a coordinate in the data space), then # set them to be the the counting numbers. 
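Correspondingly, a sketch of the new ``show_data_domain_multi`` call (placeholder model output; the ``figs/`` folder is created up front as an assumption, in case the routine does not create it)::

    import os
    import numpy as np
    import bet.sample as sample
    import bet.postProcess.plotDomains as plotDomains

    input_set = sample.sample_set(2)
    input_set.set_values(np.random.random((100, 2)))
    output_set = sample.sample_set(3)
    output_set.set_values(np.random.random((100, 3)))
    disc = sample.discretization(input_set, output_set)

    if not os.path.isdir('figs/'):
        os.mkdir('figs/')

    # plot q_1 against every other QoI, marking a reference datum
    Q_ref = np.array([[0.5, 0.5, 0.5]])
    plotDomains.show_data_domain_multi(disc, Q_ref=Q_ref, showdim=0,
                                       img_folder='figs/')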
if triangles is None: - triangulation = tri.Triangulation(samples[:, 0], samples[:, 1]) + triangulation = tri.Triangulation(sample_obj.get_values()[:, 0], + sample_obj.get_values()[:, 1]) triangles = triangulation.triangles # Set default file names if filenames == None: filenames = ['domain_q1_q2_cs.eps', 'q1_q2_domain_Q_cs.eps'] - + # Make sure the shape of Q_ref is correct if Q_ref is not None: Q_ref = util.fix_dimensions_data(Q_ref, 2) # Create figure - plt.tricontourf(data[:, 0], data[:, 1], np.zeros((data.shape[0],)), - triangles=triangles, colors='grey') + plt.tricontourf(data_obj.get_values()[:, 0], data_obj.get_values()[:, 1], + np.zeros((data_obj.get_values().shape[0],)), + triangles=triangles, colors='grey') plt.autoscale(tight=True) plt.xlabel(xlabel) plt.ylabel(ylabel) plt.savefig(filenames[0], bbox_inches='tight', transparent=True, - pad_inches=.2) + pad_inches=.2) # Add truth markers if Q_ref is not None: for i in xrange(Q_ref.shape[0]): plt.scatter(Q_ref[i, 0], Q_ref[i, 1], s=60, c=ref_colors[i], - marker=ref_markers[i]) + marker=ref_markers[i]) if save: plt.savefig(filenames[1], bbox_inches='tight', transparent=True, - pad_inches=.2) + pad_inches=.2) if interactive: plt.show() else: plt.close() -def scatter_param_multi(samples, img_folder='figs/', showdim='all', save=True, - interactive=False): + +def scatter_param_multi(sample_obj, img_folder='figs/', showdim='all', + save=True, interactive=False): r""" - Creates two-dimensional projections of scatter plots of ``samples``. - - :param samples: Samples to plot. - :type samples: :class:`numpy.ndarray` + Creates two-dimensional projections of scatter plots of samples. + + :param sample_obj: Object containing the samples to plot + :type sample_obj: :class:`~bet.sample.sample_set_base` :param bool save: flag whether or not to save the figure :param bool interactive: flag whether or not to show the figure :param string img_folder: folder to save the plots to @@ -475,6 +551,9 @@ def scatter_param_multi(samples, img_folder='figs/', showdim='all', save=True, :type showdim: int or string """ + if not isinstance(sample_obj, sample.sample_set_base): + raise bad_object("Improper sample object") + # If no specific coordinate number of choice is given set to be the first # coordinate direction. 
if showdim == None: @@ -483,26 +562,27 @@ def scatter_param_multi(samples, img_folder='figs/', showdim='all', save=True, if not os.path.isdir(img_folder): os.mkdir(img_folder) # Create list of all the parameter coordinates - L_nums = range(samples.shape[1]) + L_nums = range(sample_obj.get_dim()) - # Create plots of the showdim^th parameter (\lambda_{showdim}) with all the - # other parameters + # Create plots of the showdim^th parameter (\lambda_{showdim}) with all the + # other parameters if isinstance(showdim, int): for i in L_nums: - xlabel = r'$\lambda_{'+str(showdim+1)+r'}$' - ylabel = r'$\lambda_{'+str(i+1)+r'}$' + xlabel = r'$\lambda_{' + str(showdim + 1) + r'}$' + ylabel = r'$\lambda_{' + str(i + 1) + r'}$' - filenames = [img_folder+'domain_l'+str(showdim+1)+'_l'+\ - str(i+1)+'.eps', img_folder+'l'+str(showdim+1)+\ - '_l'+str(i+1)+'_domain_L_cs.eps'] + filenames = [img_folder + 'domain_l' + str(showdim + 1) + '_l' + \ + str(i + 1) + '.eps', img_folder + 'l' + str(showdim + 1) + \ + '_l' + str(i + 1) + '_domain_L_cs.eps'] filename = filenames[0] - plt.scatter(samples[:, 0], samples[:, 1]) + plt.scatter(sample_obj.get_values()[:, 0], + sample_obj.get_values()[:, 1]) if save: plt.autoscale(tight=True) plt.xlabel(xlabel) plt.ylabel(ylabel) plt.savefig(filename, bbox_inches='tight', transparent=True, - pad_inches=0) + pad_inches=0) if interactive: plt.show() else: @@ -510,37 +590,39 @@ def scatter_param_multi(samples, img_folder='figs/', showdim='all', save=True, # Create plots of all of the possible pairwise combinations of parameters elif showdim == 'all' or showdim == 'ALL': for x, y in combinations(L_nums, 2): - xlabel = r'$\lambda_{'+str(x+1)+r'}$' - ylabel = r'$\lambda_{'+str(y+1)+r'}$' + xlabel = r'$\lambda_{' + str(x + 1) + r'}$' + ylabel = r'$\lambda_{' + str(y + 1) + r'}$' - filenames = [img_folder+'domain_l'+str(x+1)+'_l'+\ - str(y+1)+'.eps', img_folder+'l'+str(x+1)+\ - '_l'+str(y+1)+'_domain_L_cs.eps'] + filenames = [img_folder + 'domain_l' + str(x + 1) + '_l' + \ + str(y + 1) + '.eps', img_folder + 'l' + str(x + 1) + \ + '_l' + str(y + 1) + '_domain_L_cs.eps'] filename = filenames[0] - plt.scatter(samples[:, x], samples[:, y]) + plt.scatter(sample_obj.get_values()[:, x], + sample_obj.get_values()[:, y]) if save: plt.autoscale(tight=True) plt.xlabel(xlabel) plt.ylabel(ylabel) plt.savefig(filename, bbox_inches='tight', transparent=True, - pad_inches=0) + pad_inches=0) if interactive: plt.show() else: plt.close() -def scatter2D_multi(samples, color=None, p_ref=None, img_folder='figs/', + +def scatter2D_multi(sample_obj, color=None, p_ref=None, img_folder='figs/', filename="scatter2Dm", label_char=r'$\lambda', - showdim=None): + showdim=None): r""" - Creates two-dimensional projections of scatter plots of ``samples`` colored + Creates two-dimensional projections of scatter plots of samples colored by ``color`` (usually an array of pointwise probability density values). A - reference ``sample`` (``p_ref``) can be chosen by the user. This reference - ``sample`` will be plotted as a mauve circle twice the size of the other + reference sample (``p_ref``) can be chosen by the user. This reference + sample will be plotted as a mauve circle twice the size of the other markers. - :param samples: Samples to plot. 
- :type samples: :class:`numpy.ndarray` + :param sample_obj: Object containing the samples to plot + :type sample_obj: :class:`~bet.sample.sample_set_base` :param color: values to color the ``samples`` by :type color: :class:`numpy.ndarray` :param string filename: filename to save the figure as @@ -553,6 +635,8 @@ def scatter2D_multi(samples, color=None, p_ref=None, img_folder='figs/', :type showdim: int or string """ + if not isinstance(sample_obj, sample.sample_set_base): + raise bad_object("Improper sample object") # If no specific coordinate number of choice is given set to be the first # coordinate direction. if showdim == None: @@ -560,43 +644,50 @@ def scatter2D_multi(samples, color=None, p_ref=None, img_folder='figs/', # Create a folder for these figures if it doesn't already exist if not os.path.isdir(img_folder): os.mkdir(img_folder) - # Create list of all the parameter coordinates - p_nums = range(samples.shape[1]) + # Create list of all the parameter coordinates + p_nums = range(sample_obj.get_dim()) - # Create plots of the showdim^th parameter (\lambda_{showdim}) with all the - # other parameters + # Create plots of the showdim^th parameter (\lambda_{showdim}) with all the + # other parameters if isinstance(showdim, int): for i in p_nums: - xlabel = label_char+r'_{'+str(showdim+1)+r'}$' - ylabel = label_char+r'_{'+str(i+1)+r'}$' - - postfix = '_d'+str(showdim+1)+'_d'+str(i+1)+'.eps' - myfilename = os.path.join(img_folder, filename+postfix) - if p_ref: - scatter_2D(samples[:, [showdim, i]], sample_nos=None, - color=color, p_ref=p_ref[[showdim, i]], save=True, - interactive=False, xlabel=xlabel, ylabel=ylabel, - filename=myfilename) + xlabel = label_char + r'_{' + str(showdim + 1) + r'}$' + ylabel = label_char + r'_{' + str(i + 1) + r'}$' + + postfix = '_d' + str(showdim + 1) + '_d' + str(i + 1) + '.eps' + myfilename = os.path.join(img_folder, filename + postfix) + + sample_obj_temp = sample.sample_set(2) + sample_obj_temp.set_values(sample_obj.get_values()[:, [showdim, i]]) + + if p_ref is not None: + scatter_2D(sample_obj_temp, sample_nos=None, + color=color, p_ref=p_ref[[showdim, i]], save=True, + interactive=False, xlabel=xlabel, ylabel=ylabel, + filename=myfilename) else: - scatter_2D(samples[:, [showdim, i]], sample_nos=None, - color=color, p_ref=None, save=True, - interactive=False, xlabel=xlabel, ylabel=ylabel, - filename=myfilename) + scatter_2D(sample_obj_temp, sample_nos=None, + color=color, p_ref=None, save=True, + interactive=False, xlabel=xlabel, ylabel=ylabel, + filename=myfilename) # Create plots of all of the possible pairwise combinations of parameters elif showdim == 'all' or showdim == 'ALL': for x, y in combinations(p_nums, 2): - xlabel = label_char+r'_{'+str(x+1)+r'}$' - ylabel = label_char+r'_{'+str(y+1)+r'}$' - - postfix = '_d'+str(x+1)+'_d'+str(y+1)+'.eps' - myfilename = os.path.join(img_folder, filename+postfix) - - if p_ref: - scatter_2D(samples[:, [x, y]], sample_nos=None, color=color, - p_ref=p_ref[[x, y]], save=True, interactive=False, - xlabel=xlabel, ylabel=ylabel, filename=myfilename) + xlabel = label_char + r'_{' + str(x + 1) + r'}$' + ylabel = label_char + r'_{' + str(y + 1) + r'}$' + + postfix = '_d' + str(x + 1) + '_d' + str(y + 1) + '.eps' + myfilename = os.path.join(img_folder, filename + postfix) + + sample_obj_temp = sample.sample_set(2) + sample_obj_temp.set_values(sample_obj.get_values()[:, [x, y]]) + + if p_ref is not None: + scatter_2D(sample_obj_temp, sample_nos=None, color=color, + p_ref=p_ref[[x, y]], save=True, interactive=False, 
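A usage sketch for the projection helpers, ``scatter_param_multi`` above and ``scatter2D_multi`` (whose rework continues just below); the 4-D parameter samples are hypothetical::

    import numpy as np
    import bet.sample as sample
    import bet.postProcess.plotDomains as plotDomains

    s_set = sample.sample_set(4)
    s_set.set_values(np.random.random((300, 4)))

    # pairwise scatter plots of every parameter combination
    plotDomains.scatter_param_multi(s_set, img_folder='figs/', showdim='all')

    # the same projections, colored by a hypothetical density value
    color = np.random.random(300)
    plotDomains.scatter2D_multi(s_set, color=color, img_folder='figs/',
                                showdim='all')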
+                           xlabel=xlabel, ylabel=ylabel, filename=myfilename)
             else:
-                scatter_2D(samples[:, [x, y]], sample_nos=None, color=color,
-                    p_ref=None, save=True, interactive=False,
-                    xlabel=xlabel, ylabel=ylabel, filename=myfilename)
+                scatter_2D(sample_obj_temp, sample_nos=None, color=color,
+                           p_ref=None, save=True, interactive=False,
+                           xlabel=xlabel, ylabel=ylabel, filename=myfilename)
diff --git a/bet/postProcess/plotP.py b/bet/postProcess/plotP.py
index 030718a1..9cb0be12 100644
--- a/bet/postProcess/plotP.py
+++ b/bet/postProcess/plotP.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2014-2015 The BET Development Team
+# Copyright (C) 2014-2016 The BET Development Team
 
 """
 This module provides methods for plotting probabilities. 
@@ -10,86 +10,145 @@
 #plt.rc('font', family='serif')
 import numpy as np
 import copy, math
+import bet.sample as sample
 
-def calculate_1D_marginal_probs(P_samples, samples, lam_domain, nbins=20):
-    
+
+class dim_not_matching(Exception):
+    """
+    Exception for when the dimension is inconsistent.
+    """
+
+class bad_object(Exception):
+    """
+    Exception for when the wrong type of object is used.
     """
-    This calculates every single marginal of
-    input probability measure defined by P_samples on a 1D grid.
-    
-    :param P_samples: Probabilities.
-    :type P_samples: :class:`~numpy.ndarray` of shape (num_samples,)
-    :param samples: The samples in parameter space for which the model was run.
-    :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim)
-    :param lam_domain: The domain for each parameter for the model.
-    :type lam_domain: :class:`~numpy.ndarray` of shape (ndim, 2)
+
+class missing_attribute(Exception):
+    """
+    Exception for missing attribute.
+    """
+
+def calculate_1D_marginal_probs(sample_set, nbins=20):
+
+    r"""
+    This calculates every single marginal of the probability measure
+    described by the probabilities within the sample_set object.
+    If the sample_set object is a discretization object, we assume
+    that the probabilities to be plotted are from the input space on the
+    emulated samples
+    (``discretization._emulated_input_sample_set._probabilities_local``).
+
+    This assumes that the user has already run
+    :meth:`~bet.calculateP.calculateP.prob_emulated`.
+
+    :param sample_set: Object containing samples and probabilities
+    :type sample_set: :class:`~bet.sample.sample_set_base` or
+        :class:`~bet.sample.discretization`
     :param nbins: Number of bins in each direction.
:type nbins: :int or :class:`~numpy.ndarray` of shape (ndim,) :rtype: tuple :returns: (bins, marginals) """ - if len(samples.shape) == 1: - samples = np.expand_dims(samples, axis=1) - num_dim = samples.shape[1] + if isinstance(sample_set, sample.discretization): + sample_obj = sample_set._emulated_input_sample_set + if sample_obj is None: + raise missing_attribute("Missing emulated_input_sample_set") + elif isinstance(sample_set, sample.sample_set_base): + sample_obj = sample_set + else: + raise bad_object("Improper sample object") + + # Check for local probabilities + if sample_obj._probabilities_local is None: + if sample_obj.probabilities is None: + raise missing_attribute("Missing probabilities") + else: + sample_obj.global_to_local() # Make list of bins if only an integer is given if isinstance(nbins, int): - nbins = nbins*np.ones(num_dim, dtype=np.int) + nbins = nbins*np.ones(sample_obj.get_dim(), dtype=np.int) # Create bins bins = [] - for i in range(num_dim): - bins.append(np.linspace(lam_domain[i][0], lam_domain[i][1], nbins[i]+1)) + for i in range(sample_obj.get_dim()): + bins.append(np.linspace(sample_obj.get_domain()[i][0], + sample_obj.get_domain()[i][1], + nbins[i]+1)) # Calculate marginals marginals = {} - for i in range(num_dim): - [marg, _] = np.histogram(samples[:, i], bins=bins[i], - weights=P_samples) + for i in range(sample_obj.get_dim()): + [marg, _] = np.histogram(sample_obj.get_values_local()[:, i], bins=bins[i], + weights=sample_obj.get_probabilities_local()) marg_temp = np.copy(marg) comm.Allreduce([marg, MPI.DOUBLE], [marg_temp, MPI.DOUBLE], op=MPI.SUM) marginals[i] = marg_temp return (bins, marginals) -def calculate_2D_marginal_probs(P_samples, samples, lam_domain, nbins=20): +def calculate_2D_marginal_probs(sample_set, nbins=20): """ This calculates every pair of marginals (or joint in 2d case) of - input probability measure defined by P_samples on a rectangular grid. - - :param P_samples: Probabilities. - :type P_samples: :class:`~numpy.ndarray` of shape (num_samples,) - :param samples: The samples in parameter space for which the model was run. - :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim) - :param lam_domain: The domain for each parameter for the model. - :type lam_domain: :class:`~numpy.ndarray` of shape (ndim, 2) + input probability measure defined on a rectangular grid. + If the sample_set object is a discretization object, we assume + that the probabilities to be plotted are from the input space on the + emulated samples + (``discretization._emulated_input_sample_set._probabilties_local``). + + This assumes that the user has already run + :meth:`~bet.calculateP.calculateP.prob_emulated`. + + + :param sample_set: Object containing samples and probabilities + :type sample_set: :class:`~bet.sample.sample_set_base` + or :class:`~bet.sample.discretization` :param nbins: Number of bins in each direction. 
:type nbins: :int or :class:`~numpy.ndarray` of shape (ndim,) :rtype: tuple :returns: (bins, marginals) """ - if len(samples.shape) == 1: - samples = np.expand_dims(samples, axis=1) - num_dim = samples.shape[1] + if isinstance(sample_set, sample.discretization): + sample_obj = sample_set._emulated_input_sample_set + if sample_obj is None: + raise missing_attribute("Missing emulated_input_sample_set") + elif isinstance(sample_set, sample.sample_set_base): + sample_obj = sample_set + else: + raise bad_object("Improper sample object") + + # Check for local probabilities + if sample_obj._probabilities_local is None: + if sample_obj.probabilities is None: + raise missing_attribute("Missing probabilities") + else: + sample_obj.global_to_local() + + if sample_obj.get_dim() < 2: + raise dim_not_matching("Incompatible dimensions of sample set" + " for plotting") # Make list of bins if only an integer is given if isinstance(nbins, int): - nbins = nbins*np.ones(num_dim, dtype=np.int) + nbins = nbins*np.ones(sample_obj.get_dim(), dtype=np.int) # Create bins bins = [] - for i in range(num_dim): - bins.append(np.linspace(lam_domain[i][0], lam_domain[i][1], nbins[i]+1)) - + for i in range(sample_obj.get_dim()): + bins.append(np.linspace(sample_obj.get_domain()[i][0], + sample_obj.get_domain()[i][1], + nbins[i]+1)) + # Calculate marginals marginals = {} - for i in range(num_dim): - for j in range(i+1, num_dim): - (marg, _) = np.histogramdd(samples[:, [i, j]], bins=[bins[i], - bins[j]], weights=P_samples) + for i in range(sample_obj.get_dim()): + for j in range(i+1, sample_obj.get_dim()): + (marg, _) = np.histogramdd(sample_obj.get_values_local()[:, [i, j]], + bins=[bins[i], bins[j]], + weights=sample_obj.get_probabilities_local()) marg = np.ascontiguousarray(marg) marg_temp = np.copy(marg) comm.Allreduce([marg, MPI.DOUBLE], [marg_temp, MPI.DOUBLE], @@ -98,21 +157,24 @@ def calculate_2D_marginal_probs(P_samples, samples, lam_domain, nbins=20): return (bins, marginals) -def plot_1D_marginal_probs(marginals, bins, lam_domain, +def plot_1D_marginal_probs(marginals, bins, sample_set, filename="file", lam_ref=None, interactive=False, - lambda_label=None): + lambda_label=None, file_extension=".eps"): """ This makes plots of every single marginal probability of - input probability measure defined by P_samples on a 1D grid. + input probability measure on a 1D grid. + If the sample_set object is a discretization object, we assume + that the probabilities to be plotted are from the input space. :param marginals: 1D marginal probabilities :type marginals: dictionary with int as keys and :class:`~numpy.ndarray` of shape (nbins+1,) as values :param bins: Endpoints of bins used in calculating marginals :type bins: :class:`~numpy.ndarray` of shape (nbins+1,) - :param lam_domain: The domain for each parameter for the model. - :type lam_domain: :class:`~numpy.ndarray` of shape (ndim, 2) + :param sample_set: Object containing samples and probabilities + :type sample_set: :class:`~bet.sample.sample_set_base` + or :class:`~bet.sample.discretization` :param filename: Prefix for output files. :type filename: str :param lam_ref: True parameters. 
@@ -123,6 +185,15 @@ def plot_1D_marginal_probs(marginals, bins, lam_domain, :type lambda_label: list of length nbins of strings or None """ + if isinstance(sample_set, sample.discretization): + sample_obj = sample_set._input_sample_set + elif isinstance(sample_set, sample.sample_set_base): + sample_obj = sample_set + else: + raise bad_object("Improper sample object") + + lam_domain = sample_obj.get_domain() + if comm.rank == 0: index = copy.deepcopy(marginals.keys()) index.sort() @@ -141,28 +212,32 @@ def plot_1D_marginal_probs(marginals, bins, lam_domain, label1 = lambda_label[i] ax.set_xlabel(label1) ax.set_ylabel(r'$\rho$') - fig.savefig(filename + "_1D_" + str(i) + ".eps", transparent=True) + fig.savefig(filename + "_1D_" + str(i) + file_extension, + transparent=True) if interactive: plt.show() else: plt.close() plt.clf() -def plot_2D_marginal_probs(marginals, bins, lam_domain, +def plot_2D_marginal_probs(marginals, bins, sample_set, filename="file", lam_ref=None, plot_surface=False, interactive=False, - lambda_label=None): + lambda_label=None, file_extension=".eps"): """ This makes plots of every pair of marginals (or joint in 2d case) of - input probability measure defined by P_samples on a rectangular grid. + input probability measure on a rectangular grid. + If the sample_set object is a discretization object, we assume + that the probabilities to be plotted are from the input space. :param marginals: 2D marginal probabilities :type marginals: dictionary with tuples of 2 integers as keys and :class:`~numpy.ndarray` of shape (nbins+1,) as values :param bins: Endpoints of bins used in calculating marginals :type bins: :class:`~numpy.ndarray` of shape (nbins+1,2) - :param lam_domain: The domain for each parameter for the model. - :type lam_domain: :class:`~numpy.ndarray` of shape (ndim, 2) + :param sample_set: Object containing samples and probabilities + :type sample_set: :class:`~bet.sample.sample_set_base` + or :class:`~bet.sample.discretization` :param filename: Prefix for output files. :type filename: str :param lam_ref: True parameters. 
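Below is a rough sketch of computing and plotting marginals with the new signatures. The probabilities and domain are placeholders, and the ``set_domain`` setter and the effect of ``global_to_local`` (populating the ``*_local`` arrays the marginal routines read) are assumptions made for illustration::

    import numpy as np
    import bet.sample as sample
    import bet.postProcess.plotP as plotP

    # hypothetical 2-D samples carrying probabilities and a domain
    s_set = sample.sample_set(2)
    s_set.set_values(np.random.random((1000, 2)))
    s_set.set_probabilities(np.ones((1000,)) / 1000.0)
    s_set.set_domain(np.array([[0.0, 1.0], [0.0, 1.0]]))  # assumed setter
    s_set.global_to_local()  # make the *_local arrays used by the marginals

    (bins, marg1D) = plotP.calculate_1D_marginal_probs(s_set, nbins=20)
    plotP.plot_1D_marginal_probs(marg1D, bins, s_set, filename='marg_demo')

    (bins2, marg2D) = plotP.calculate_2D_marginal_probs(s_set, nbins=20)
    plotP.plot_2D_marginal_probs(marg2D, bins2, s_set, filename='marg_demo')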
@@ -173,6 +248,15 @@ def plot_2D_marginal_probs(marginals, bins, lam_domain, :type lambda_label: list of length nbins of strings or None """ + if isinstance(sample_set, sample.discretization): + sample_obj = sample_set._input_sample_set + elif isinstance(sample_set, sample.sample_set_base): + sample_obj = sample_set + else: + raise bad_object("Improper sample object") + + lam_domain = sample_obj.get_domain() + from matplotlib import cm if plot_surface: from mpl_toolkits.mplot3d import Axes3D @@ -204,8 +288,8 @@ def plot_2D_marginal_probs(marginals, bins, lam_domain, fig.colorbar(quadmesh, ax=ax, label=label_cbar) plt.axis([lam_domain[i][0], lam_domain[i][1], lam_domain[j][0], lam_domain[j][1]]) - fig.savefig(filename + "_2D_" + str(i) + "_" + str(j) + ".eps", - transparent=True) + fig.savefig(filename + "_2D_" + str(i) + "_" + str(j) +\ + file_extension, transparent=True) if interactive: plt.show() else: @@ -215,8 +299,8 @@ def plot_2D_marginal_probs(marginals, bins, lam_domain, for k, (i, j) in enumerate(pairs): fig = plt.figure(k) ax = fig.gca(projection='3d') - X = bins[i] - Y = bins[j] + X = bins[i][:-1] + np.diff(bins[i])/2 + Y = bins[j][:-1] + np.diff(bins[j])/2 X, Y = np.meshgrid(X, Y, indexing='ij') surf = ax.plot_surface(X, Y, marginals[(i, j)], rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=0, @@ -228,14 +312,16 @@ def plot_2D_marginal_probs(marginals, bins, lam_domain, ax.set_zlabel(r'$P$') plt.backgroundcolor = 'w' fig.colorbar(surf, shrink=0.5, aspect=5, label=r'$P$') - fig.savefig(filename + "_surf_"+str(i)+"_"+str(j)+".eps", - transparent=True) + fig.savefig(filename + "_surf_" + str(i) + "_" + str(j) + \ + file_extension, transparent=True) + if interactive: plt.show() else: plt.close() plt.clf() - + + def smooth_marginals_1D(marginals, bins, sigma=10.0): """ This function smooths 1D marginal probabilities. @@ -260,7 +346,7 @@ def smooth_marginals_1D(marginals, bins, sigma=10.0): for i in index: nx = len(bins[i])-1 dx = bins[i][1] - bins[i][0] - augx = math.ceil(3*sigma[i]/dx) + augx = int(math.ceil(3*sigma[i]/dx)) x_kernel = np.linspace(-nx*dx/2, nx*dx/2, nx) kernel = np.exp(-(x_kernel/sigma[i])**2) aug_kernel = np.zeros((nx+2*augx,)) @@ -304,8 +390,8 @@ def smooth_marginals_2D(marginals, bins, sigma=10.0): dx = bins[i][1] - bins[i][0] dy = bins[j][1] - bins[j][0] - augx = math.ceil(3*sigma[i]/dx) - augy = math.ceil(3*sigma[j]/dy) + augx = int(math.ceil(3*sigma[i]/dx)) + augy = int(math.ceil(3*sigma[j]/dy)) x_kernel = np.linspace(-nx*dx/2, nx*dx/2, nx) y_kernel = np.linspace(-ny*dy/2, ny*dy/2, ny) diff --git a/bet/postProcess/postTools.py b/bet/postProcess/postTools.py index 1046c3b7..70aaa89b 100644 --- a/bet/postProcess/postTools.py +++ b/bet/postProcess/postTools.py @@ -1,4 +1,4 @@ -# Copyright (C) 2014-2015 The BET Development Team +# Copyright (C) 2014-2016 The BET Development Team """ This module provides methods for postprocessing probabilities and data. @@ -6,33 +6,53 @@ from bet.Comm import comm import numpy as np import scipy.io as sio +import bet.sample as sample +import logging +class dim_not_matching(Exception): + """ + Exception for when the dimension is inconsistent. + """ -def sort_by_rho(P_samples, samples, lam_vol=None, data=None): +class bad_object(Exception): + """ + Exception for when the wrong type of object is used. """ - This sorts the samples by probability density. It returns the sorted - values. If the samples are iid, no volume data is needed. It is optional - to sort the QoI data, but be sure to do so if using it later. 
- :param P_samples: Probabilities. - :type P_samples: :class:`~numpy.ndarray` of shape (num_samples,) - :param samples: The samples in parameter space for which the model was run. - :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim) - :param lam_vol: Volume of cell associated with sample. - :type lam_vol: :class:`~numpy.ndarray` of shape (num_samples,) - :param data: QoI data from running the model with the given samples. - :type data: :class:`~numpy.ndarray` of shape (num_samples, mdim) - :param indices: sorting indices of unsorted ``P_samples`` +def sort_by_rho(sample_set): + """ + This sorts the samples within the sample_set by probability density. + If a discretization object is given, then the QoI data is also sorted + to maintain the correspondence. + Any volumes present in the input space (or just the sample object) + are also sorted. + + :param sample_set: Object containing samples and probabilities + :type sample_set: :class:`~bet.sample.sample_set_base` or + :class:`~bet.sample.discretization` + :param indices: sorting indices :type indices: :class:`numpy.ndarray` of shape (num_samples,) - + :param sample_set_out: Object containing sorted samples and probabilities + :type sample_set_out: :class:`~bet.sample.sample_set` or + :class:`~bet.sample.discretization` + :rtype: tuple - :returns: (P_samples, samples, lam_vol, data, indicices) + :returns: (sample_set_out, indicices) """ - if len(samples.shape) == 1: - samples = np.expand_dims(samples, axis=1) - if P_samples.shape != (samples.shape[0],): - raise ValueError("P_samples must be of the shape (num_samples,)") + if isinstance(sample_set, sample.discretization): + samples = sample_set._input_sample_set.get_values() + P_samples = sample_set._input_sample_set.get_probabilities() + lam_vol = sample_set._input_sample_set.get_volumes() + data = sample_set._output_sample_set.get_values() + elif isinstance(sample_set, sample.sample_set_base): + samples = sample_set.get_values() + P_samples = sample_set.get_probabilities() + lam_vol = sample_set.get_volumes() + data = None + else: + raise bad_object("Improper sample object") + nnz = np.sum(P_samples > 0) if lam_vol is None: indices = np.argsort(P_samples)[::-1][0:nnz] @@ -43,55 +63,84 @@ def sort_by_rho(P_samples, samples, lam_vol=None, data=None): if lam_vol is not None: lam_vol = lam_vol[indices] if data is not None: - if len(data.shape) == 1: - data = np.expand_dims(data, axis=1) data = data[indices, :] - return (P_samples, samples, lam_vol, data, indices) + if isinstance(sample_set, sample.discretization): + samples_out = sample.sample_set(sample_set._input_sample_set.get_dim()) + data_out = sample.sample_set(sample_set._output_sample_set.get_dim()) + sample_set_out = sample.discretization(samples_out, data_out) + sample_set_out._input_sample_set.set_values(samples) + sample_set_out._input_sample_set.set_probabilities(P_samples) + sample_set_out._input_sample_set.set_volumes(lam_vol) + sample_set_out._output_sample_set.set_values(data) + else: + sample_set_out = sample.sample_set(sample_set.get_dim()) + sample_set_out.set_values(samples) + sample_set_out.set_probabilities(P_samples) + sample_set_out.set_volumes(lam_vol) + + return (sample_set_out, indices) -def sample_prob(percentile, P_samples, samples, lam_vol=None, - data=None, sort=True, descending=False): +def sample_prob(percentile, sample_set, sort=True, descending=False): """ This calculates the highest/lowest probability samples whose probability - sum to a given value. 
The number of high/low probability samples that sum - to the value and the probabilities, samples, volumes, and data are - returned. This assumes that ``P_samples``, ``samples``, ``lam_vol``, and - ``data`` have all be sorted using :meth:`~bet.postProcess.sort_by_rho`. The - ``descending`` flag determines whether or not to calcuate the + sum to a given value. + A new sample_set with the samples corresponding to these highest/lowest + probability samples is returned along with the number of samples and + the indices. + This uses :meth:`~bet.postProcess.sort_by_rho`. + The ``descending`` flag determines whether or not to calcuate the highest/lowest. :param percentile: ratio of highest probability samples to select :type percentile: float - :param P_samples: Probabilities. - :type P_samples: :class:`~numpy.ndarray` of shape (num_samples,) - :param samples: The samples in parameter space for which the model was run. - :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim) - :param lam_vol: Volume of cell associated with sample. - :type lam_vol: :class:`~numpy.ndarray` of shape (num_samples,) - :param data: QoI data from running the model with the given samples. - :type data: :class:`~numpy.ndarray` of shape (num_samples, mdim) + :param sample_set: Object containing samples and probabilities + :type sample_set: :class:`~bet.sample.sample_set_base` or + :class:`~bet.sample.discretization` :type indices: :class:`numpy.ndarray` of shape (num_samples,) - :param indices: sorting indices of unsorted ``P_samples`` + :param indices: sorting indices :param bool sort: Flag whether or not to sort :param bool descending: Flag order of sorting - + :param sample_set_out: Object containing sorted samples and probabilities + :type sample_set_out: :class:`~bet.sample.sample_set` or + :class:`~bet.sample.discretization` + :rtype: tuple - :returns: ( num_samples, P_samples, samples, lam_vol, data) + :returns: ( num_samples, sample_set_out, data) """ - if len(samples.shape) == 1: - samples = np.expand_dims(samples, axis=1) - if P_samples.shape != (samples.shape[0],): - raise ValueError("P_samples must be of the shape (num_samples,)") + if isinstance(sample_set, sample.discretization): + samples = sample_set._input_sample_set.get_values() + P_samples = sample_set._input_sample_set.get_probabilities() + lam_vol = sample_set._input_sample_set.get_volumes() + data = sample_set._output_sample_set.get_values() + elif isinstance(sample_set, sample.sample_set_base): + samples = sample_set.get_values() + P_samples = sample_set.get_probabilities() + lam_vol = sample_set.get_volumes() + data = None + else: + raise bad_object("Improper sample object") + if sort: - (P_samples, samples, lam_vol, data, indices) = sort_by_rho(P_samples, - samples, lam_vol, data) + (sample_set, indices) = sort_by_rho(sample_set) + if isinstance(sample_set, sample.discretization): + samples = sample_set._input_sample_set.get_values() + P_samples = sample_set._input_sample_set.get_probabilities() + lam_vol = sample_set._input_sample_set.get_volumes() + data = sample_set._output_sample_set.get_values() + elif isinstance(sample_set, sample.sample_set_base): + samples = sample_set.get_values() + P_samples = sample_set.get_probabilities() + lam_vol = sample_set.get_volumes() + data = None if descending: P_samples = P_samples[::-1] samples = samples[::-1] if lam_vol is not None: lam_vol = lam_vol[::-1] - data = data[::-1] + if data is not None: + data = data[::-1] indices = indices[::-1] P_sum = np.cumsum(P_samples) @@ -104,72 +153,84 @@ def 
sample_prob(percentile, P_samples, samples, lam_vol=None, if len(data.shape) == 1: data = np.expand_dims(data, axis=1) data = data[0:num_samples, :] - - return (num_samples, P_samples, samples, lam_vol, data, + + if isinstance(sample_set, sample.discretization): + samples_out = sample.sample_set(sample_set._input_sample_set.get_dim()) + data_out = sample.sample_set(sample_set._output_sample_set.get_dim()) + sample_set_out = sample.discretization(samples_out, data_out) + sample_set_out._input_sample_set.set_values(samples) + sample_set_out._input_sample_set.set_probabilities(P_samples) + sample_set_out._input_sample_set.set_volumes(lam_vol) + sample_set_out._output_sample_set.set_values(data) + else: + sample_set_out = sample.sample_set(sample_set.get_dim()) + sample_set_out.set_values(samples) + sample_set_out.set_probabilities(P_samples) + sample_set_out.set_volumes(lam_vol) + + return (num_samples, sample_set_out, indices[0:num_samples]) -def sample_highest_prob(top_percentile, P_samples, samples, lam_vol=None, - data=None, sort=True): +def sample_highest_prob(top_percentile, sample_set, sort=True): """ This calculates the highest probability samples whose probability sum to a - given value. The number of high probability samples that sum to the value - and the probabilities, samples, volumes, and data are returned. This - assumes that ``P_samples``, ``samples``, ``lam_vol``, and ``data`` have all - be sorted using :meth:`~bet.postProcess.sort_by_rho`. + given value. + The number of high probability samples that sum to the value, + a new sample_set, and the indices are returned. + This uses :meth:`~bet.postProcess.sort_by_rho`. :param top_percentile: ratio of highest probability samples to select :type top_percentile: float - :param P_samples: Probabilities. - :type P_samples: :class:`~numpy.ndarray` of shape (num_samples,) - :param samples: The samples in parameter space for which the model was run. - :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim) - :param lam_vol: Volume of cell associated with sample. - :type lam_vol: :class:`~numpy.ndarray` of shape (num_samples,) - :param data: QoI data from running the model with the given samples. - :type data: :class:`~numpy.ndarray` of shape (num_samples, mdim) + :param sample_set: Object containing samples and probabilities + :type sample_set: :class:`~bet.sample.sample_set_base` + or :class:`~bet.sample.discretization` :type indices: :class:`numpy.ndarray` of shape (num_samples,) - :param indices: sorting indices of unsorted ``P_samples`` + :param indices: sorting indices :param bool sort: Flag whether or not to sort - + :param sample_set_out: Object containing sorted samples and probabilities + :type sample_set_out: :class:`~bet.sample.sample_set` + or :class:`~bet.sample.discretization` + :rtype: tuple - :returns: ( num_samples, P_samples, samples, lam_vol, data) + :returns: ( num_samples, sample_set_out, indices) """ - return sample_prob(top_percentile, P_samples, samples, lam_vol, data, sort) + return sample_prob(top_percentile, sample_set, sort) -def sample_lowest_prob(bottom_percentile, P_samples, samples, lam_vol=None, - data=None, sort=True): +def sample_lowest_prob(bottom_percentile, sample_set, sort=True): """ This calculates the lowest probability samples whose probability sum to a - given value. The number of low probability samples that sum to the value - and the probabilities, samples, volumes, and data are returned. 
This - assumes that ``P_samples``, ``samples``, ``lam_vol``, and ``data`` have all - be sorted using :meth:`~bet.postProcess.sort_by_rho`. + given value. + The number of low probability samples that sum to the value, + a new sample_set, and the indices are returned. + This uses :meth:`~bet.postProcess.sort_by_rho`. :param top_percentile: ratio of highest probability samples to select :type top_percentile: float - :param P_samples: Probabilities. - :type P_samples: :class:`~numpy.ndarray` of shape (num_samples,) - :param samples: The samples in parameter space for which the model was run. - :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim) - :param lam_vol: Volume of cell associated with sample. - :type lam_vol: :class:`~numpy.ndarray` of shape (num_samples,) - :param data: QoI data from running the model with the given samples. - :type data: :class:`~numpy.ndarray` of shape (num_samples, mdim) + :param sample_set: Object containing samples and probabilities + :type sample_set: :class:`~bet.sample.sample_set_base` + or :class:`~bet.sample.discretization` :type indices: :class:`numpy.ndarray` of shape (num_samples,) :param indices: sorting indices of unsorted ``P_samples`` :param bool sort: Flag whether or not to sort - + :param sample_set_out: Object containing sorted samples and probabilities + :type sample_set_out: :class:`~bet.sample.sample_set` + or :class:`~bet.sample.discretization` + :rtype: tuple - :returns: ( num_samples, P_samples, samples, lam_vol, data) + :returns: ( num_samples, sample_set_out, indices) """ - return sample_prob(bottom_percentile, P_samples, samples, lam_vol, data, + return sample_prob(bottom_percentile, sample_set, sort, descending=True) def save_parallel_probs_csv(P_samples, samples, P_file, lam_file, compress=False): """ + .. todo:: + + Revisit when save features in sample.py are stable + Saves probabilites and samples from parallel runs in individual ``.csv`` files for each process. @@ -198,6 +259,10 @@ def save_parallel_probs_csv(P_samples, samples, P_file, lam_file, def collect_parallel_probs_csv(P_file, lam_file, num_files, save=False, compress=False): """ + .. todo:: + + Revisit when save features in sample.py are stable + Collects probabilities and samples saved in ``.csv`` format from parallel runs into single arrays. @@ -235,6 +300,10 @@ def collect_parallel_probs_csv(P_file, lam_file, num_files, save=False, def save_parallel_probs_mat(P_samples, samples, file_prefix, compress=False): """ + .. todo:: + + Revisit when save features in sample.py are stable + Saves probabilites and samples from parallel runs in individual .mat files for each process. @@ -256,6 +325,10 @@ def save_parallel_probs_mat(P_samples, samples, file_prefix, compress=False): def collect_parallel_probs_mat(file_prefix, num_files, save=False, compress=False): """ + .. todo:: + + Revisit when save features in sample.py are stable + Collects probabilities and samples saved in .mat format from parallel runs into single arrays. @@ -289,6 +362,9 @@ def collect_parallel_probs_mat(file_prefix, num_files, save=False, def compare_yield(sort_ind, sample_quality, run_param, column_headings=None): """ + .. todo:: + + Revisit to deprecate later. 
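For orientation, here is a minimal usage sketch of the refactored ``sort_by_rho``/``sample_prob`` interface above (not part of this patch); the values, probabilities, and volumes are synthetic placeholders and the 95% threshold is arbitrary::

    import numpy as np
    import bet.sample as sample
    import bet.postProcess.postTools as postTools

    num, dim, mdim = 100, 2, 1
    input_set = sample.sample_set(dim)
    output_set = sample.sample_set(mdim)
    input_set.set_values(np.random.random((num, dim)))
    output_set.set_values(np.random.random((num, mdim)))

    # attach normalized probabilities and Monte Carlo cell volumes
    prob = np.random.random((num,))
    input_set.set_probabilities(prob/np.sum(prob))
    input_set.set_volumes(np.ones((num,))/float(num))

    my_disc = sample.discretization(input_set, output_set)

    # sort by probability density; a new object and the sort indices return
    (sorted_disc, indices) = postTools.sort_by_rho(my_disc)

    # keep the highest-probability samples accounting for 95% of P
    (num_top, top_disc, top_ind) = postTools.sample_highest_prob(0.95, my_disc)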
Compare the quality of samples where ``sample_quality`` is the measure of quality by which the sets of samples have been indexed and ``sort_ind`` is @@ -305,12 +381,15 @@ def compare_yield(sort_ind, sample_quality, run_param, column_headings=None): """ if column_headings == None: column_headings = "Run parameters" - print "Sample Set No., Quality, "+ str(column_headings) + logging.info("Sample Set No., Quality, "+ str(column_headings)) for i in reversed(sort_ind): - print i, sample_quality[i], np.round(run_param[i], 3) + logging.info(i, sample_quality[i], np.round(run_param[i], 3)) def in_high_prob(data, rho_D, maximum, sample_nos=None): """ + .. todo:: + + Revisit to deprecate later. Estimates the number of samples in high probability regions of D. @@ -333,11 +412,14 @@ def in_high_prob(data, rho_D, maximum, sample_nos=None): else: rD = rho_D(data[sample_nos, :]) adjusted_total_prob = int(sum(rD)/maximum) - print "Samples in box "+str(adjusted_total_prob) + logging.info("Samples in box "+str(adjusted_total_prob)) return adjusted_total_prob def in_high_prob_multi(results_list, rho_D, maximum, sample_nos_list=None): """ + .. todo:: + + Revisit to deprecate later. Estimates the number of samples in high probability regions of D for a list of results. diff --git a/bet/sample.py b/bet/sample.py new file mode 100644 index 00000000..97ef66fd --- /dev/null +++ b/bet/sample.py @@ -0,0 +1,1212 @@ +# Copyright (C) 2016 The BET Development Team + +""" +This module contains data structure/storage classes for BET. Notably: + :class:`bet.sample.sample_set` + :class:`bet.sample.discretization` + :class:`bet.sample.length_not_matching` + :class:`bet.sample.dim_not_matching` +""" + +import os, logging +import numpy as np +import scipy.spatial as spatial +import scipy.io as sio +import scipy.stats +from bet.Comm import comm, MPI +import bet.util as util +import bet.sampling.LpGeneralizedSamples as lp + +class length_not_matching(Exception): + """ + Exception for when the length of the array is inconsistent. + """ + + +class dim_not_matching(Exception): + """ + Exception for when the dimension of the array is inconsistent. + """ + +def save_sample_set(save_set, file_name, sample_set_name=None): + """ + Saves this :class:`bet.sample.sample_set` as a ``.mat`` file. Each + attribute is added to a dictionary of names and arrays which are then + saved to a MATLAB-style file. + + :param save_set: sample set to save + :type save_set: :class:`bet.sample.sample_set` + :param string file_name: Name of the ``.mat`` file, no extension is + needed. + :param string sample_set_name: String to prepend to attribute names when + saving multiple :class`bet.sample.sample_set` objects to a single + ``.mat`` file + + """ + if os.path.exists(file_name) or os.path.exists(file_name+'.mat'): + mdat = sio.loadmat(file_name) + else: + mdat = dict() + if sample_set_name is None: + sample_set_name = 'default' + for attrname in sample_set.vector_names: + curr_attr = getattr(save_set, attrname) + if curr_attr is not None: + mdat[sample_set_name+attrname] = curr_attr + for attrname in sample_set.all_ndarray_names: + curr_attr = getattr(save_set, attrname) + if curr_attr is not None: + mdat[sample_set_name+attrname] = curr_attr + if comm.rank == 0: + sio.savemat(file_name, mdat) + +def load_sample_set(file_name, sample_set_name=None): + """ + Loads a :class:`~bet.sample.sample_set` from a ``.mat`` file. 
If a file + contains multiple :class:`~bet.sample.sample_set` objects then + ``sample_set_name`` is used to distinguish which between different + :class:`~bet.sample.sample_set` objects. + + :param string file_name: Name of the ``.mat`` file, no extension is + needed. + :param string sample_set_name: String to prepend to attribute names when + saving multiple :class`bet.sample.sample_set` objects to a single + ``.mat`` file + + :rtype: :class:`~bet.sample.sample_set` + :returns: the ``sample_set`` that matches the ``sample_set_name`` + """ + mdat = sio.loadmat(file_name) + if sample_set_name is None: + sample_set_name = 'default' + + if sample_set_name+"_dim" in mdat.keys(): + loaded_set = sample_set(np.squeeze(mdat[sample_set_name+"_dim"])) + else: + logging.info("No sample_set named {} with _dim in file".\ + format(sample_set_name)) + return None + + for attrname in sample_set.vector_names: + if attrname is not '_dim': + if sample_set_name+attrname in mdat.keys(): + setattr(loaded_set, attrname, + np.squeeze(mdat[sample_set_name+attrname])) + for attrname in sample_set.all_ndarray_names: + if sample_set_name+attrname in mdat.keys(): + setattr(loaded_set, attrname, mdat[sample_set_name+attrname]) + + # localize arrays if necessary + if sample_set_name+"_values_local" in mdat.keys(): + loaded_set.global_to_local() + + return loaded_set + +class sample_set_base(object): + """ + + A data structure containing arrays specific to a set of samples. + + """ + #: List of attribute names for attributes which are vectors or 1D + #: :class:`numpy.ndarray` or int/float + vector_names = ['_probabilities', '_probabilities_local', '_volumes', + '_volumes_local', '_local_index', '_dim'] + #: List of global attribute names for attributes that are + #: :class:`numpy.ndarray` + array_names = ['_values', '_volumes', '_probabilities', '_jacobians', + '_error_estimates', '_right', '_left', '_width', + '_kdtree_values'] + #: List of attribute names for attributes that are + #: :class:`numpy.ndarray` with dim > 1 + all_ndarray_names = ['_error_estimates', '_error_estimates_local', + '_values', '_values_local', '_left', '_left_local', + '_right', '_right_local', '_width', '_width_local', + '_domain', '_kdtree_values'] + + + def __init__(self, dim): + """ + + Initialization + + :param int dim: Dimension of the space in which these samples reside. 
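As a quick illustration of this class and the save/load helpers above (not part of this patch; the file name ``my_set`` and the set name ``input`` are arbitrary), a sample set can be built and round-tripped like this::

    import numpy as np
    import bet.sample as sample

    s_set = sample.sample_set(3)
    s_set.set_domain(np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0]]))
    s_set.set_values(np.random.random((50, 3)))
    s_set.check_num()    # raises if the stored array lengths are inconsistent

    # round trip through the MATLAB-style storage helpers
    sample.save_sample_set(s_set, "my_set", sample_set_name="input")
    loaded_set = sample.load_sample_set("my_set", sample_set_name="input")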
+ + """ + #: Dimension of the sample space + self._dim = dim + #: :class:`numpy.ndarray` of sample values of shape (num, dim) + self._values = None + #: :class:`numpy.ndarray` of sample Voronoi volumes of shape (num,) + self._volumes = None + #: :class:`numpy.ndarray` of sample probabilities of shape (num,) + self._probabilities = None + #: :class:`numpy.ndarray` of Jacobians at samples of shape (num, + #: other_dim, dim) + self._jacobians = None + #: :class:`numpy.ndarray` of model error estimates at samples of shape + #: (num, dim) + self._error_estimates = None + #: The sample domain :class:`numpy.ndarray` of shape (dim, 2) + self._domain = None + #: Bounding box of values, :class:`numpy.ndarray`of shape (dim, 2) + self._bounding_box = None + #: Local values for parallelism, :class:`numpy.ndarray` of shape + #: (local_num, dim) + self._values_local = None + #: Local volumes for parallelism, :class:`numpy.ndarray` of shape + #: (local_num,) + self._volumes_local = None + #: Local probabilities for parallelism, :class:`numpy.ndarray` of shape + #: (local_num,) + self._probabilities_local = None + #: Local Jacobians for parallelism, :class:`numpy.ndarray` of shape + #: (local_num, other_dim, dim) + self._jacobians_local = None + #: Local error_estimates for parallelism, :class:`numpy.ndarray` of + #: shape (local_num,) + self._error_estimates_local = None + #: Local indicies of global arrays, :class:`numpy.ndarray` of shape + #: (local_num, dim) + self._local_index = None + #: :class:`scipy.spatial.KDTree` + self._kdtree = None + #: Values defining kd tree, :class:`numpy.ndarray` of shape (num, dim) + self._kdtree_values = None + #: Local values defining kd tree, :class:`numpy.ndarray` of + #: shape (num, dim) + self._kdtree_values_local = None + #: Local pointwise left (local_num, dim) + self._left_local = None + #: Local pointwise right (local_num, dim) + self._right_local = None + #: Local pointwise width (local_num, dim) + self._width_local = None + + #: Pointwise left (num, dim) + self._left = None + #: Pointwise right (num, dim) + self._right = None + #: Pointwise width (num, dim) + self._width = None + + def update_bounds(self, num=None): + """ + Creates ``self._right``, ``self._left``, ``self._width``. + + :param int num: Determinzes shape of pointwise bounds (num, dim) + + """ + if num == None: + num = self._values.shape[0] + self._left = np.repeat([self._domain[:, 0]], num, 0) + self._right = np.repeat([self._domain[:, 1]], num, 0) + self._width = self._right-self._left + + def update_bounds_local(self, local_num=None): + """ + Creates local versions of ``self._right``, ``self._left``, + ``self._width`` (``self._right_local``, ``self._left_local``, + ``self._width_local``). + + :param int local_num: Determinzes shape of local pointwise bounds + (local_num, dim) + + """ + if local_num == None: + local_num = self._values_local.shape[0] + self._left_local = np.repeat([self._domain[:, 0]], local_num, 0) + self._right_local = np.repeat([self._domain[:, 1]], local_num, 0) + self._width_local = self._right_local-self._left_local + + def append_values(self, values): + """ + Appends the values in ``_values`` to ``self._values``. + + .. seealso:: + + :meth:`numpy.concatenate` + + :param values: values to append + :type values: :class:`numpy.ndarray` of shape (some_num, dim) + """ + self._values = np.concatenate((self._values, + util.fix_dimensions_data(values)), 0) + + def append_values_local(self, values_local): + """ + Appends the values in ``_values_local`` to ``self._values``. + + .. 
seealso:: + + :meth:`numpy.concatenate` + + :param values_local: values to append + :type values_local: :class:`numpy.ndarray` of shape (some_num, dim) + """ + self._values_local = np.concatenate((self._values_local, + util.fix_dimensions_data(values_local)), 0) + + def check_num(self): + """ + + Checks that the number of entries in ``self._values``, + ``self._volumes``, ``self._probabilities``, ``self._jacobians``, and + ``self._error_estimates`` all match (assuming the named array exists). + + :rtype: int + :returns: num + + """ + num = None + for array_name in sample_set.array_names: + current_array = getattr(self, array_name) + if current_array is not None: + if num is None: + num = current_array.shape[0] + first_array = array_name + else: + if num != current_array.shape[0]: + raise length_not_matching("length of {} inconsistent \ + with {}".format(array_name, + first_array)) + if self._values is not None and self._values.shape[1] != self._dim: + raise dim_not_matching("dimension of values incorrect") + return num + + def get_dim(self): + """ + + Return the dimension of the sample space. + + :rtype: int + :returns: Dimension of the sample space. + + """ + return self._dim + + def set_bounding_box(self): + """ + Set the bounding box of the values. + """ + mins = np.min(self._values, axis=0) + maxes = np.max(self._values, axis=0) + self._bounding_box = np.vstack((mins, maxes)).transpose() + pass + + def get_bounding_box(self): + """ + Get the bounding box of the values. + """ + if self._bounding_box is None: + self.set_bounding_box() + return self._bounding_box + + def set_values(self, values): + """ + Sets the sample values. + + :param values: sample values + :type values: :class:`numpy.ndarray` of shape (num, dim) + + """ + self._values = util.fix_dimensions_data(values) + if self._values.shape[1] != self._dim: + raise dim_not_matching("dimension of values incorrect") + + def get_values(self): + """ + Returns sample values. + + :rtype: :class:`numpy.ndarray` + :returns: sample values + + """ + return self._values + + def set_domain(self, domain): + """ + Sets the domain. + + :param domain: Sample domain + :type domain: :class:`numpy.ndarray` of shape (dim, 2) + + """ + if (domain.shape[0], 2) != (self._dim, 2): + raise dim_not_matching("dimension of values incorrect") + else: + self._domain = domain + + def get_domain(self): + """ + Returns the sample domain, + + :rtype: :class:`numpy.ndarray` of shape (dim, 2) + :returns: Sample domain + + """ + return self._domain + + def set_volumes(self, volumes): + """ + Sets sample cell volumes. + + :type volumes: :class:`numpy.ndarray` of shape (num,) + :param volumes: sample cell volumes + + """ + self._volumes = volumes + + def get_volumes(self): + """ + Returns sample cell volumes. + + :rtype: :class:`numpy.ndarray` of shape (num,) + :returns: sample cell volumes + + """ + return self._volumes + + def set_probabilities(self, probabilities): + """ + Set sample probabilities. + + :type probabilities: :class:`numpy.ndarray` of shape (num,) + :param probabilities: sample probabilities + + """ + self._probabilities = probabilities + + def get_probabilities(self): + """ + Returns sample probabilities. + + :rtype: :class:`numpy.ndarray` of shape (num,) + :returns: sample probabilities + + """ + return self._probabilities + + def set_jacobians(self, jacobians): + """ + Returns sample jacobians. 
+ + :type jacobians: :class:`numpy.ndarray` of shape (num, other_dim, dim) + :param jacobians: sample jacobians + + """ + self._jacobians = jacobians + + def get_jacobians(self): + """ + Returns sample jacobians. + + :rtype: :class:`numpy.ndarray` of shape (num, other_dim, dim) + :returns: sample jacobians + + """ + return self._jacobians + + def append_jacobians(self, new_jacobians): + """ + Appends the ``new_jacobians`` to ``self._jacobians``. + + .. note:: + + Remember to update the other member attribute arrays so that + :meth:`~sample.sample.check_num` does not fail. + + :param new_jacobians: New jacobians to append. + :type new_jacobians: :class:`numpy.ndarray` of shape (num, other_dim, + dim) + + """ + self._jacobians = np.concatenate((self._jacobians, new_jacobians), + axis=0) + + def set_error_estimates(self, error_estimates): + """ + Returns sample error estimates. + + :type error_estimates: :class:`numpy.ndarray` of shape (num,) + :param error_estimates: sample error estimates + + """ + self._error_estimates = error_estimates + + def get_error_estimates(self): + """ + Returns sample error_estimates. + + :rtype: :class:`numpy.ndarray` of shape (num,) + :returns: sample error_estimates + + """ + return self._error_estimates + + def append_error_estimates(self, new_error_estimates): + """ + Appends the ``new_error_estimates`` to ``self._error_estimates``. + + .. note:: + + Remember to update the other member attribute arrays so that + :meth:`~sample.sample.check_num` does not fail. + + :param new_error_estimates: New error_estimates to append. + :type new_error_estimates: :class:`numpy.ndarray` of shape (num,) + + """ + self._error_estimates = np.concatenate((self._error_estimates, + new_error_estimates), axis=0) + + + def set_values_local(self, values_local): + """ + Sets the local sample values. + + :param values_local: sample local values + :type values_local: :class:`numpy.ndarray` of shape (local_num, dim) + + """ + self._values_local = util.fix_dimensions_data(values_local) + if self._values_local.shape[1] != self._dim: + raise dim_not_matching("dimension of values incorrect") + pass + + def set_kdtree(self): + """ + Creates a :class:`scipy.spatial.KDTree` for this set of samples. + """ + self._kdtree = spatial.KDTree(self._values) + self._kdtree_values = self._kdtree.data + + def get_kdtree(self): + """ + Returns a :class:`scipy.spatial.KDTree` for this set of samples. + + :rtype: :class:`scipy.spatial.KDTree` + :returns: :class:`scipy.spatial.KDTree` for this set of samples. + + """ + return self._kdtree + + def get_values_local(self): + """ + Returns sample local values. + + :rtype: :class:`numpy.ndarray` + :returns: sample local values + + """ + return self._values_local + + def set_volumes_local(self, volumes_local): + """ + Sets local sample cell volumes. + + :type volumes_local: :class:`numpy.ndarray` of shape (num,) + :param volumes_local: local sample cell volumes + + """ + self._volumes_local = volumes_local + pass + + def get_volumes_local(self): + """ + Returns sample local volumes. + + :rtype: :class:`numpy.ndarray` + :returns: sample local volumes + + """ + return self._volumes_local + + def set_probabilities_local(self, probabilities_local): + """ + Set sample local probabilities. + + :type probabilities_local: :class:`numpy.ndarray` of shape (num,) + :param probabilities_local: local sample probabilities + + """ + self._probabilities_local = probabilities_local + pass + + def get_probabilities_local(self): + """ + Returns sample local probablities. 
+ + :rtype: :class:`numpy.ndarray` + :returns: sample local probablities + + """ + + return self._probabilities_local + + def set_jacobians_local(self, jacobians_local): + """ + Returns local sample jacobians. + + :type jacobians_local: :class:`numpy.ndarray` of shape (num, other_dim, + dim) + :param jacobians_local: local sample jacobians + + """ + self._jacobians_local = jacobians_local + pass + + def get_jacobians_local(self): + """ + Returns local sample jacobians. + + :rtype: :class:`numpy.ndarray` of shape (num, other_dim, dim) + :returns: local sample jacobians + + """ + return self._jacobians_local + + def set_error_estimates_local(self, error_estimates_local): + """ + Returns local sample error estimates. + + :type error_estimates_local: :class:`numpy.ndarray` of shape (num,) + :param error_estimates_local: local sample error estimates + + """ + self._error_estimates_local = error_estimates_local + pass + + def get_error_estimates_local(self): + """ + Returns sample error_estimates_local. + + :rtype: :class:`numpy.ndarray` of shape (num,) + :returns: sample error_estimates_local + + """ + return self._error_estimates_local + + def local_to_global(self): + """ + Makes global arrays from available local ones. + """ + for array_name in sample_set.array_names: + current_array_local = getattr(self, array_name + "_local") + if current_array_local is not None: + setattr(self, array_name, + util.get_global_values(current_array_local)) + def query(self, x): + """ + Identify which value points x are associated with for discretization. + + :param x: points for query + :type x: :class:`numpy.ndarray` of shape (*, dim) + """ + pass + + def estimate_volume(self, n_mc_points=int(1E4)): + """ + Calculate the volume faction of cells approximately using Monte + Carlo integration. + + .. todo:: + + This currently presumes a uniform Lesbegue measure on the + ``domain``. Currently the way this is written + ``emulated_input_sample_set`` is NOT used to calculate the volume. + This should at least be an option. + + :param int n_mc_points: If estimate is True, number of MC points to use + """ + num = self.check_num() + n_mc_points_local = (n_mc_points/comm.size) + \ + (comm.rank < n_mc_points%comm.size) + width = self._domain[:, 1] - self._domain[:, 0] + mc_points = width*np.random.random((n_mc_points_local, + self._domain.shape[0])) + self._domain[:, 0] + (_, emulate_ptr) = self.query(mc_points) + vol = np.zeros((num,)) + for i in range(num): + vol[i] = np.sum(np.equal(emulate_ptr, i)) + cvol = np.copy(vol) + comm.Allreduce([vol, MPI.DOUBLE], [cvol, MPI.DOUBLE], op=MPI.SUM) + vol = cvol + vol = vol/float(n_mc_points) + self._volumes = vol + self.global_to_local() + + def estimate_volume_mc(self): + """ + Give all cells the same volume fraction based on the Monte Carlo + assumption. + """ + num = self.check_num() + self._volumes = 1.0/float(num)*np.ones((num,)) + self.global_to_local() + + def global_to_local(self): + """ + Makes local arrays from available global ones. + """ + num = self.check_num() + global_index = np.arange(num, dtype=np.int) + self._local_index = np.array_split(global_index, comm.size)[comm.rank] + for array_name in sample_set.array_names: + current_array = getattr(self, array_name) + if current_array is not None: + setattr(self, array_name + "_local", + np.array_split(current_array, comm.size)[comm.rank]) + + def copy(self): + """ + Makes a copy using :meth:`numpy.copy`. 
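A short sketch (not part of this patch) of the Monte Carlo volume assumption and the global/local array handling used for parallelism; the sample values are placeholders::

    import numpy as np
    import bet.sample as sample

    s_set = sample.sample_set(2)
    s_set.set_domain(np.array([[0.0, 1.0], [0.0, 1.0]]))
    s_set.set_values(np.random.random((1000, 2)))

    # standard MC assumption: every Voronoi cell gets volume fraction 1/num;
    # this also splits the arrays into the *_local versions via global_to_local
    s_set.estimate_volume_mc()
    print(s_set.get_volumes()[0])    # 0.001 here
    print(s_set.shape_local())       # (1000, 2) when run in serial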
+ + :rtype: :class:`~bet.sample.sample_set` + :returns: Copy of this :class:`~bet.sample.sample_set` + + """ + my_copy = sample_set(self.get_dim()) + for array_name in sample_set.all_ndarray_names: + current_array = getattr(self, array_name) + if current_array is not None: + setattr(my_copy, array_name, + np.copy(current_array)) + for vector_name in sample_set.vector_names: + if vector_name is not "_dim": + current_vector = getattr(self, vector_name) + if current_vector is not None: + setattr(my_copy, vector_name, np.copy(current_vector)) + if self._kdtree is not None: + my_copy.set_kdtree() + return my_copy + + def shape(self): + """ + + Returns the shape of ``self._values`` + + :rtype: tuple + :returns: (num, dim) + + """ + return self._values.shape + + def shape_local(self): + """ + + Returns the shape of ``self._values_local`` + + :rtype: tuple + :returns: (local_num, dim) + + """ + return self._values_local.shape + + def calculate_volumes(self): + """ + + Calculate the volumes of cells. Depends on sample set type. + + """ + +def save_discretization(save_disc, file_name, discretization_name=None): + """ + Saves this :class:`bet.sample.discretization` as a ``.mat`` file. Each + attribute is added to a dictionary of names and arrays which are then + saved to a MATLAB-style file. + + :param save_disc: sample set to save + :type save_disc: :class:`bet.sample.discretization` + :param string file_name: Name of the ``.mat`` file, no extension is + needed. + :param string discretization_name: String to prepend to attribute names when + saving multiple :class`bet.sample.discretization` objects to a single + ``.mat`` file + + """ + new_mdat = dict() + + if discretization_name is None: + discretization_name = 'default' + + for attrname in discretization.sample_set_names: + curr_attr = getattr(save_disc, attrname) + if curr_attr is not None: + if attrname in discretization.sample_set_names: + save_sample_set(curr_attr, file_name, + discretization_name+attrname) + + for attrname in discretization.vector_names: + curr_attr = getattr(save_disc, attrname) + if curr_attr is not None: + new_mdat[discretization_name+attrname] = curr_attr + + if comm.rank == 0: + if os.path.exists(file_name) or os.path.exists(file_name+'.mat'): + mdat = sio.loadmat(file_name) + for i, v in new_mdat.iteritems(): + mdat[i] = v + sio.savemat(file_name, mdat) + else: + sio.savemat(file_name, new_mdat) + +def load_discretization(file_name, discretization_name=None): + """ + Loads a :class:`~bet.sample.discretization` from a ``.mat`` file. If a file + contains multiple :class:`~bet.sample.discretization` objects then + ``discretization_name`` is used to distinguish which between different + :class:`~bet.sample.discretization` objects. + + :param string file_name: Name of the ``.mat`` file, no extension is + needed. 
+ :param string discretization_name: String to prepend to attribute names when + saving multiple :class`bet.sample.discretization` objects to a single + ``.mat`` file + + :rtype: :class:`~bet.sample.discretization` + :returns: the ``discretization`` that matches the ``discretization_name`` + """ + mdat = sio.loadmat(file_name) + if discretization_name is None: + discretization_name = 'default' + + input_sample_set = load_sample_set(file_name, + discretization_name+'_input_sample_set') + + output_sample_set = load_sample_set(file_name, + discretization_name+'_output_sample_set') + + loaded_disc = discretization(input_sample_set, output_sample_set) + + for attrname in discretization.sample_set_names: + if attrname is not '_input_sample_set' and \ + attrname is not '_output_sample_set': + setattr(loaded_disc, attrname, load_sample_set(file_name, + discretization_name+attrname)) + + for attrname in discretization.vector_names: + if discretization_name+attrname in mdat.keys(): + setattr(loaded_disc, attrname, + np.squeeze(mdat[discretization_name+attrname])) + return loaded_disc + +class voronoi_sample_set(sample_set_base): + """ + + A data structure containing arrays specific to a set of samples defining + a Voronoi tesselation. + + """ + def __init__(self, dim, p_norm=2): + sample_set_base.__init__(self, dim) + #: p-norm to use for nearest neighbor search + self.p_norm = p_norm + + def query(self, x): + """ + Identify which value points x are associated with for discretization. + + :param x: points for query + :type x: :class:`numpy.ndarray` of shape (*, dim) + + :rtype: tuple + :returns: (dist, ptr) + """ + if self._kdtree is None: + self.set_kdtree() + else: + self.check_num() + + + #TODO add exception if dimensions of x are wrong + (dist, ptr) = self._kdtree.query(x, p=self.p_norm) + return (dist, ptr) + + def exact_volume_1D(self, distribution='uniform', a=None, b=None): + r""" + + Exactly calculates the volume fraction of the Voronoic cells. + Specifically we are calculating + :math:`\mu_\Lambda(\mathcal(V)_{i,N} \cap A)/\mu_\Lambda(\Lambda)`. 
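The 1D computation described here reduces to: sort the samples, take midpoints between neighbors as Voronoi cell edges, push the edges through the distribution's CDF, and difference. A standalone NumPy/SciPy illustration of that idea (not part of this patch; the samples and the normal parameters are placeholders)::

    import numpy as np
    import scipy.stats

    samples = np.random.random((10,))      # 1D samples in [0, 1]
    domain = np.array([0.0, 1.0])

    sort_ind = np.argsort(samples)
    sorted_samples = samples[sort_ind]
    edges = np.concatenate(([domain[0]],
                            0.5*(sorted_samples[:-1] + sorted_samples[1:]),
                            [domain[1]]))

    # uniform measure on [0, 1]: the cell volume fraction is the edge spacing
    vol_sorted = np.diff(edges)

    # for a normal measure, push the edges through the CDF first
    edges_n = scipy.stats.norm.cdf(edges, loc=0.5, scale=0.1)
    vol_sorted_n = np.diff(edges_n)

    # undo the sort so volumes line up with the original sample ordering
    vol = np.zeros_like(vol_sorted)
    vol[sort_ind] = vol_sorted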
+ + :param string distribution: Probability distribution (uniform, normal, + truncnorm, beta) + :param float a: mean or alpha (normal/truncnorm, beta) + :param float b: covariance or beta (normal/truncnorm, beta) + """ + self.check_num() + if self._dim != 1: + raise dim_not_matching("Only applicable for 1D domains.") + + # sort the samples + sort_ind = np.squeeze(np.argsort(self._values, 0)) + sorted_samples = self._values[sort_ind] + domain_width = self._domain[:, 1] - self._domain[:, 0] + + # determine the mid_points which are the edges of the associated voronoi + # cells and bound the cells by the domain + edges = np.concatenate(([self._domain[:, 0]], (sorted_samples[:-1, :] +\ + sorted_samples[1:, :])*.5, [self._domain[:, 1]])) + if distribution == 'normal': + edges = scipy.stats.norm.cdf(edges, loc=a, scale=np.sqrt(b)) + elif distribution == 'truncnorm': + l = (self._domain[:, 0] - a) / np.sqrt(b) + r = (self._domain[:, 1] - a) / np.sqrt(b) + edges = scipy.stats.truncnorm.cdf(edges, a=l, b=r, loc=a, + scale=np.sqrt(b)) + elif distribution == 'beta': + edges = scipy.stats.beta.cdf(edges, a=a, b=b, + loc=self._domain[:, 0], scale=domain_width) + # calculate difference between right and left of each cell and + # renormalize + sorted_lam_vol = np.squeeze(edges[1:, :] - edges[:-1, :]) + lam_vol = np.zeros(sorted_lam_vol.shape) + lam_vol[sort_ind] = sorted_lam_vol + if distribution == 'uniform': + lam_vol = lam_vol/domain_width + self._volumes = lam_vol + self.global_to_local() + + def estimate_local_volume(self, num_l_emulate_local=100, + max_num_l_emulate=1e3): + r""" + + Estimates the volume fraction of the Voronoice cells associated + with ``samples``. Specifically we are calculating + :math:`\mu_\Lambda(\mathcal(V)_{i,N} \cap A)/\mu_\Lambda(\Lambda)`. + Here all of the samples are drawn from the generalized Lp uniform + distribution. + + .. note :: + + If this :class:`~bet.sample.voronoi_sample_set` has exact/estimated + radii of the Voronoi cell associated with each sample for a domain + normalized to the unit hypercube (``_normalized_radii``). + + .. todo :: + + When we move away from domains defined on hypercubes this will need + to be updated to use whatever ``_in_domain`` method exists. + + Volume of the L-p ball is obtained from Wang, X.. (2005). Volumes of + Generalized Unit Balls. Mathematics Magazine, 78(5), 390-395. + `DOI 10.2307/30044198 `_ + + :param int num_l_emulate_local: The number of emulated samples. 
+        :param int max_num_l_emulate: Maximum number of local emulated samples
+
+        """
+        self.check_num()
+        # normalize the samples
+        samples = np.copy(self.get_values())
+        self.update_bounds()
+        samples = samples - self._left
+        samples = samples/self._width
+
+        kdtree = spatial.KDTree(samples)
+
+        # for each sample determine the appropriate radius of the Lp ball (this
+        # should be the distance to the farthest neighboring Voronoi cell)
+        # calculating this exactly is hard so we will estimate it as follows
+        # TODO it is unclear whether to use min, mean, or the first n nearest
+        # samples
+        sample_radii = None
+        if hasattr(self, '_normalized_radii'):
+            sample_radii = np.copy(getattr(self, '_normalized_radii'))
+
+        if sample_radii is None:
+            # Calculate the pairwise distances
+            if not np.isinf(self.p_norm):
+                pairwise_distance = spatial.distance.pdist(samples,
+                        metric='minkowski', p=self.p_norm)
+            else:
+                pairwise_distance = spatial.distance.pdist(samples,
+                        metric='chebyshev')
+            pairwise_distance = spatial.distance.squareform(pairwise_distance)
+            pairwise_distance_ma = np.ma.masked_less_equal(pairwise_distance, 0.)
+            # Calculate mean, std of pairwise distances
+            sample_radii = np.std(pairwise_distance_ma, 0)*3
+        elif np.sum(sample_radii <= 0) > 0:
+            # Calculate the pairwise distances
+            if not np.isinf(self.p_norm):
+                pairwise_distance = spatial.distance.pdist(samples,
+                        metric='minkowski', p=self.p_norm)
+            else:
+                pairwise_distance = spatial.distance.pdist(samples,
+                        metric='chebyshev')
+            pairwise_distance = spatial.distance.squareform(pairwise_distance)
+            pairwise_distance_ma = np.ma.masked_less_equal(pairwise_distance, 0.)
+            # Calculate mean, std of pairwise distances
+            # TODO this may be too large/small
+            # Estimate radius as 2.*STD of the pairwise distance
+            sample_radii[sample_radii <= 0] = np.std(pairwise_distance_ma, 0)*2.
+
+        # determine the volume of the Lp ball
+        if not np.isinf(self.p_norm):
+            sample_Lp_ball_vol = sample_radii**self._dim * \
+                    scipy.special.gamma(1+1./self.p_norm) / \
+                    scipy.special.gamma(1+float(self._dim)/self.p_norm)
+        else:
+            sample_Lp_ball_vol = (2.0*sample_radii)**self._dim
+
+        # Set up local arrays for parallelism
+        self.global_to_local()
+        lam_vol_local = np.zeros(self._local_index.shape)
+
+        # parallelize
+        for i, iglobal in enumerate(self._local_index):
+            samples_in_cell = 0
+            total_samples = 10
+            while samples_in_cell < num_l_emulate_local and \
+                    total_samples < max_num_l_emulate:
+                total_samples = total_samples*10
+                # Sample within an Lp ball until num_l_emulate_local samples are
+                # present in the Voronoi cell
+                local_lambda_emulate = lp.Lp_generalized_uniform(self._dim,
+                        total_samples, self.p_norm, scale=sample_radii[iglobal],
+                        loc=samples[iglobal])
+
+                # determine the number of samples in the Voronoi cell (intersected
+                # with the input_domain)
+                if self._domain is not None:
+                    inside = np.all(np.logical_and(local_lambda_emulate >= 0.0,
+                            local_lambda_emulate <= 1.0), 1)
+                    local_lambda_emulate = local_lambda_emulate[inside]
+
+                (_, emulate_ptr) = kdtree.query(local_lambda_emulate,
+                        p=self.p_norm,
+                        distance_upper_bound=sample_radii[iglobal])
+
+                samples_in_cell = np.sum(np.equal(emulate_ptr, iglobal))
+
+            # the volume for the Voronoi cell corresponding to this sample is
+            # the volume of the Lp ball times the ratio
+            # "num_samples_in_cell/num_total_local_emulated_samples"
+            lam_vol_local[i] = sample_Lp_ball_vol[iglobal]*float(samples_in_cell)\
+                    /float(total_samples)
+
+        self.set_volumes_local(lam_vol_local)
+        self.local_to_global()
+
+        # normalize by the volume of the input_domain
+        domain_vol = np.sum(self.get_volumes())
+        self.set_volumes(self._volumes / domain_vol)
+        self.set_volumes_local(self._volumes_local / domain_vol)
+
+
+class sample_set(voronoi_sample_set):
+    """
+    Set Voronoi cells as the default for now.
+    """
+
+class discretization(object):
+    """
+    A data structure to store all of the :class:`~bet.sample.sample_set`
+    objects and associated pointers to solve a stochastic inverse problem.
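A wiring sketch (not part of this patch) for this class using the pointer methods defined below; the toy model, the output probability set, and all numeric values are stand-ins::

    import numpy as np
    import bet.sample as sample

    num, dim, mdim = 200, 2, 1
    input_set = sample.sample_set(dim)
    input_set.set_domain(np.array([[0.0, 1.0], [0.0, 1.0]]))
    input_set.set_values(np.random.random((num, dim)))

    # toy model: the sum of the inputs as a single QoI
    output_set = sample.sample_set(mdim)
    output_set.set_values(np.sum(input_set.get_values(), axis=1,
                                 keepdims=True))

    # a coarse output sample set carrying the observed probability measure
    output_prob_set = sample.sample_set(mdim)
    output_prob_set.set_values(np.linspace(0.0, 2.0, 10).reshape(-1, 1))
    output_prob_set.set_probabilities(np.ones((10,))/10.0)

    my_disc = sample.discretization(input_set, output_set,
                                    output_probability_set=output_prob_set)
    my_disc.set_io_ptr()          # nearest-neighbor map: output samples -> bins
    io_ptr = my_disc.get_io_ptr() # integer array of shape (num,)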
+ """ + #: List of attribute names for attributes which are vectors or 1D + #: :class:`numpy.ndarray` + vector_names = ['_io_ptr', '_io_ptr_local', '_emulated_ii_ptr', + '_emulated_ii_ptr_local', '_emulated_oo_ptr', '_emulated_oo_ptr_local'] + #: List of attribute names for attributes that are + #: :class:`sample.sample_set`` + sample_set_names = ['_input_sample_set', '_output_sample_set', + '_emulated_input_sample_set', '_emulated_output_sample_set', + '_output_probability_set'] + + + def __init__(self, input_sample_set, output_sample_set, + output_probability_set=None, + emulated_input_sample_set=None, + emulated_output_sample_set=None): + #: Input sample set :class:`~bet.sample.sample_set` + self._input_sample_set = input_sample_set + #: Output sample set :class:`~bet.sample.sample_set` + self._output_sample_set = output_sample_set + #: Emulated Input sample set :class:`~bet.sample.sample_set` + self._emulated_input_sample_set = emulated_input_sample_set + #: Emulated output sample set :class:`~bet.sample.sample_set` + self._emulated_output_sample_set = emulated_output_sample_set + #: Output probability set :class:`~bet.sample.sample_set` + self._output_probability_set = output_probability_set + #: Pointer from ``self._output_sample_set`` to + #: ``self._output_probability_set`` + self._io_ptr = None + #: Pointer from ``self._emulated_input_sample_set`` to + #: ``self._input_sample_set`` + self._emulated_ii_ptr = None + #: Pointer from ``self._emulated_output_sample_set`` to + #: ``self._output_probability_set`` + self._emulated_oo_ptr = None + #: local io pointer for parallelism + self._io_ptr_local = None + #: local emulated ii ptr for parallelsim + self._emulated_ii_ptr_local = None + #: local emulated oo ptr for parallelism + self._emulated_oo_ptr_local = None + if output_sample_set is not None: + self.check_nums() + else: + logging.info("No output_sample_set") + + def check_nums(self): + """ + + Checks that ``self._input_sample_set`` and ``self._output_sample_set`` + both have the same number of samples. + + :rtype: int + :returns: Number of samples + + """ + out_num = self._output_sample_set.check_num() + in_num = self._input_sample_set.check_num() + if out_num != in_num: + raise length_not_matching("input and output lengths do not match") + else: + return in_num + + def set_io_ptr(self, globalize=True): + """ + + Creates the pointer from ``self._output_sample_set`` to + ``self._output_probability_set`` + + :param bool globalize: flag whether or not to globalize + ``self._output_sample_set`` + + """ + if self._output_sample_set._values_local is None: + self._output_sample_set.global_to_local() + if self._output_probability_set._kdtree is None: + self._output_probability_set.set_kdtree() + (_, self._io_ptr_local) = self._output_probability_set.query(\ + self._output_sample_set._values_local) + + if globalize: + self._io_ptr = util.get_global_values(self._io_ptr_local) + + def get_io_ptr(self): + """ + + Returns the pointer from ``self._output_sample_set`` to + ``self._output_probability_set`` + + .. seealso:: + + :meth:`scipy.spatial.KDTree.query`` + + :rtype: :class:`numpy.ndarray` of int of shape + (self._output_sample_set._values.shape[0],) + :returns: self._io_ptr + + """ + return self._io_ptr + + def set_emulated_ii_ptr(self, globalize=True): + """ + + Creates the pointer from ``self._emulated_input_sample_set`` to + ``self._input_sample_set`` + + .. 
seealso:: + + :meth:`scipy.spatial.KDTree.query`` + + :param bool globalize: flag whether or not to globalize + ``self._output_sample_set`` + :param int p: Which Minkowski p-norm to use. (1 <= p <= infinity) + + """ + if self._emulated_input_sample_set._values_local is None: + self._emulated_input_sample_set.global_to_local() + if self._input_sample_set._kdtree is None: + self._input_sample_set.set_kdtree() + (_, self._emulated_ii_ptr_local) = self._input_sample_set.query(\ + self._emulated_input_sample_set._values_local) + if globalize: + self._emulated_ii_ptr = util.get_global_values\ + (self._emulated_ii_ptr_local) + + def get_emulated_ii_ptr(self): + """ + + Returns the pointer from ``self._emulated_input_sample_set`` to + ``self._input_sample_set`` + + .. seealso:: + + :meth:`scipy.spatial.KDTree.query`` + + :rtype: :class:`numpy.ndarray` of int of shape + (self._output_sample_set._values.shape[0],) + :returns: self._emulated_ii_ptr + + """ + return self._emulated_ii_ptr + + def set_emulated_oo_ptr(self, globalize=True): + """ + + Creates the pointer from ``self._emulated_output_sample_set`` to + ``self._output_probability_set`` + + .. seealso:: + + :meth:`scipy.spatial.KDTree.query`` + + :param bool globalize: flag whether or not to globalize + ``self._output_sample_set`` + :param int p: Which Minkowski p-norm to use. (1 <= p <= infinity) + + """ + if self._emulated_output_sample_set._values_local is None: + self._emulated_output_sample_set.global_to_local() + if self._output_probability_set._kdtree is None: + self._output_probability_set.set_kdtree() + (_, self._emulated_oo_ptr_local) = self._output_probability_set.query(\ + self._emulated_output_sample_set._values_local) + + if globalize: + self._emulated_oo_ptr = util.get_global_values\ + (self._emulated_oo_ptr_local) + + def get_emulated_oo_ptr(self): + """ + + Returns the pointer from ``self._emulated_output_sample_set`` to + ``self._output_probabilityset`` + + .. seealso:: + + :meth:`scipy.spatial.KDTree.query`` + + :rtype: :class:`numpy.ndarray` of int of shape + (self._output_sample_set._values.shape[0],) + :returns: self._emulated_ii_ptr + + """ + return self._emulated_oo_ptr + + def copy(self): + """ + Makes a copy using :meth:`numpy.copy`. + + :rtype: :class:`~bet.sample.discretization` + :returns: Copy of this :class:`~bet.sample.discretization` + + """ + my_copy = discretization(self._input_sample_set.copy(), + self._output_sample_set.copy()) + + for attrname in discretization.sample_set_names: + if attrname is not '_input_sample_set' and \ + attrname is not '_output_sample_set': + curr_sample_set = getattr(self, attrname) + if curr_sample_set is not None: + setattr(my_copy, attrname, curr_sample_set.copy()) + + for array_name in discretization.vector_names: + current_array = getattr(self, array_name) + if current_array is not None: + setattr(my_copy, array_name, np.copy(current_array)) + return my_copy diff --git a/bet/sampling/LpGeneralizedSamples.py b/bet/sampling/LpGeneralizedSamples.py new file mode 100644 index 00000000..10d3356e --- /dev/null +++ b/bet/sampling/LpGeneralizedSamples.py @@ -0,0 +1,79 @@ +# Copyright (C) 2016 The BET Development Team + +# Lindley Graham 05/19/2016 + +""" + +This module provides methods to sample from Lp generalized normal, uniform, and +beta distributions on the nD ball. 
+
+Adapted from natter.LpSphericallySymmetric.py https://github.com/fabiansinz/natter
+
+"""
+
+import numpy as np
+
+def Lp_generalized_normal(dim, num, p=2, scale=1.0, loc=None):
+    """
+
+    Generate samples from an Lp generalized normal distribution.
+
+    :param float p: 0 < p < infinity, p for the lp norm
+    :param int dim: Dimension of the space
+    :param int num: Number of samples to generate
+
+    """
+    p = float(p)
+    z = np.random.gamma(1./p, scale=scale, size=(num, dim))
+    z = np.abs(z)**(1./p)
+    samples = z * np.sign(np.random.randn(num, dim))
+    if loc is not None:
+        samples = samples + loc
+    return samples
+
+def Lp_generalized_uniform(dim, num, p=2, scale=1.0, loc=None):
+    """
+
+    Generate samples from an Lp generalized uniform distribution.
+
+    :param p: 0 < p <= infinity, p for the lp norm where infinity is `np.inf`
+    :param int dim: Dimension of the space
+    :param int num: Number of samples to generate
+
+    """
+    if not np.isinf(p):
+        p = float(p)
+        # sample from a p-generalized normal with scale 1
+        samples = Lp_generalized_normal(dim, num, p)
+        samples_norm = np.sum(np.abs(samples)**p, axis=1)**(1./p)
+        samples = samples/np.reshape(samples_norm, (num, 1))
+        r = np.random.beta(a=dim, b=1., size=(num,1))
+        samples = samples * r * scale
+    else:
+        samples = (np.random.random((num, dim))-.5)*2.0 * scale
+    if loc is not None:
+        samples = samples + loc
+    return samples
+
+def Lp_generalized_beta(dim, num, p=2, d=2, scale=1.0, loc=None):
+    """
+
+    Generate samples from an Lp generalized beta distribution. When p=d then
+    this is simply the Lp generalized uniform distribution.
+
+    :param float p: 0 < p < infinity, p for the lp norm
+    :param float d: shape parameter
+    :param int dim: Dimension of the space
+    :param int num: Number of samples to generate
+
+    """
+    p = float(p)
+    # sample from a p-generalized normal with scale 1
+    samples = Lp_generalized_normal(dim, num, p)
+    samples_norm = np.sum(np.abs(samples)**p, axis=1)**(1./p)
+    samples = samples/np.reshape(samples_norm, (num, 1))
+    r = np.random.beta(a=dim/p, b=d/p, size=(num,1))**(1./p)
+    samples = samples * r * scale
+    if loc is not None:
+        samples = samples + loc
+    return samples
diff --git a/bet/sampling/__init__.py b/bet/sampling/__init__.py
index 8ecbed8f..4b0de127 100644
--- a/bet/sampling/__init__.py
+++ b/bet/sampling/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2014-2015 The BET Development Team
+# Copyright (C) 2014-2016 The BET Development Team
 
 """
 This subpackage contains
@@ -11,4 +11,4 @@
 * :class:`bet.sampling.adaptiveSampling` inherits from
     :class:`~bet.sampling.basicSampling` adaptively generates samples.
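A quick sanity check (not part of this patch) that ``Lp_generalized_uniform`` draws points inside the requested Lp ball; the radius, center, and sample counts are arbitrary::

    import numpy as np
    import bet.sampling.LpGeneralizedSamples as lp

    dim, num = 2, 1000

    # p = 2: points in a disc of radius 0.1 centered at (0.5, 0.5)
    pts = lp.Lp_generalized_uniform(dim, num, p=2, scale=0.1,
                                    loc=0.5*np.ones((dim,)))
    radii = np.sqrt(np.sum((pts - 0.5)**2, axis=1))
    assert np.all(radii <= 0.1 + 1e-12)

    # p = np.inf: points in the axis-aligned box [-0.1, 0.1]^dim
    box = lp.Lp_generalized_uniform(dim, num, p=np.inf, scale=0.1)
    assert np.all(np.abs(box) <= 0.1 + 1e-12)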
""" -__all__ = ['basicSampling', 'adaptiveSampling'] +__all__ = ['basicSampling', 'adaptiveSampling', 'LpGeneralizedSamples'] diff --git a/bet/sampling/adaptiveSampling.py b/bet/sampling/adaptiveSampling.py index f593a323..787014d0 100644 --- a/bet/sampling/adaptiveSampling.py +++ b/bet/sampling/adaptiveSampling.py @@ -1,4 +1,4 @@ -# Copyright (C) 2014-2015 The BET Development Team +# Copyright (C) 2014-2016 The BET Development Team # -*- coding: utf-8 -*- # Lindley Graham 3/10/2014 @@ -16,9 +16,9 @@ import scipy.io as sio import bet.sampling.basicSampling as bsam import bet.util as util -import math, os, glob -from bet.Comm import comm, MPI - +import math, os, glob, logging +from bet.Comm import comm +import bet.sample as sample def loadmat(save_file, lb_model=None): """ @@ -30,61 +30,51 @@ def loadmat(save_file, lb_model=None): ndim), and returns data (N, mdim) :rtype: tuple - :returns: (sampler, samples, data) + :returns: (sampler, discretization) """ # load the data from a *.mat file mdat = sio.loadmat(save_file) - # load the samples - if mdat.has_key('samples'): - samples = mdat['samples'] - num_samples = samples.shape[0] - else: - samples = None - num_samples = np.squeeze(mdat['num_samples']) - # load the data - if mdat.has_key('data'): - data = mdat['data'] - else: - data = None + # load the discretization + discretization = sample.load_discretization(save_file) + num_samples = np.squeeze(mdat['num_samples']) # recreate the sampler new_sampler = sampler(num_samples, np.squeeze(mdat['chain_length']), lb_model) - - return (new_sampler, samples, data) + return (new_sampler, discretization) class sampler(bsam.sampler): """ This class provides methods for adaptive sampling of parameter space to provide samples to be used by algorithms to solve inverse problems. 
- chain_length - number of batches of samples - num_chains - number of samples per batch (either a single int or a list of int) - lb_model - :class:`~bet.loadBalance.load_balance` runs the model at a given set of - parameter samples and returns data - """ def __init__(self, num_samples, chain_length, lb_model): """ Initialization - - :param int num_samples: Total number of samples - :param int chain_length: Number of samples per chain - :param lb_model: runs the model at a given set of parameter samples, (N, - ndim), and returns data (N, mdim) + + :param int num_samples: total number of samples + :param int chain_length: number of batches of samples + :param callable lb_model: runs the model at a given set of parameter + samples, (N, ndim), and returns data (N, mdim) """ super(sampler, self).__init__(lb_model, num_samples) + #: number of batches of samples self.chain_length = chain_length + #: number of samples per processor per batch (either a single int or a + #: list of int) self.num_chains_pproc = int(math.ceil(num_samples/\ float(chain_length*comm.size))) + #: number of samples per batch (either a single int or a list of int) self.num_chains = comm.size * self.num_chains_pproc + #: Total number of samples self.num_samples = chain_length * self.num_chains + #: runs the model at a given set of parameter samples, (N, + #: ndim), and returns data (N, mdim) self.lb_model = lb_model + #: batch number for this particular chain self.sample_batch_no = np.repeat(range(self.num_chains), chain_length, 0) @@ -100,7 +90,7 @@ def update_mdict(self, mdict): mdict['num_chains'] = self.num_chains mdict['sample_batch_no'] = self.sample_batch_no - def run_gen(self, kern_list, rho_D, maximum, param_min, param_max, + def run_gen(self, kern_list, rho_D, maximum, input_domain, t_set, savefile, initial_sample_type="lhs", criterion='center'): """ Generates samples using generalized chains and a list of different @@ -112,10 +102,8 @@ def run_gen(self, kern_list, rho_D, maximum, param_min, param_max, :type rho_D: callable function that takes a :class:`numpy.ndarray` and returns a :class:`numpy.ndarray` :param float maximum: maximum value of rho_D - :param param_min: minimum value for each parameter dimension - :type param_min: :class:`numpy.ndarray` (ndim,) - :param param_max: maximum value for each parameter dimension - :type param_max: :class:`numpy.ndarray` (ndim,) + :param input_domain: min, max value for each input dimension + :type input_domain: :class:`numpy.ndarray` (ndim, 2) :param t_set: method for creating new parameter steps using given a step size based on the paramter domain size :type t_set: :class:`bet.sampling.adaptiveSampling.transition_set` @@ -126,7 +114,7 @@ def run_gen(self, kern_list, rho_D, maximum, param_min, param_max, `PyDOE `_ :rtype: tuple - :returns: ((samples, data), all_step_ratios, num_high_prob_samples, + :returns: (discretization, , num_high_prob_samples, sorted_incidices_of_num_high_prob_samples, average_step_ratio) """ @@ -136,18 +124,19 @@ def run_gen(self, kern_list, rho_D, maximum, param_min, param_max, results_rD = list() mean_ss = list() for kern in kern_list: - (samples, data, step_sizes) = self.generalized_chains( - param_min, param_max, t_set, kern, savefile, + (discretization, step_sizes) = self.generalized_chains( + input_domain, t_set, kern, savefile, initial_sample_type, criterion) - results.append((samples, data)) + results.append(discretization) r_step_size.append(step_sizes) - results_rD.append(int(sum(rho_D(data)/maximum))) + 
results_rD.append(int(sum(rho_D(discretization._output_sample_set.\ + get_values())/maximum))) mean_ss.append(np.mean(step_sizes)) sort_ind = np.argsort(results_rD) return (results, r_step_size, results_rD, sort_ind, mean_ss) def run_tk(self, init_ratio, min_ratio, max_ratio, rho_D, maximum, - param_min, param_max, kernel, savefile, + input_domain, kernel, savefile, initial_sample_type="lhs", criterion='center'): """ Generates samples using generalized chains and @@ -164,10 +153,8 @@ def run_tk(self, init_ratio, min_ratio, max_ratio, rho_D, maximum, :type rho_D: callable function that takes a :class:`numpy.ndarray` and returns a :class:`numpy.ndarray` :param float maximum: maximum value of rho_D - :param param_min: minimum value for each parameter dimension - :type param_min: :class:`numpy.ndarray` (ndim,) - :param param_max: maximum value for each parameter dimension - :type param_max: :class:`numpy.ndarray` (ndim,) + :param input_domain: min, max value for each input dimension + :type input_domain: :class:`numpy.ndarray` (ndim, 2) :param kernel: functional that acts on the data used to determine the proposed change to the ``step_size`` :type kernel: :class:`bet.sampling.adaptiveSampling.kernel` object. @@ -178,7 +165,7 @@ def run_tk(self, init_ratio, min_ratio, max_ratio, rho_D, maximum, `PyDOE `_ :rtype: tuple - :returns: ((samples, data), all_step_ratios, num_high_prob_samples, + :returns: (discretization, , num_high_prob_samples, sorted_incidices_of_num_high_prob_samples, average_step_ratio) """ @@ -188,18 +175,19 @@ def run_tk(self, init_ratio, min_ratio, max_ratio, rho_D, maximum, mean_ss = list() for i, j, k in zip(init_ratio, min_ratio, max_ratio): ts = transition_set(i, j, k) - (samples, data, step_sizes) = self.generalized_chains( - param_min, param_max, ts, kernel, savefile, + (discretization, step_sizes) = self.generalized_chains( + input_domain, ts, kernel, savefile, initial_sample_type, criterion) - results.append((samples, data)) + results.append(discretization) r_step_size.append(step_sizes) - results_rD.append(int(sum(rho_D(data)/maximum))) + results_rD.append(int(sum(rho_D(discretization._output_sample_set.\ + get_values())/maximum))) mean_ss.append(np.mean(step_sizes)) sort_ind = np.argsort(results_rD) return (results, r_step_size, results_rD, sort_ind, mean_ss) def run_inc_dec(self, increase, decrease, tolerance, rho_D, maximum, - param_min, param_max, t_set, savefile, + input_domain, t_set, savefile, initial_sample_type="lhs", criterion='center'): """ Generates samples using generalized chains and @@ -214,10 +202,8 @@ def run_inc_dec(self, increase, decrease, tolerance, rho_D, maximum, :type rho_D: callable function that takes a :class:`numpy.ndarray` and returns a :class:`numpy.ndarray` :param float maximum: maximum value of rho_D - :param param_min: minimum value for each parameter dimension - :type param_min: :class:`numpy.ndarray` (ndim,) - :param param_max: maximum value for each parameter dimension - :type param_max: :class:`numpy.ndarray` (ndim,) + :param input_domain: min, max value for each input dimension + :type input_domain: :class:`numpy.ndarray` (ndim, 2) :param t_set: method for creating new parameter steps using given a step size based on the paramter domain size :type t_set: :class:`bet.sampling.adaptiveSampling.transition_set` @@ -228,28 +214,34 @@ def run_inc_dec(self, increase, decrease, tolerance, rho_D, maximum, `PyDOE `_ :rtype: tuple - :returns: ((samples, data), all_step_ratios, num_high_prob_samples, + :returns: (discretization, , 
num_high_prob_samples, sorted_incidices_of_num_high_prob_samples, average_step_ratio) """ kern_list = list() for i, j, z in zip(increase, decrease, tolerance): kern_list.append(rhoD_kernel(maximum, rho_D, i, j, z)) - return self.run_gen(kern_list, rho_D, maximum, param_min, param_max, + return self.run_gen(kern_list, rho_D, maximum, input_domain, t_set, savefile, initial_sample_type, criterion) - def generalized_chains(self, param_min, param_max, t_set, kern, + def generalized_chains(self, input_obj, t_set, kern, savefile, initial_sample_type="random", criterion='center', hot_start=0): """ Basic adaptive sampling algorithm using generalized chains. + + .. todo:: + + Test HOTSTART from parallel files using different and same num proc :param string initial_sample_type: type of initial sample random (or r), latin hypercube(lhs), or space-filling curve(TBD) - :param param_min: minimum value for each parameter dimension - :type param_min: :class:`numpy.ndarray` (ndim,) - :param param_max: maximum value for each parameter dimension - :type param_max: :class:`numpy.ndarray` (ndim,) + :param input_obj: Either a :class:`bet.sample.sample_set` object for an + input space, an array of min and max bounds for the input values + with ``min = input_domain[:, 0]`` and ``max = input_domain[:, 1]``, + or the dimension of an input space + :type input_obj: :class: `~bet.sample.sample_set`, + :class:`numpy.ndarray` of shape (ndim, 2), or :class: `int` :param t_set: method for creating new parameter steps using given a step size based on the paramter domain size :type t_set: :class:`bet.sampling.adaptiveSampling.transition_set` @@ -266,10 +258,10 @@ def generalized_chains(self, param_min, param_max, t_set, kern, `PyDOE `_ :rtype: tuple - :returns: (``parameter_samples``, ``data_samples``, - ``all_step_ratios``) where ``parameter_samples`` is np.ndarray of - shape (num_samples, ndim), ``data_samples`` is np.ndarray of shape - (num_samples, mdim), and ``all_step_ratios`` is np.ndarray of shape + :returns: (``discretization``, + ``all_step_ratios``) where ``discretization`` is a + :class:`~bet.sample.discretization` object containing + ``num_samples`` and ``all_step_ratios`` is np.ndarray of shape (num_chains, chain_length) """ @@ -277,152 +269,150 @@ def generalized_chains(self, param_min, param_max, t_set, kern, psavefile = os.path.join(os.path.dirname(savefile), "proc{}_{}".format(comm.rank, os.path.basename(savefile))) - # Initialize Nx1 vector Step_size = something reasonable (based on size - # of domain and transition set type) - # Calculate domain size - param_left = np.repeat([param_min], self.num_chains_pproc, 0) - param_right = np.repeat([param_max], self.num_chains_pproc, 0) - - param_width = param_right - param_left # Calculate step_size max_ratio = t_set.max_ratio min_ratio = t_set.min_ratio if not hot_start: + logging.info("COLD START") step_ratio = t_set.init_ratio*np.ones(self.num_chains_pproc) # Initiative first batch of N samples (maybe taken from latin # hypercube/space-filling curve to fully explore parameter space - # not necessarily random). Call these Samples_old. 
- (samples_old, data_old) = super(sampler, self).random_samples( - initial_sample_type, param_min, param_max, savefile, + disc_old = super(sampler, self).create_random_discretization( + initial_sample_type, input_obj, savefile, self.num_chains, criterion) self.num_samples = self.chain_length * self.num_chains comm.Barrier() - # now split it all up - if comm.size > 1: - MYsamples_old = np.empty((np.shape(samples_old)[0]/comm.size, - np.shape(samples_old)[1])) - comm.Scatter([samples_old, MPI.DOUBLE], [MYsamples_old, - MPI.DOUBLE]) - MYdata_old = np.empty((np.shape(data_old)[0]/comm.size, - np.shape(data_old)[1])) - comm.Scatter([data_old, MPI.DOUBLE], [MYdata_old, MPI.DOUBLE]) - else: - MYsamples_old = np.copy(samples_old) - MYdata_old = np.copy(data_old) + # populate local values + disc_old._input_sample_set.global_to_local() + disc_old._output_sample_set.global_to_local() + input_old = disc_old._input_sample_set.copy() + + disc = disc_old.copy() + all_step_ratios = step_ratio + + (kern_old, proposal) = kern.delta_step(disc_old.\ + _output_sample_set.get_values_local(), None) - samples = MYsamples_old - data = MYdata_old - all_step_ratios = step_ratio - (kern_old, proposal) = kern.delta_step(MYdata_old, None) start_ind = 1 if hot_start: # LOAD FILES if hot_start == 1: # HOT START FROM PARTIAL RUN if comm.rank == 0: - print "HOT START from partial run" + logging.info("HOT START from partial run") # Find and open save files save_dir = os.path.dirname(savefile) base_name = os.path.dirname(savefile) mdat_files = glob.glob(os.path.join(save_dir, "proc*_{}".format(base_name))) if len(mdat_files) == 0: - print "HOT START using serial file" + logging.info("HOT START using serial file") mdat = sio.loadmat(savefile) - samples = mdat['samples'] - data = mdat['data'] + disc = sample.load_discretization(savefile) kern_old = np.squeeze(mdat['kern_old']) all_step_ratios = np.squeeze(mdat['step_ratios']) - chain_length = samples.shape[0]/self.num_chains + chain_length = disc.check_nums()/self.num_chains if all_step_ratios.shape == (self.num_chains, - chain_length): - print "Serial file, from completed run updating hot_start" + chain_length): + msg = "Serial file, from completed" + msg += " run updating hot_start" hot_start = 2 # reshape if parallel if comm.size > 1: - samples = np.reshape(samples, (self.num_chains, - chain_length, -1), 'F') - data = np.reshape(data, (self.num_chains, - chain_length, -1), 'F') + temp_input = np.reshape(disc._input_sample_set.\ + get_values(), (self.num_chains, + chain_length, -1), 'F') + temp_output = np.reshape(disc._output_sample_set.\ + get_values(), (self.num_chains, + chain_length, -1), 'F') all_step_ratios = np.reshape(all_step_ratios, - (self.num_chains, -1), 'F') + (self.num_chains, -1), 'F') elif hot_start == 1 and len(mdat_files) == comm.size: - print "HOT START using parallel files (same nproc)" + logging.info("HOT START using parallel files (same nproc)") # if the number of processors is the same then set mdat to # be the one with the matching processor number (doesn't # really matter) mdat = sio.loadmat(mdat_files[comm.rank]) - samples = mdat['samples'] - data = mdat['data'] + disc = sample.load_discretization(mdat_files[comm.rank]) kern_old = np.squeeze(mdat['kern_old']) all_step_ratios = np.squeeze(mdat['step_ratios']) elif hot_start == 1 and len(mdat_files) != comm.size: - print "HOT START using parallel files (diff nproc)" + logging.info("HOT START using parallel files (diff nproc)") # Determine how many processors the previous data used # otherwise gather 
the data from mdat and then scatter # among the processors and update mdat mdat_files_local = comm.scatter(mdat_files) mdat_local = [sio.loadmat(m) for m in mdat_files_local] + disc_local = [sample.load_discretization(m) for m in\ + mdat_files_local] mdat_list = comm.allgather(mdat_local) + disc_list = comm.allgather(disc_local) mdat_global = [] + disc_global = [] # instead of a list of lists, create a list of mdat - for mlist in mdat_list: + for mlist, dlist in zip(mdat_list, disc_list): mdat_global.extend(mlist) + disc_global.extend(dlist) # get num_proc and num_chains_pproc for previous run old_num_proc = max((len(mdat_list), 1)) old_num_chains_pproc = self.num_chains/old_num_proc # get batch size and/or number of dimensions - chain_length = mdat_global[0]['samples'].shape[0]/\ + chain_length = disc_global[0].check_nums()/\ old_num_chains_pproc + disc = disc_global[0].copy() # create lists of local data - samples = [] - data = [] + temp_input = [] + temp_output = [] all_step_ratios = [] kern_old = [] # RESHAPE old_num_chains_pproc, chain_length(or batch), dim - for mdat in mdat_global: - samples.append(np.reshape(mdat['samples'], - (old_num_chains_pproc, chain_length, -1), 'F')) - data.append(np.reshape(mdat['data'], - (old_num_chains_pproc, chain_length, -1), 'F')) + for mdat, disc_local in zip(mdat_global, disc_local): + temp_input.append(np.reshape(disc_local.\ + _input_sample_set.get_values_local(), + (old_num_chains_pproc, chain_length, -1), 'F')) + temp_output.append(np.reshape(disc_local.\ + _output_sample_set.get_values_local(), + (old_num_chains_pproc, chain_length, -1), 'F')) all_step_ratios.append(np.reshape(mdat['step_ratios'], (old_num_chains_pproc, chain_length, -1), 'F')) kern_old.append(np.reshape(mdat['kern_old'], (old_num_chains_pproc,), 'F')) # turn into arrays - samples = np.concatenate(samples) - data = np.concatenate(data) + temp_input = np.concatenate(temp_input) + temp_output = np.concatenate(temp_output) all_step_ratios = np.concatenate(all_step_ratios) kern_old = np.concatenate(kern_old) if hot_start == 2: # HOT START FROM COMPLETED RUN: if comm.rank == 0: - print "HOT START from completed run" + logging.info("HOT START from completed run") mdat = sio.loadmat(savefile) - samples = mdat['samples'] - data = mdat['data'] + disc = sample.load_discretization(savefile) kern_old = np.squeeze(mdat['kern_old']) all_step_ratios = np.squeeze(mdat['step_ratios']) - chain_length = samples.shape[0]/self.num_chains - mdat_files = [] + chain_length = disc.check_nums()/self.num_chains # reshape if parallel if comm.size > 1: - samples = np.reshape(samples, (self.num_chains, - chain_length, -1), 'F') - data = np.reshape(data, (self.num_chains, - chain_length, -1), 'F') + temp_input = np.reshape(disc._input_sample_set.\ + get_values(), (self.num_chains, chain_length, + -1), 'F') + temp_output = np.reshape(disc._output_sample_set.\ + get_values(), (self.num_chains, chain_length, + -1), 'F') all_step_ratios = np.reshape(all_step_ratios, (self.num_chains, chain_length), 'F') # SPLIT DATA IF NECESSARY if comm.size > 1 and (hot_start == 2 or (hot_start == 1 and \ len(mdat_files) != comm.size)): - # Use split to split along num_chains - samples = np.reshape(np.split(samples, comm.size, - 0)[comm.rank], (self.num_chains_pproc*chain_length, -1), - 'F') - data = np.reshape(np.split(data, comm.size, 0)[comm.rank], - (self.num_chains_pproc*chain_length, -1), 'F') + # Use split to split along num_chains and set *._values_local + disc._input_sample_set.set_values_local(np.reshape(np.split(\ + 
temp_input, comm.size, 0)[comm.rank], + (self.num_chains_pproc*chain_length, -1), 'F')) + disc._output_sample_set.set_values_local(np.reshape(np.split(\ + temp_output, comm.size, 0)[comm.rank], + (self.num_chains_pproc*chain_length, -1), 'F')) all_step_ratios = np.reshape(np.split(all_step_ratios, comm.size, 0)[comm.rank], (self.num_chains_pproc*chain_length,), 'F') @@ -430,28 +420,35 @@ def generalized_chains(self, param_min, param_max, t_set, kern, 0)[comm.rank], (self.num_chains_pproc,), 'F') else: all_step_ratios = np.reshape(all_step_ratios, (-1,), 'F') - # Set samples, data, all_step_ratios, mdat, step_ratio, - # MYsamples_old, and kern_old accordingly + # MAKE SURE ARRAYS ARE LOCALIZED FROM HERE ON OUT WILL ONLY + # OPERATE ON _local_values + # Set mdat, step_ratio, input_old, start_ind appropriately step_ratio = all_step_ratios[-self.num_chains_pproc:] - MYsamples_old = samples[-self.num_chains_pproc:, :] + input_old = sample.sample_set(disc._input_sample_set.get_dim()) + input_old.set_domain(disc._input_sample_set.get_domain()) + input_old.set_values_local(disc._input_sample_set.\ + get_values_local()[-self.num_chains_pproc:, :]) + # Determine how many batches have been run - start_ind = samples.shape[0]/self.num_chains_pproc + start_ind = disc._input_sample_set.get_values_local().\ + shape[0]/self.num_chains_pproc mdat = dict() self.update_mdict(mdat) + input_old.update_bounds_local() + for batch in xrange(start_ind, self.chain_length): # For each of N samples_old, create N new parameter samples using - # transition set and step_ratio. Call these samples samples_new. - samples_new = t_set.step(step_ratio, param_width, - param_left, param_right, MYsamples_old) + # transition set and step_ratio. Call these samples input_new. + input_new = t_set.step(step_ratio, input_old) - # Solve the model for the samples_new. - data_new = self.lb_model(samples_new) + # Solve the model for the input_new. + output_new_values = self.lb_model(input_new.get_values_local()) # Make some decision about changing step_size(k). There are # multiple ways to do this. # Determine step size - (kern_old, proposal) = kern.delta_step(data_new, kern_old) + (kern_old, proposal) = kern.delta_step(output_new_values, kern_old) step_ratio = proposal*step_ratio # Is the ratio greater than max? 
step_ratio[step_ratio > max_ratio] = max_ratio @@ -462,31 +459,27 @@ def generalized_chains(self, param_min, param_max, t_set, kern, if self.chain_length < 4: pass elif comm.rank == 0 and (batch+1)%(self.chain_length/4) == 0: - print "Current chain length: "+\ - str(batch+1)+"/"+str(self.chain_length) - samples = np.concatenate((samples, samples_new)) - data = np.concatenate((data, data_new)) + logging.info("Current chain length: "+\ + str(batch+1)+"/"+str(self.chain_length)) + disc._input_sample_set.append_values_local(input_new.\ + get_values_local()) + disc._output_sample_set.append_values_local(output_new_values) all_step_ratios = np.concatenate((all_step_ratios, step_ratio)) mdat['step_ratios'] = all_step_ratios - mdat['samples'] = samples - mdat['data'] = data mdat['kern_old'] = kern_old + if comm.size > 1: - super(sampler, self).save(mdat, psavefile) + super(sampler, self).save(mdat, psavefile, disc) else: - super(sampler, self).save(mdat, savefile) - MYsamples_old = samples_new + super(sampler, self).save(mdat, savefile, disc) + input_old = input_new # collect everything - MYsamples = np.copy(samples) - MYdata = np.copy(data) - MYall_step_ratios = np.copy(all_step_ratios) - # ``parameter_samples`` is np.ndarray of shape (num_samples, ndim) - samples = util.get_global_values(MYsamples, - shape=(self.num_samples, np.shape(MYsamples)[1])) - # and ``data_samples`` is np.ndarray of shape (num_samples, mdim) - data = util.get_global_values(MYdata, shape=(self.num_samples, - np.shape(MYdata)[1])) + disc._input_sample_set.update_bounds_local() + disc._input_sample_set.local_to_global() + disc._output_sample_set.local_to_global() + + MYall_step_ratios = np.copy(all_step_ratios) # ``all_step_ratios`` is np.ndarray of shape (num_chains, # chain_length) all_step_ratios = util.get_global_values(MYall_step_ratios, @@ -496,13 +489,12 @@ def generalized_chains(self, param_min, param_max, t_set, kern, # save everything mdat['step_ratios'] = all_step_ratios - mdat['samples'] = samples - mdat['data'] = data mdat['kern_old'] = util.get_global_values(kern_old, shape=(self.num_chains,)) - super(sampler, self).save(mdat, savefile) + if comm.rank == 0: + super(sampler, self).save(mdat, savefile, disc) - return (samples, data, all_step_ratios) + return (disc, all_step_ratios) def kernels(Q_ref, rho_D, maximum): """ @@ -528,19 +520,12 @@ def kernels(Q_ref, rho_D, maximum): class transition_set(object): """ Basic class that is used to create a step to move from samples_old to - samples_new based. This class generates steps for a random walk using a + input_new based. This class generates steps for a random walk using a very basic algorithm. Future classes will inherit from this one with different implementations of the :meth:~`polysim.run_framework.apdative_sampling.step` method. This basic transition set is designed without a preferential direction. - init_ratio - Initial step size ratio compared to the parameter domain. - min_ratio - Minimum step size compared to the initial step size. - max_ratio - Maximum step size compared to the maximum step size. 
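A hedged sketch of driving ``generalized_chains`` with the new ``(discretization, all_step_ratios)`` return shown above; ``my_sampler`` is the sampler from the earlier sketch, and ``rho_D``, ``maximum``, and the save file name are illustrative assumptions::

    import numpy as np
    import bet.sampling.adaptiveSampling as asam

    # assumed indicator-style density on the data space and its peak value
    rho_D = lambda outputs: np.all(np.abs(outputs - 0.5) < 0.1, axis=1).astype(float)
    maximum = 1.0

    input_domain = np.array([[0.0, 1.0], [0.0, 1.0]])
    t_set = asam.transition_set(0.5, 0.5**5, 1.0)
    kern = asam.rhoD_kernel(maximum, rho_D)
    (my_disc, all_step_ratios) = my_sampler.generalized_chains(
        input_domain, t_set, kern, 'adaptive_run.mat')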
- """ def __init__(self, init_ratio, min_ratio, max_ratio): @@ -552,55 +537,50 @@ def __init__(self, init_ratio, min_ratio, max_ratio): :param float max_ratio: maximum step_ratio """ + #: float, initial step ratio self.init_ratio = init_ratio + #: float, minimum step_ratio self.min_ratio = min_ratio + #: float, maximum step_ratio self.max_ratio = max_ratio - def step(self, step_ratio, param_width, param_left, param_right, - samples_old): + def step(self, step_ratio, input_old): """ Generate ``num_samples`` new steps using ``step_ratio`` and - ``param_width`` to calculate the ``step size``. Each step will have a + ``input_width`` to calculate the ``step size``. Each step will have a random direction. - :param step_ratio: define maximum step_size = ``step_ratio*param_width`` + :param step_ratio: define maximum step_size = ``step_ratio*input_width`` :type step_ratio: :class:`numpy.ndarray` of shape (num_samples,) - :param param_width: width of the parameter domain - :type param_width: :class:`numpy.ndarray` of shape (ndim,) - :param param_left: minimum boundary of the parameter domain - :type param_left: :class:`numpy.ndarray` of shape (ndim, N) where N is - the length of ``step_ratio`` - :param param_right: maximum boundary of the parameter domain - :type param_right: :class:`numpy.ndarray` of shape (ndim, N) where N is - the length of ``step_ratio`` - :param samples_old: Parameter samples from the previous step. - :type samples_old: :class:`~numpy.ndarray` of shape (num_samples, + :param input_old: Input from the previous step. + :type input_old: :class:`~numpy.ndarray` of shape (num_samples, ndim) :rtype: :class:`numpy.ndarray` of shape (num_samples, ndim) - :returns: samples_new + :returns: input_new """ # calculate maximum step size - step_size = np.repeat([step_ratio], param_width.shape[1], - 0).transpose()*param_width + step_size = np.repeat([step_ratio], input_old.get_dim(), + 0).transpose()*input_old._width_local # check to see if step will take you out of parameter space # calculate maximum proposed step - samples_right = samples_old + 0.5*step_size - samples_left = samples_old - 0.5*step_size + my_right = input_old.get_values_local() + 0.5*step_size + my_left = input_old.get_values_local() - 0.5*step_size # Is the new sample greaters than the right limit? - far_right = samples_right >= param_right - far_left = samples_left <= param_left - # If the samples could leave the domain then truncate the box defining + far_right = my_right >= input_old._right_local + far_left = my_left <= input_old._left_local + # If the input could leave the domain then truncate the box defining # the step_size - samples_right[far_right] = param_right[far_right] - samples_left[far_left] = param_left[far_left] - samples_width = samples_right-samples_left - #samples_center = (samples_right+samples_left)/2.0 - samples_new = samples_width * np.random.random(samples_old.shape) - samples_new = samples_new + samples_left - - return samples_new + my_right[far_right] = input_old._right_local[far_right] + my_left[far_left] = input_old._left_local[far_left] + my_width = my_right-my_left + #input_center = (input_right+input_left)/2.0 + input_new_values = my_width * np.random.random(input_old.shape_local()) + input_new_values = input_new_values + my_left + input_new = input_old.copy() + input_new.set_values_local(input_new_values) + return input_new class kernel(object): """ @@ -609,13 +589,6 @@ class kernel(object): this is simply a skeleton parent class it does not change the step size at all. 
- TOL - a tolerance used to determine if two different values are close - increase - the multiple to increase the step size by - decrease - the multiple to decrease the step size by - """ def __init__(self, tolerance=1E-08, increase=1.0, decrease=1.0): @@ -627,23 +600,26 @@ def __init__(self, tolerance=1E-08, increase=1.0, decrease=1.0): :param float decrease: The multiple to decrease the step size by """ + #: float, Tolerance for comparing two values self.TOL = tolerance + #: float, The multiple to increase the step size by self.increase = increase + #: float, The multiple to decrease the step size by self.decrease = decrease - def delta_step(self, data_new, kern_old=None): + def delta_step(self, output_new, kern_old=None): """ This method determines the proposed change in step size. - :param data_new: QoI for a given batch of samples - :type data_new: :class:`numpy.ndarray` of shape (num_chains, mdim) + :param output_new: QoI for a given batch of samples + :type output_new: :class:`numpy.ndarray` of shape (num_chains, mdim) :param kern_old: kernel evaluated at previous step :rtype: typle :returns: (kern_new, proposal) """ - return (None, np.ones((data_new.shape[0],))) + return (kern_old, np.ones((output_new.shape[0],))) class rhoD_kernel(kernel): """ @@ -651,22 +627,11 @@ class rhoD_kernel(kernel): determine inverse regions of high probability accurately (in terms of getting the measure correct). This class provides a method for determining the proposed change in step size as follows. We check if the QoI at each of - the samples_new(k) are closer or farther away from a region of high + the input_new(k) are closer or farther away from a region of high probability in D than the QoI at samples_old(k). For example, if they are closer, then we can reduce the step_size(k) by 1/2. Note: This only works well with smooth rho_D. - maximum - maximum value of rho_D on D - rho_D - probability density on D - tolerance - a tolerance used to determine if two different values are close - increase - the multiple to increase the step size by - decrease - the multiple to decrease the step size by - """ def __init__(self, maximum, rho_D, tolerance=1E-08, increase=2.0, @@ -681,17 +646,20 @@ def __init__(self, maximum, rho_D, tolerance=1E-08, increase=2.0, :param float decrease: The multiple to decrease the step size by """ + #: float, maximum value of rho_D self.MAX = maximum + #: callable, function, probability density on D self.rho_D = rho_D + #: bool, flag sort order self.sort_ascending = False super(rhoD_kernel, self).__init__(tolerance, increase, decrease) - def delta_step(self, data_new, kern_old=None): + def delta_step(self, output_new, kern_old=None): """ This method determines the proposed change in step size. - :param data_new: QoI for a given batch of samples - :type data_new: :class:`numpy.ndarray` of shape (num_chains, mdim) + :param output_new: QoI for a given batch of samples + :type output_new: :class:`numpy.ndarray` of shape (num_chains, mdim) :param kern_old: kernel evaluated at previous step :rtype: tuple @@ -699,7 +667,7 @@ def delta_step(self, data_new, kern_old=None): """ # Evaluate kernel for new data. - kern_new = self.rho_D(data_new) + kern_new = self.rho_D(output_new) if kern_old is None: return (kern_new, None) @@ -728,22 +696,10 @@ class maxima_kernel(kernel): the goal is to determine inverse regions of high probability accurately (in terms of getting the measure correct). This class provides a method for determining the proposed change in step size as follows. 
We check if the - QoI at each of the samples_new(k) are closer or farther away from a region + QoI at each of the input_new(k) are closer or farther away from a region of high probability in D than the QoI at samples_old(k). For example, if they are closer, then we can reduce the step_size(k) by 1/2. - maxima - locations of the maxima of rho_D on D - :class:`numpy.ndarray` of shape (num_maxima, mdim) - rho_max - rho_D(maxima), list of maximum values of rho_D - tolerance - a tolerance used to determine if two different values are close - increase - the multiple to increase the step size by - decrease - the multiple to decrease the step size by - """ def __init__(self, maxima, rho_D, tolerance=1E-08, increase=2.0, @@ -761,18 +717,22 @@ def __init__(self, maxima, rho_D, tolerance=1E-08, increase=2.0, :param float decrease: The multiple to decrease the step size by """ + #: locations of the maxima of rho_D on D self.MAXIMA = maxima + #: int, number of maxima self.num_maxima = maxima.shape[0] + #: list of maximum values of rho_D self.rho_max = rho_D(maxima) super(maxima_kernel, self).__init__(tolerance, increase, decrease) + #: bool, flag sort order self.sort_ascending = True - def delta_step(self, data_new, kern_old=None): + def delta_step(self, output_new, kern_old=None): """ This method determines the proposed change in step size. - :param data_new: QoI for a given batch of samples - :type data_new: :class:`numpy.ndarray` of shape (num_chains, mdim) + :param output_new: QoI for a given batch of samples + :type output_new: :class:`numpy.ndarray` of shape (num_chains, mdim) :param kern_old: kernel evaluated at previous step :rtype: tuple @@ -780,11 +740,11 @@ def delta_step(self, data_new, kern_old=None): """ # Evaluate kernel for new data. - kern_new = np.zeros((data_new.shape[0])) + kern_new = np.zeros((output_new.shape[0])) - for i in xrange(data_new.shape[0]): + for i in xrange(output_new.shape[0]): # calculate distance from each of the maxima - vec_from_maxima = np.repeat([data_new[i, :]], self.num_maxima, 0) + vec_from_maxima = np.repeat([output_new[i, :]], self.num_maxima, 0) vec_from_maxima = vec_from_maxima - self.MAXIMA # weight distances by 1/rho_D(maxima) dist_from_maxima = np.linalg.norm(vec_from_maxima, 2, @@ -818,21 +778,9 @@ class maxima_mean_kernel(maxima_kernel): the goal is to determine inverse regions of high probability accurately (in terms of getting the measure correct). This class provides a method for determining the proposed change in step size as follows. We check if the - QoI at each of the samples_new(k) are closer or farther away from a region + QoI at each of the input_new(k) are closer or farther away from a region of high probability in D than the QoI at samples_old(k). For example, if they are closer, then we can reduce the step_size(k) by 1/2. - - maxima - locations of the maxima of rho_D on D - np.array of shape (num_maxima, mdim) - rho_max - rho_D(maxima), list of maximum values of rho_D - tolerance - a tolerance used to determine if two different values are close - increase - the multiple to increase the step size by - decrease - the multiple to decrease the step size by """ @@ -851,8 +799,11 @@ def __init__(self, maxima, rho_D, tolerance=1E-08, increase=2.0, :param float decrease: The multiple to decrease the step size by """ + #: approximate radius self.radius = None + #: approximate mean self.mean = None + #: current number of estimates for approx. 
mean, radius self.current_clength = 0 super(maxima_mean_kernel, self).__init__(maxima, rho_D, tolerance, increase, decrease) @@ -866,12 +817,12 @@ def reset(self): self.mean = None self.current_clength = 0 - def delta_step(self, data_new, kern_old=None): + def delta_step(self, output_new, kern_old=None): """ This method determines the proposed change in step size. - :param data_new: QoI for a given batch of samples - :type data_new: :class:`numpy.ndarray` of shape (num_chains, mdim) + :param output_new: QoI for a given batch of samples + :type output_new: :class:`numpy.ndarray` of shape (num_chains, mdim) :param kern_old: kernel evaluated at previous step :rtype: tuple @@ -879,12 +830,12 @@ def delta_step(self, data_new, kern_old=None): """ # Evaluate kernel for new data. - kern_new = np.zeros((data_new.shape[0])) + kern_new = np.zeros((output_new.shape[0])) self.current_clength = self.current_clength + 1 - for i in xrange(data_new.shape[0]): + for i in xrange(output_new.shape[0]): # calculate distance from each of the maxima - vec_from_maxima = np.repeat([data_new[i, :]], self.num_maxima, 0) + vec_from_maxima = np.repeat([output_new[i, :]], self.num_maxima, 0) vec_from_maxima = vec_from_maxima - self.MAXIMA # weight distances by 1/rho_D(maxima) dist_from_maxima = np.linalg.norm(vec_from_maxima, 2, @@ -893,21 +844,21 @@ def delta_step(self, data_new, kern_old=None): kern_new[i] = np.min(dist_from_maxima) if kern_old is None: # calculate the mean - self.mean = np.mean(data_new, 0) + self.mean = np.mean(output_new, 0) # calculate the distance from the mean - vec_from_mean = data_new - np.repeat([self.mean], - data_new.shape[0], 0) + vec_from_mean = output_new - np.repeat([self.mean], + output_new.shape[0], 0) # estimate the radius of D self.radius = np.max(np.linalg.norm(vec_from_mean, 2, 1)) return (kern_new, None) else: # update the estimate of the mean - self.mean = (self.current_clength-1)*self.mean + np.mean(data_new, + self.mean = (self.current_clength-1)*self.mean + np.mean(output_new, 0) self.mean = self.mean / self.current_clength # calculate the distance from the mean - vec_from_mean = data_new - np.repeat([self.mean], - data_new.shape[0], 0) + vec_from_mean = output_new - np.repeat([self.mean], + output_new.shape[0], 0) # esitmate the radius of D self.radius = max(np.max(np.linalg.norm(vec_from_mean, 2, 1)), self.radius) diff --git a/bet/sampling/basicSampling.py b/bet/sampling/basicSampling.py index 4d3113f4..9af6d322 100644 --- a/bet/sampling/basicSampling.py +++ b/bet/sampling/basicSampling.py @@ -1,4 +1,4 @@ -# Copyright (C) 2014-2015 The BET Development Team +# Copyright (C) 2014-2016 The BET Development Team # Lindley Graham 4/15/2014 """ @@ -13,37 +13,34 @@ import numpy as np import scipy.io as sio from pyDOE import lhs -import bet.util as util from bet.Comm import comm +import bet.sample as sample +import bet.util as util +import collections -def loadmat(save_file, model=None): +def loadmat(save_file, disc_name=None, model=None): """ Loads data from ``save_file`` into a :class:`~bet.basicSampling.sampler` object. 
:param string save_file: file name + :param string disc_name: name of :class:`~bet.sample.discretization` in + file :param model: runs the model at a given set of parameter samples and returns data + :type model: callable + :rtype: tuple - :returns: (sampler, samples, data) + :returns: (sampler, discretization) """ # load the data from a *.mat file mdat = sio.loadmat(save_file) - # load the samples - if mdat.has_key('samples'): - samples = mdat['samples'] - num_samples = samples.shape[0] - else: - samples = None - num_samples = None - # load the data - if mdat.has_key('data'): - data = mdat['data'] - else: - data = None + num_samples = mdat['num_samples'] + # load the discretization + discretization = sample.load_discretization(save_file, disc_name) loaded_sampler = sampler(model, num_samples) - return (loaded_sampler, samples, data) + return (loaded_sampler, discretization) class sampler(object): """ @@ -54,8 +51,8 @@ class sampler(object): total number of samples OR list of number of samples per dimension such that total number of samples is prob(num_samples) lb_model - :class:`~bet.loadBalance.load_balance` runs the model at a given set of - parameter samples and returns data + callable function that runs the model at a given set of input and + returns output """ def __init__(self, lb_model, num_samples=None): """ @@ -63,12 +60,19 @@ def __init__(self, lb_model, num_samples=None): :param lb_model: Interface to physics-based model takes an input of shape (N, ndim) and returns an output of shape (N, mdim) + :type lb_model: callable function :param int num_samples: N, number of samples (optional) """ + #: int, total number of samples OR list of number of samples per + #: dimension such that total number of samples is prob(num_samples) self.num_samples = num_samples + #: callable function that runs the model at a given set of input and + #: returns output + #: parameter samples and returns data + self.lb_model = lb_model - def save(self, mdict, save_file): + def save(self, mdict, save_file, discretization=None): """ Save matrices to a ``*.mat`` file for use by ``MATLAB BET`` code and :meth:`~bet.sampling.loadmat` @@ -78,6 +82,8 @@ def save(self, mdict, save_file): """ sio.savemat(save_file, mdict, do_compression=True) + if discretization is not None: + sample.save_discretization(discretization, save_file) def update_mdict(self, mdict): """ @@ -88,8 +94,8 @@ def update_mdict(self, mdict): """ mdict['num_samples'] = self.num_samples - def random_samples(self, sample_type, param_min, param_max, - savefile, num_samples=None, criterion='center', parallel=False): + def random_sample_set(self, sample_type, input_sample_set, + num_samples=None, criterion='center'): """ Sampling algorithm with three basic options @@ -102,95 +108,322 @@ def random_samples(self, sample_type, param_min, param_max, :param string sample_type: type sampling random (or r), latin hypercube(lhs), regular grid (rg), or space-filling - curve(TBD) - :param param_min: minimum value for each parameter dimension - :type param_min: :class:`numpy.ndarray` (ndim,) - :param param_max: maximum value for each parameter dimension - :type param_max: :class:`numpy.ndarray` (ndim,) - :param string savefile: filename to save samples and data + curve(TBD) + :param input_sample_set: samples to evaluate the model at + :type input_sample_set: :class:`~bet.sample.sample_set` with + num_smaples + :param string savefile: filename to save discretization :param int num_samples: N, number of samples (optional) :param string criterion: latin hypercube 
criterion see `PyDOE `_ - :param bool parallel: Flag for parallel implementation. Uses - lowercase ``mpi4py`` methods if ``samples.shape[0]`` is not - divisible by ``size``. Default value is ``False``. - :rtype: tuple - :returns: (``parameter_samples``, ``data_samples``) where - ``parameter_samples`` is np.ndarray of shape (num_samples, ndim) - and ``data_samples`` is np.ndarray of shape (num_samples, mdim) + + :rtype: :class:`~bet.sample.sample_set` + :returns: :class:`~bet.sample.sample_Set` object which contains + input ``num_samples`` """ # Create N samples - if num_samples == None: + dim = input_sample_set.get_dim() + + if num_samples is None: num_samples = self.num_samples - param_left = np.repeat([param_min], num_samples, 0) - param_right = np.repeat([param_max], num_samples, 0) - samples = (param_right-param_left) + + if input_sample_set.get_domain() is None: + # create the domain + input_domain = np.array([[0., 1.]]*dim) + input_sample_set.set_domain(input_domain) + # update the bounds based on the number of samples + input_sample_set.update_bounds(num_samples) + input_values = np.copy(input_sample_set._width) if sample_type == "lhs": - samples = samples * lhs(param_min.shape[-1], + input_values = input_values * lhs(dim, num_samples, criterion) elif sample_type == "random" or "r": - samples = samples * np.random.random(param_left.shape) - samples = samples + param_left - return self.user_samples(samples, savefile, parallel) + input_values = input_values * np.random.random(input_values.shape) + input_values = input_values + input_sample_set._left + input_sample_set.set_values(input_values) + + return input_sample_set + + def random_sample_set_domain(self, sample_type, input_domain, + num_samples=None, criterion='center'): + """ + Sampling algorithm with three basic options + + * ``random`` (or ``r``) generates ``num_samples`` samples in + ``lam_domain`` assuming a Lebesgue measure. + * ``lhs`` generates a latin hyper cube of samples. + + Note: This function is designed only for generalized rectangles and + assumes a Lebesgue measure on the parameter space. + + :param string sample_type: type sampling random (or r), + latin hypercube(lhs), regular grid (rg), or space-filling + curve(TBD) + :param input_domain: min and max bounds for the input values, + ``min = input_domain[:, 0]`` and ``max = input_domain[:, 1]`` + :type input_domain: :class:`numpy.ndarray` of shape (ndim, 2) + :param string savefile: filename to save discretization + :param int num_samples: N, number of samples (optional) + :param string criterion: latin hypercube criterion see + `PyDOE `_ + + :rtype: :class:`~bet.sample.sample_set` + :returns: :class:`~bet.sample.sample_Set` object which contains + input ``num_samples`` + + """ + # Create N samples + input_sample_set = sample.sample_set(input_domain.shape[0]) + input_sample_set.set_domain(input_domain) + + return self.random_sample_set(sample_type, input_sample_set, + num_samples, criterion) + + def random_sample_set_dimension(self, sample_type, input_dim, + num_samples=None, criterion='center'): + """ + Sampling algorithm with three basic options + + * ``random`` (or ``r``) generates ``num_samples`` samples in + ``lam_domain`` assuming a Lebesgue measure. + * ``lhs`` generates a latin hyper cube of samples. 
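For example, drawing random samples over an explicit rectangular domain with the helper defined above; the toy model and sample count are assumptions::

    import numpy as np
    import bet.sampling.basicSampling as bsam

    my_model = lambda x: np.sum(x**2, axis=1, keepdims=True)   # hypothetical model
    my_sampler = bsam.sampler(my_model, num_samples=100)

    input_domain = np.array([[0.0, 1.0], [-1.0, 1.0]])
    input_set = my_sampler.random_sample_set_domain('random', input_domain,
                                                    num_samples=100)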
+ + Note: A default input space of a hypercube is created and the + Lebesgue measure is assumed on a space of dimension specified + by ``input_dim`` + + :param string sample_type: type sampling random (or r), + latin hypercube(lhs), regular grid (rg), or space-filling + curve(TBD) + :param int input_dim: the dimension of the input space + :param string savefile: filename to save discretization + :param int num_samples: N, number of samples (optional) + :param string criterion: latin hypercube criterion see + `PyDOE `_ + + :rtype: :class:`~bet.sample.sample_set` + :returns: :class:`~bet.sample.sample_Set` object which contains + input ``num_samples`` + + """ + # Create N samples + input_sample_set = sample.sample_set(input_dim) + + return self.random_sample_set(sample_type, input_sample_set, + num_samples, criterion) + + def regular_sample_set(self, input_sample_set, num_samples_per_dim=1): + """ + Sampling algorithm for generating a regular grid of samples taken + on the domain present with input_sample_set (a default unit hypercube + is used if no domain has been specified) + + :param input_sample_set: samples to evaluate the model at + :type input_sample_set: :class:`~bet.sample.sample_set` with + num_smaples + :param num_samples_per_dim: number of samples per dimension + :type num_samples_per_dim: :class: `~numpy.ndarray` of dimension + (input_sample_set._dim,) + + :rtype: :class:`~bet.sample.sample_set` + :returns: :class:`~bet.sample.sample_Set` object which contains + input ``num_samples`` + """ + + # Create N samples + dim = input_sample_set.get_dim() + + if not isinstance(num_samples_per_dim, collections.Iterable): + num_samples_per_dim = num_samples_per_dim * np.ones((dim,)) + if np.any(np.less_equal(num_samples_per_dim, 0)): + print 'Warning: num_smaples_per_dim must be greater than 0' + + self.num_samples = np.product(num_samples_per_dim) + + if input_sample_set.get_domain() is None: + # create the domain + input_domain = np.array([[0., 1.]] * dim) + input_sample_set.set_domain(input_domain) + else: + input_domain = input_sample_set.get_domain() + # update the bounds based on the number of samples + input_sample_set.update_bounds(self.num_samples) + input_values = np.copy(input_sample_set._width) + + vec_samples_dimension = np.empty((dim), dtype=object) + for i in np.arange(0, dim): + vec_samples_dimension[i] = list(np.linspace( + input_domain[i,0], input_domain[i,1], + num_samples_per_dim[i]+2))[1:num_samples_per_dim[i]+1] + + if np.equal(dim, 1): + arrays_samples_dimension = np.array([vec_samples_dimension]) + else: + arrays_samples_dimension = np.meshgrid( + *[vec_samples_dimension[i] for i in np.arange(0, dim)], indexing='ij') + + if np.equal(dim, 1): + input_values = arrays_samples_dimension.transpose() + else: + for i in np.arange(0, dim): + input_values[:,i:i+1] = np.vstack(arrays_samples_dimension[i].flat[:]) + + input_sample_set.set_values(input_values) + + return input_sample_set + + def regular_sample_set_domain(self, input_domain, num_samples_per_dim=1): + """ + Sampling algorithm for generating a regular grid of samples taken + on the domain present with input_sample_set (a default unit hypercube + is used if no domain has been specified) + + :param input_domain: min and max bounds for the input values, + ``min = input_domain[:, 0]`` and ``max = input_domain[:, 1]`` + :type input_domain: :class:`numpy.ndarray` of shape (ndim, 2) + :param num_samples_per_dim: number of samples per dimension + :type num_samples_per_dim: :class: `~numpy.ndarray` of dimension + 
(input_sample_set._dim,) + + :rtype: :class:`~bet.sample.sample_set` + :returns: :class:`~bet.sample.sample_Set` object which contains + input ``num_samples`` - def user_samples(self, samples, savefile, parallel=False): """ - Samples the model at ``samples`` and saves the results. + # Create N samples + input_sample_set = sample.sample_set(input_domain.shape[0]) + input_sample_set.set_domain(input_domain) + + return self.regular_sample_set(input_sample_set, num_samples_per_dim) + + def regular_sample_set_dimension(self, input_dim, num_samples_per_dim=1): + """ + Sampling algorithm for generating a regular grid of samples taken + on a unit hypercube of dimension input_dim + + :param int input_dim: the dimension of the input space + :param num_samples_per_dim: number of samples per dimension + :type num_samples_per_dim: :class: `~numpy.ndarray` of dimension + (input_sample_set._dim,) + + :rtype: :class:`~bet.sample.sample_set` + :returns: :class:`~bet.sample.sample_Set` object which contains + input ``num_samples`` + + """ + # Create N samples + input_sample_set = sample.sample_set(input_dim) + + return self.regular_sample_set(input_sample_set, num_samples_per_dim) + + def compute_QoI_and_create_discretization(self, input_sample_set, + savefile=None, parallel=False): + """ + Samples the model at ``input_sample_set`` and saves the results. Note: There are many ways to generate samples on a regular grid in Numpy and other Python packages. Instead of reimplementing them here we provide sampler that utilizes user specified samples. - :param samples: samples to evaluate the model at - :type samples: :class:`~numpy.ndarray` of shape (num_smaples, ndim) + :param input_sample_set: samples to evaluate the model at + :type input_sample_set: :class:`~bet.sample.sample_set` with + num_smaples :param string savefile: filename to save samples and data - :param bool parallel: Flag for parallel implementation. Uses - lowercase ``mpi4py`` methods if ``samples.shape[0]`` is not - divisible by ``size``. Default value is ``False``. - :rtype: tuple - :returns: (``parameter_samples``, ``data_samples``) where - ``parameter_samples`` is np.ndarray of shape (num_samples, ndim) - and ``data_samples`` is np.ndarray of shape (num_samples, mdim) + :param bool parallel: Flag for parallel implementation. Default value + is ``False``. 
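Continuing that sketch, a regular grid over a given domain via ``regular_sample_set_domain``; the grid resolution is an assumption::

    grid_set = my_sampler.regular_sample_set_domain(
        np.array([[0.0, 1.0], [0.0, 1.0]]),
        num_samples_per_dim=np.array([10, 10]))
    # 100 interior grid points of shape (100, 2); domain boundaries are excluded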
+ + :rtype: :class:`~bet.sample.discretization` + :returns: :class:`~bet.sample.discretization` object which contains + input and output of ``num_samples`` """ # Update the number of samples - self.num_samples = samples.shape[0] + self.num_samples = input_sample_set.check_num() # Solve the model at the samples if not(parallel) or comm.size == 1: - data = self.lb_model(samples) - elif parallel: - my_len = self.num_samples/comm.size - if comm.rank != comm.size-1: - my_index = range(0+comm.rank*my_len, (comm.rank+1)*my_len) + output_values = self.lb_model(\ + input_sample_set.get_values()) + # figure out the dimension of the output + if len(output_values.shape) == 1: + output_dim = 1 else: - my_index = range(0+comm.rank*my_len, self.num_samples) - if len(samples.shape) == 1: - my_samples = samples[my_index] + output_dim = output_values.shape[1] + output_sample_set = sample.sample_set(output_dim) + output_sample_set.set_values(output_values) + elif parallel: + input_sample_set.global_to_local() + local_output_values = self.lb_model(\ + input_sample_set.get_values_local()) + # figure out the dimension of the output + if len(local_output_values.shape) <= 1: + output_dim = 1 else: - my_samples = samples[my_index, :] - my_data = self.lb_model(my_samples) - data = util.get_global_values(my_data) - samples = util.get_global_values(my_samples) + output_dim = local_output_values.shape[1] + output_sample_set = sample.sample_set(output_dim) + output_sample_set.set_values_local(local_output_values) + input_sample_set.local_to_global() + output_sample_set.local_to_global() - # if data or samples are of shape (num_samples,) expand dimensions - if len(samples.shape) == 1: - samples = np.expand_dims(samples, axis=1) - if len(data.shape) == 1: - data = np.expand_dims(data, axis=1) - + discretization = sample.discretization(input_sample_set, + output_sample_set) mdat = dict() self.update_mdict(mdat) - mdat['samples'] = samples - mdat['data'] = data - if comm.rank == 0: - self.save(mdat, savefile) - - return (samples, data) + if comm.rank == 0 and savefile is not None: + self.save(mdat, savefile, discretization) + comm.barrier() + return discretization + + def create_random_discretization(self, sample_type, input_obj, + savefile=None, num_samples=None, criterion='center', + parallel=False): + """ + Sampling algorithm with three basic options + * ``random`` (or ``r``) generates ``num_samples`` samples in + ``lam_domain`` assuming a Lebesgue measure. + * ``lhs`` generates a latin hyper cube of samples. + Note: This function is designed only for generalized rectangles and + assumes a Lebesgue measure on the parameter space. + + :param string sample_type: type sampling random (or r), + latin hypercube(lhs), regular grid (rg), or space-filling + curve(TBD) + :param input_obj: Either a :class:`bet.sample.sample_set` object for an + input space, an array of min and max bounds for the input values + with ``min = input_domain[:, 0]`` and ``max = input_domain[:, 1]``, + or the dimension of an input space + :type input_obj: :class: `~bet.sample.sample_set`, + :class:`numpy.ndarray` of shape (ndim, 2), or :class: `int` + :param string savefile: filename to save discretization + :param int num_samples: N, number of samples (optional) + :param string criterion: latin hypercube criterion see + `PyDOE `_ + :param bool parallel: Flag for parallel implementation. Default value + is ``False``. 
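Continuing further, evaluating the model at a prepared input sample set yields a discretization holding both input and output sample sets; the save file name is illustrative::

    my_disc = my_sampler.compute_QoI_and_create_discretization(input_set,
                                                               savefile='my_run.mat')
    # my_disc._input_sample_set holds the inputs,
    # my_disc._output_sample_set holds the corresponding model outputs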
+ + :rtype: :class:`~bet.sample.discretization` + :returns: :class:`~bet.sample.discretization` object which contains + input and output sample sets with ``num_samples`` total samples + """ + # Create N samples + if num_samples is None: + num_samples = self.num_samples + + if isinstance(input_obj, sample.sample_set_base): + input_sample_set = self.random_sample_set(sample_type, input_obj, + num_samples, criterion) + elif isinstance(input_obj, np.ndarray): + input_sample_set = self.random_sample_set_domain(sample_type, + input_obj, num_samples, criterion) + else: + input_sample_set = self.random_sample_set_dimension(sample_type, + input_obj, num_samples, criterion) + return self.compute_QoI_and_create_discretization(input_sample_set, + savefile, parallel) diff --git a/bet/sensitivity/chooseQoIs.py b/bet/sensitivity/chooseQoIs.py index 68a9a340..db7e195d 100644 --- a/bet/sensitivity/chooseQoIs.py +++ b/bet/sensitivity/chooseQoIs.py @@ -1,199 +1,290 @@ -# Copyright (C) 2014-2015 The BET Development Team +# Copyright (C) 2014-2016 The BET Development Team """ This module contains functions choosing optimal QoIs to use in the stochastic inverse problem. """ +import logging import numpy as np from itertools import combinations from bet.Comm import comm import bet.util as util from scipy import stats -def calculate_avg_condnum(grad_tensor, qoi_set): +def calculate_avg_measure(input_set, qoi_set=None, bin_measure=None): r""" - Given gradient vectors at some points (centers) in the parameter space and - given a specific set of QoIs, caculate the average condition number of the - matrices formed by the gradient vectors of each QoI map at each center. - - :param grad_tensor: Gradient vectors at each center in the parameter space - :math:`\Lambda` for each QoI map. - :type grad_tensor: :class:`np.ndarray` of shape (num_centers, num_qois, - Lambda_dim) where num_centers is the number of points in :math:`\Lambda` - we have approximated the gradient vectors and num_qois is the number of - QoIs we are given. + If you are using ``bin_ratio`` to define the hyperrectangle in the output + space you must must give this method gradient vectors normalized with + respect to the 1-norm. If you are using ``bin_size`` to define the + hyperrectangle in the output space you must give this method the original + gradient vectors. If you also give a ``bin_measure``, this method will + approximate the measure of the region of non-zero probability in the inverse + solution. + Given gradient vectors at some points (centers) in the input space and + given a specific set of QoIs, calculate the expected measure of the + inverse image of a box in the data space using loca linear approximations + of the map Q. + + :param input_set: The input sample set. Make sure the attribute _jacobians + is not None. 
+ :type input_set: :class:`~bet.sample.sample_set` :param list qoi_set: List of QoI indices + :param float bin_measure: The measure of the output_dim hyperrectangle to + invert into the input space :rtype: tuple - :returns: (condnum, singvals) where condnum is a float and singvals - has shape (num_centers, Data_dim) + :returns: (avg_measure, singvals) where avg_measure is a float and singvals + has shape (num_centers, output_dim) """ + + if input_set._jacobians is None: + raise ValueError("You must have jacobians to use this method.") + if qoi_set is None: + G = input_set._jacobians + else: + G = input_set._jacobians[:, qoi_set, :] + if G.shape[1] > G.shape[2]: + raise ValueError("Measure is not defined for more outputs than inputs.\ + Try adding a qoi_set to evaluate the measure of.") + + # If no measure is given, we consider how this set of QoIs will change the + # measure of the unit hypercube. + if bin_measure is None: + bin_measure = 1.0 + # Calculate the singular values of the matrix formed by the gradient # vectors of each QoI map. This gives a set of singular values for each # center. - singvals = np.linalg.svd(grad_tensor[:, qoi_set, :], compute_uv=False) - indz = singvals[:, -1] == 0 - if np.sum(indz) == singvals.shape[0]: - hmean_condnum = np.inf + singvals = np.linalg.svd(G, compute_uv=False) + + # Find the average produt of the singular values over each center, then use + # this to compute the average measure of the inverse solution. + avg_prod_singvals = np.mean(np.prod(singvals, axis=1)) + if avg_prod_singvals == 0: + avg_measure = np.inf else: - singvals[indz, 0] = np.inf - singvals[indz, -1] = 1 - condnums = singvals[:, 0] / singvals[:, -1] - hmean_condnum = stats.hmean(condnums) + avg_measure = bin_measure / avg_prod_singvals - return hmean_condnum, singvals + return avg_measure, singvals -def calculate_avg_volume(grad_tensor, qoi_set, bin_volume=None): +def calculate_avg_skewness(input_set, qoi_set=None): r""" - If you are using ``bin_ratio`` to define the hyperrectangle in the Data - space you must must give this method gradient vectors normalized with - respect to the 1-norm. If you are using ``bin_size`` to define the - hyperrectangle in the Data space you must give this method the original - gradient vectors. If you also give a ``bin_volume``, this method will - approximate the volume of the region of non-zero probability in the inverse - solution. - Given gradient vectors at some points (centers) in the parameter space - and given a specific set of QoIs, calculate the average volume of the - inverse image of a box in the data space assuming the mapping is linear near - each center. - - :param grad_tensor: Gradient vectors at each point of interest in the - parameter space :math:`\Lambda` for each QoI map. - :type grad_tensor: :class:`np.ndarray` of shape (num_centers, num_qois, - Lambda_dim) where num_centers is the number of points in :math:`\Lambda` - we have approximated the gradient vectors and num_qois is the number of - QoIs we are given. + Given gradient vectors at some points (centers) in the input space and + given a specific set of QoIs, caculate the average skewness of the arrays + formed by the gradient vectors of each QoI map at each center. + + :param input_set: The input sample set. Make sure the attribute _jacobians + is not None. 
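A minimal sketch of the new ``calculate_avg_measure`` call above; ``input_set`` is assumed to already carry ``_jacobians`` of shape (num_centers, num_qois, input_dim), for example computed with the gradient tools in ``bet.sensitivity``::

    import bet.sensitivity.chooseQoIs as cqoi

    (avg_measure, singvals) = cqoi.calculate_avg_measure(input_set, qoi_set=[0, 1])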
+ :type input_set: :class:`~bet.sample.sample_set` + :param list qoi_set: List of QoI indices + :rtype: tuple + :returns: (hmean_skewG, skewgi) where hmean_skewG is the harmonic mean of + skewness at each center in the input space (float) and skewgi + has shape (num_centers, output_dim) + """ + + if input_set._jacobians is None: + raise ValueError("You must have jacobians to use this method.") + if qoi_set is None: + G = input_set._jacobians + else: + G = input_set._jacobians[:, qoi_set, :] + if G.shape[1] > G.shape[2]: + msg = "Skewness is not defined for more outputs than inputs." + msg += " Try adding a qoi_set to evaluate the skewness of." + raise ValueError(msg) + + num_centers = G.shape[0] + output_dim = G.shape[1] + + # Calculate the singular values of the matrix formed by the gradient + # vectors of each QoI map. This gives a set of singular values for each + # center. + singvals = np.linalg.svd(G, compute_uv=False) + + # The measure of the parallelepipeds defined by the rows of each Jacobian + muG = np.tile(np.prod(singvals, axis=1), [output_dim, 1]).transpose() + + # Calculate the measure of the parallelepipeds defined by the rows of each + # Jacobian if we remove the i'th row. + muGi = np.zeros([num_centers, output_dim]) + for i in range(G.shape[1]): + muGi[:, i] = np.prod(np.linalg.svd(np.delete(G, i, axis=1), + compute_uv=False), axis=1) + + # Find the norm of each gradient vector + normgi = np.linalg.norm(G, axis=2) + + # Find the norm of the new vector, giperp, that is perpendicular to the span + # of the other vectors and defines a parallelepiped of the same measure. + normgiperp = muG / muGi + + # We now calculate the local skewness + skewgi = np.zeros([num_centers, output_dim]) + + # The local skewness is calculated for nonzero giperp + skewgi[normgiperp != 0] = normgi[normgiperp != 0] / \ + normgiperp[normgiperp != 0] + + # If giperp is the zero vector, it is not GD from the rest of the gradient + # vectors, so the skewness is infinity. + skewgi[normgiperp == 0] = np.inf + + # If the norm of giperp is infinity, then the rest of the vector were not GD + # to begin with, so skewness is infinity. + skewgi[normgiperp == np.inf] = np.inf + + # The local skewness is the max skewness of each vector relative the rest + skewG = np.max(skewgi, axis=1) + skewG[np.isnan(skewG)] = np.inf + + # We may have values equal to infinity, so we consider the harmonic mean. + hmean_skewG = stats.hmean(skewG) + + return hmean_skewG, skewgi + +def calculate_avg_condnum(input_set, qoi_set=None): + r""" + Given gradient vectors at some points (centers) in the input space and + given a specific set of QoIs, caculate the average condition number of the + matrices formed by the gradient vectors of each QoI map at each center. + + :param input_set: The input sample set. Make sure the attribute _jacobians + is not None. + :type input_set: :class:`~bet.sample.sample_set` :param list qoi_set: List of QoI indices - :param float bin_volume: The volume of the Data_dim hyperrectangle to - invert into :math:`\Lambda` :rtype: tuple - :returns: (avg_volume, singvals) where avg_volume is a float and singvals - has shape (num_centers, Data_dim) + :returns: (condnum, singvals) where condnum is a float and singvals + has shape (num_centers, output_dim) """ - # If no volume is given, we consider how this set of QoIs we change the - # volume of the unit hypercube. 
- if bin_volume is None: - bin_volume = 1.0 + + if input_set._jacobians is None: + raise ValueError("You must have jacobians to use this method.") + if qoi_set is None: + G = input_set._jacobians + else: + G = input_set._jacobians[:, qoi_set, :] + if G.shape[1] > G.shape[2]: + msg = "Condition number is not defined for more outputs than inputs." + msg += " Try adding a qoi_set to evaluate the condition number of." + raise ValueError(msg) # Calculate the singular values of the matrix formed by the gradient # vectors of each QoI map. This gives a set of singular values for each # center. - singvals = np.linalg.svd(grad_tensor[:, qoi_set, :], compute_uv=False) - - # Find the average produt of the singular values over each center, then use - # this to compute the average volume of the inverse solution. - avg_prod_singvals = np.mean(np.prod(singvals, axis=1)) - if avg_prod_singvals == 0: - avg_volume = np.inf + singvals = np.linalg.svd(G, compute_uv=False) + indz = singvals[:, -1] == 0 + if np.sum(indz) == singvals.shape[0]: + hmean_condnum = np.inf else: - avg_volume = bin_volume / avg_prod_singvals + singvals[indz, 0] = np.inf + singvals[indz, -1] = 1 + condnums = singvals[:, 0] / singvals[:, -1] + hmean_condnum = stats.hmean(condnums) - return avg_volume, singvals + return hmean_condnum, singvals -def chooseOptQoIs(grad_tensor, qoiIndices=None, num_qois_return=None, - num_optsets_return=None, inner_prod_tol=1.0, volume=False, +def chooseOptQoIs(input_set, qoiIndices=None, num_qois_return=None, + num_optsets_return=None, inner_prod_tol=1.0, measure=False, remove_zeros=True): r""" Given gradient vectors at some points (centers) in the parameter space, a set of QoIs to choose from, and the number of desired QoIs to return, this method returns the ``num_optsets_return`` best sets of QoIs with with - repsect to either the average condition number of the matrix formed by the - gradient vectors of each QoI map, or the average volume of the inverse - problem us this set of QoIs, computed as the product of the singular values + repsect to either the average measure of the matrix formed by the + gradient vectors of each QoI map, OR the average skewness of the inverse + image of this set of QoIs, computed as the product of the singular values of the same matrix. This method is brute force, i.e., if the method is given 10,000 QoIs and told to return the N best sets of 3, it will check all 10,000 choose 3 possible sets. See chooseOptQoIs_large for a less computationally expensive approach. - :param grad_tensor: Gradient vectors at each point of interest in the - parameter space :math:`\Lambda` for each QoI map. - :type grad_tensor: :class:`np.ndarray` of shape (num_centers, num_qois, - Lambda_dim) where num_centers is the number of points in :math:`\Lambda` - we have approximated the gradient vectors and num_qois is the total - number of possible QoIs to choose from - :param qoiIndices: Set of QoIs to consider from grad_tensor. Default is - range(0, grad_tensor.shape[1]) + :param input_set: The input sample set. Make sure the attribute _jacobians + is not None. + :type input_set: :class:`~bet.sample.sample_set` + :param qoiIndices: Set of QoIs to consider. Default is + range(0, input_set._jacobians.shape[1]) :type qoiIndices: :class:`np.ndarray` of size (1, num QoIs to consider) :param int num_qois_return: Number of desired QoIs to use in the - inverse problem. Default is Lambda_dim + inverse problem. 
Default is input_dim :param int num_optsets_return: Number of best sets to return Default is 10 - :param boolean volume: If measure is True, use ``calculate_avg_volume`` - to determine optimal QoIs + :param boolean measure: If measure is True, use ``calculate_avg_measure`` + to determine optimal QoIs, else use ``calculate_avg_skewness`` :param boolean remove_zeros: If True, ``find_unique_vecs`` will remove any - QoIs that have a zero gradient vector at atleast one point in - :math:`\Lambda`. + QoIs that have a zero gradient. :rtype: `np.ndarray` of shape (num_optsets_returned, num_qois_returned + 1) - :returns: condnum_indices_mat + :returns: measure_skewness_indices_mat """ - (condnum_indices_mat, _) = chooseOptQoIs_verbose(grad_tensor, - qoiIndices, num_qois_return, num_optsets_return, inner_prod_tol, volume, - remove_zeros) - return condnum_indices_mat + (measure_skewness_indices_mat, _) = chooseOptQoIs_verbose(input_set, + qoiIndices, num_qois_return, num_optsets_return, inner_prod_tol, + measure, remove_zeros) + + return measure_skewness_indices_mat -def chooseOptQoIs_verbose(grad_tensor, qoiIndices=None, num_qois_return=None, - num_optsets_return=None, inner_prod_tol=1.0, volume=False, +def chooseOptQoIs_verbose(input_set, qoiIndices=None, num_qois_return=None, + num_optsets_return=None, inner_prod_tol=1.0, measure=False, remove_zeros=True): r""" Given gradient vectors at some points (centers) in the parameter space, a set of QoIs to choose from, and the number of desired QoIs to return, this method returns the ``num_optsets_return`` best sets of QoIs with with - repsect to either the average condition number of the matrix formed by the - gradient vectors of each QoI map, or the average volume of the inverse - problem us this set of QoIs, computed as the product of the singular values + repsect to either the average measure of the matrix formed by the + gradient vectors of each QoI map, OR the average skewness of the inverse + image of this set of QoIs, computed as the product of the singular values of the same matrix. This method is brute force, i.e., if the method is given 10,000 QoIs and told to return the N best sets of 3, it will check all 10,000 choose 3 possible sets. See chooseOptQoIs_large for a less computationally expensive approach. - :param grad_tensor: Gradient vectors at each point of interest in the - parameter space :math:`\Lambda` for each QoI map. - :type grad_tensor: :class:`np.ndarray` of shape (num_centers, num_qois, - Lambda_dim) where num_centers is the number of points in :math:`\Lambda` - we have approximated the gradient vectors and num_qois is the total - number of possible QoIs to choose from - :param qoiIndices: Set of QoIs to consider from grad_tensor. Default is - range(0, grad_tensor.shape[1]) + :param input_set: The input sample set. Make sure the attribute _jacobians + is not None. + :type input_set: :class:`~bet.sample.sample_set` + :param qoiIndices: Set of QoIs to consider. Default is + range(0, input_set._jacobians.shape[1]) :type qoiIndices: :class:`np.ndarray` of size (1, num QoIs to consider) :param int num_qois_return: Number of desired QoIs to use in the - inverse problem. Default is Lambda_dim + inverse problem. 
Default is input_dim :param int num_optsets_return: Number of best sets to return Default is 10 - :param boolean volume: If volume is True, use ``calculate_avg_volume`` - to determine optimal QoIs + :param boolean measure: If measure is True, use ``calculate_avg_measure`` + to determine optimal QoIs, else use ``calculate_avg_skewness`` :param boolean remove_zeros: If True, ``find_unique_vecs`` will remove any - QoIs that have a zero gradient vector at atleast one point in - :math:`\Lambda`. + QoIs that have a zero gradient. - :rtype: tuple - :returns: (condnum_indices_mat, optsingvals) where condnum_indices_mat has - shape (num_optsets_return, num_qois_return+1) and optsingvals - has shape (num_centers, num_qois_return, num_optsets_return) + :rtype: `np.ndarray` of shape (num_optsets_returned, num_qois_returned + 1) + :returns: measure_skewness_indices_mat """ - num_centers = grad_tensor.shape[0] - Lambda_dim = grad_tensor.shape[2] + + G = input_set._jacobians + if G is None: + raise ValueError("You must have jacobians to use this method.") + input_dim = input_set._dim + num_centers = G.shape[0] + if qoiIndices is None: - qoiIndices = range(0, grad_tensor.shape[1]) + qoiIndices = range(0, G.shape[1]) if num_qois_return is None: - num_qois_return = Lambda_dim + num_qois_return = input_dim if num_optsets_return is None: num_optsets_return = 10 - qoiIndices = find_unique_vecs(grad_tensor, inner_prod_tol, qoiIndices, + # Remove QoIs that have zero gradients at any of the centers + qoiIndices = find_unique_vecs(input_set, inner_prod_tol, qoiIndices, remove_zeros) # Find all posible combinations of QoIs if comm.rank == 0: qoi_combs = np.array(list(combinations(list(qoiIndices), num_qois_return))) - print 'Possible sets of QoIs : ', qoi_combs.shape[0] + logging.info('Possible sets of QoIs : {}'.format(qoi_combs.shape[0])) qoi_combs = np.array_split(qoi_combs, comm.size) else: qoi_combs = None @@ -203,23 +294,24 @@ def chooseOptQoIs_verbose(grad_tensor, qoiIndices=None, num_qois_return=None, # For each combination, check the skewness and keep the sets # that have the best skewness, i.e., smallest condition number - condnum_indices_mat = np.zeros([num_optsets_return, num_qois_return + 1]) - condnum_indices_mat[:, 0] = np.inf + measure_skewness_indices_mat = np.zeros([num_optsets_return, + num_qois_return + 1]) + measure_skewness_indices_mat[:, 0] = np.inf optsingvals_tensor = np.zeros([num_centers, num_qois_return, num_optsets_return]) for qoi_set in range(len(qoi_combs)): - if volume == False: - (current_condnum, singvals) = calculate_avg_condnum(grad_tensor, + if measure == False: + (current_measskew, singvals) = calculate_avg_skewness(input_set, qoi_combs[qoi_set]) else: - (current_condnum, singvals) = calculate_avg_volume(grad_tensor, + (current_measskew, singvals) = calculate_avg_measure(input_set, qoi_combs[qoi_set]) - if current_condnum < condnum_indices_mat[-1, 0]: - condnum_indices_mat[-1, :] = np.append(np.array([current_condnum]), - qoi_combs[qoi_set]) - order = condnum_indices_mat[:, 0].argsort() - condnum_indices_mat = condnum_indices_mat[order] + if current_measskew < measure_skewness_indices_mat[-1, 0]: + measure_skewness_indices_mat[-1, :] = np.append(np.array(\ + [current_measskew]), qoi_combs[qoi_set]) + order = measure_skewness_indices_mat[:, 0].argsort() + measure_skewness_indices_mat = measure_skewness_indices_mat[order] optsingvals_tensor[:, :, -1] = singvals optsingvals_tensor = optsingvals_tensor[:, :, order] @@ -228,29 +320,32 @@ def chooseOptQoIs_verbose(grad_tensor, 
qoiIndices=None, num_qois_return=None, comm.Barrier() # Gather the best sets and condition numbers from each processor - condnum_indices_mat = np.array(comm.gather(condnum_indices_mat, root=0)) + measure_skewness_indices_mat = np.array(comm.gather(\ + measure_skewness_indices_mat, root=0)) optsingvals_tensor = np.array(comm.gather(optsingvals_tensor, root=0)) # Find the num_optsets_return smallest condition numbers from all processors if comm.rank == 0: - condnum_indices_mat = condnum_indices_mat.reshape(num_optsets_return * \ - comm.size, num_qois_return + 1) + measure_skewness_indices_mat = measure_skewness_indices_mat.reshape(\ + num_optsets_return * comm.size, num_qois_return + 1) optsingvals_tensor = optsingvals_tensor.reshape(num_centers, num_qois_return, num_optsets_return * comm.size) - order = condnum_indices_mat[:, 0].argsort() + order = measure_skewness_indices_mat[:, 0].argsort() - condnum_indices_mat = condnum_indices_mat[order] - condnum_indices_mat = condnum_indices_mat[:num_optsets_return, :] + measure_skewness_indices_mat = measure_skewness_indices_mat[order] + measure_skewness_indices_mat = measure_skewness_indices_mat[\ + :num_optsets_return, :] optsingvals_tensor = optsingvals_tensor[:, :, order] optsingvals_tensor = optsingvals_tensor[:, :, :num_optsets_return] - condnum_indices_mat = comm.bcast(condnum_indices_mat, root=0) + measure_skewness_indices_mat = comm.bcast(measure_skewness_indices_mat, + root=0) optsingvals_tensor = comm.bcast(optsingvals_tensor, root=0) - return (condnum_indices_mat, optsingvals_tensor) + return (measure_skewness_indices_mat, optsingvals_tensor) -def find_unique_vecs(grad_tensor, inner_prod_tol, qoiIndices=None, +def find_unique_vecs(input_set, inner_prod_tol, qoiIndices=None, remove_zeros=True): r""" Given gradient vectors at each center in the parameter space, sort throught @@ -259,12 +354,9 @@ def find_unique_vecs(grad_tensor, inner_prod_tol, qoiIndices=None, some tolerance, i.e., an average angle between the two vectors smaller than some tolerance. - :param grad_tensor: Gradient vectors at each point of interest in the - parameter space :math:'\Lambda' for each QoI map. - :type grad_tensor: :class:`np.ndarray` of shape (num_centers,num_qois,Ldim) - where num_centers is the number of points in :math:'\Lambda' we have - approximated the gradient vectors, num_qois is the total number of - possible QoIs to choose from, Ldim is the dimension of :math:`\Lambda`. + :param input_set: The input sample set. Make sure the attribute _jacobians + is not None. + :type input_set: :class:`~bet.sample.sample_set` :param float inner_prod_tol: Maximum acceptable average inner product between two QoI maps. :param qoiIndices: Set of QoIs to consider. @@ -278,35 +370,37 @@ def find_unique_vecs(grad_tensor, inner_prod_tol, qoiIndices=None, """ - Lambda_dim = grad_tensor.shape[2] + if input_set._jacobians is None: + raise ValueError("You must have jacobians to use this method.") + input_dim = input_set._dim + G = input_set._jacobians if qoiIndices is None: - qoiIndices = range(0, grad_tensor.shape[1]) + qoiIndices = range(0, G.shape[1]) # Normalize the gradient vectors with respect to the 2-norm so the inner # product tells us about the angle between the two vectors. - norm_grad_tensor = np.linalg.norm(grad_tensor, ord=2, axis=2) + norm_G = np.linalg.norm(G, ord=2, axis=2) # Remove any QoI that has a zero vector at atleast one of the centers. 
if remove_zeros: indz = np.array([]) - for i in range(norm_grad_tensor.shape[1]): - if np.sum(norm_grad_tensor[:, i] == 0) > 0: + for i in range(norm_G.shape[1]): + if np.sum(norm_G[:, i] == 0) > 0: indz = np.append(indz, i) else: indz = [] # If it is a zero vector (has 0 norm), set norm=1, avoid divide by zero - norm_grad_tensor[norm_grad_tensor == 0] = 1.0 + norm_G[norm_G == 0] = 1.0 # Normalize each gradient vector - grad_tensor = grad_tensor/np.tile(norm_grad_tensor, (Lambda_dim, 1, - 1)).transpose(1, 2, 0) + G = G/np.tile(norm_G, (input_dim, 1, 1)).transpose(1, 2, 0) if comm.rank == 0: - print '*** find_unique_vecs ***' - print 'num_zerovec : ', len(indz), 'of (', grad_tensor.shape[1],\ - ') original QoIs' - print 'Possible QoIs : ', len(qoiIndices) - len(indz) + logging.info('*** find_unique_vecs ***') + logging.info('num_zerovec : {} of ({}) original QoIs'.\ + format(len(indz), G.shape[1])) + logging.info('Possible QoIs : {}'.format(len(qoiIndices)-len(indz))) qoiIndices = list(set(qoiIndices) - set(indz)) # Find all num_qois choose 2 pairs of QoIs @@ -321,8 +415,8 @@ def find_unique_vecs(grad_tensor, inner_prod_tol, qoiIndices=None, # If neither of the current QoIs are in the repeat_vec, test them if curr_set[0] not in repeat_vec and curr_set[1] not in repeat_vec: - curr_inner_prod = np.sum(grad_tensor[:, curr_set[0], :] * \ - grad_tensor[:, curr_set[1], :]) / grad_tensor.shape[0] + curr_inner_prod = np.sum(G[:, curr_set[0], :] * \ + G[:, curr_set[1], :]) / G.shape[0] # If the innerprod>tol, throw out the second QoI if np.abs(curr_inner_prod) > inner_prod_tol: @@ -330,45 +424,46 @@ def find_unique_vecs(grad_tensor, inner_prod_tol, qoiIndices=None, unique_vecs = np.array(list(set(qoiIndices) - set(repeat_vec))) if comm.rank == 0: - print 'Unique QoIs : ', unique_vecs.shape[0] + logging.info('Unique QoIs : {}'.format(unique_vecs.shape[0])) return unique_vecs -def find_good_sets(grad_tensor, good_sets_prev, unique_indices, - num_optsets_return, cond_tol, volume): +def find_good_sets(input_set, good_sets_prev, unique_indices, + num_optsets_return, measskew_tol, measure): r""" .. todo:: Use the idea we only know vectors are with 10% accuracy to guide inner_prod tol and condnum_tol. Given gradient vectors at each center in the parameter space and given - good sets of size n - 1, return good sets of size n. That is, return - sets of size n that have average condition number less than some tolerance. + good sets of size (n - 1), return good sets of size n. That is, return + sets of size n that have average measure(skewness) less than some tolerance. - :param grad_tensor: Gradient vectors at each centers in the parameter - space :math:`\Lambda` for each QoI map. - :type grad_tensor: :class:`np.ndarray` of shape (num_centers,num_qois,Ldim) - where num_centers is the number of points in :math:'\Lambda' we have - approximated the gradient vectors, num_qois is the total number of - possible QoIs to choose from, Ldim is the dimension of :math:`\Lambda`. + :param input_set: The input sample set. Make sure the attribute _jacobians + is not None. + :type input_set: :class:`~bet.sample.sample_set` :param good_sets_prev: Good sets of QoIs of size n - 1. :type good_sets_prev: :class:`np.ndarray` of size (num_good_sets_prev, n - 1) :param unique_indices: Unique QoIs to consider. 
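A short sketch of the redundancy test used in ``find_unique_vecs`` above: after normalizing the gradient vectors, two QoIs whose gradients are nearly parallel at every center have an average inner product close to 1, and the second one is discarded (hypothetical data and tolerance)::

    import numpy as np

    # Two centers, two candidate QoIs, 2 inputs; QoI 1 is almost parallel to QoI 0
    G = np.array([[[1.0, 0.0], [0.99, 0.1]],
                  [[0.0, 1.0], [0.1, 0.99]]])
    G = G / np.linalg.norm(G, ord=2, axis=2)[:, :, np.newaxis]
    avg_inner_prod = np.sum(G[:, 0, :] * G[:, 1, :]) / G.shape[0]
    print(abs(avg_inner_prod) > 0.9)                         # True, so QoI 1 would be removed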
:type unique_indices: :class:`np.ndarray` of size (num_unique_qois, 1) :param int num_optsets_return: Number of best sets to return - :param float cond_tol: Throw out all sets of QoIs with average condition - number greater than this. - :param boolean volume: If volume is True, use ``calculate_avg_volume`` - to determine optimal QoIs + :param float measskew_tol: Throw out all sets of QoIs with average + measure(skewness) number greater than this. + :param boolean measure: If measure is True, use ``calculate_avg_measure`` + to determine optimal QoIs, else use ``calculate_avg_skewness`` :rtype: tuple :returns: (good_sets, best_sets, optsingvals_tensor) where good sets has size (num_good_sets, n), best sets has size (num_optsets_return, - n + 1) and optsingvals_tensor has size (num_centers, n, Lambda_dim) + n + 1) and optsingvals_tensor has size (num_centers, n, input_dim) """ - num_centers = grad_tensor.shape[0] + + if input_set._jacobians is None: + raise ValueError("You must have jacobians to use this method.") + + num_centers = input_set._jacobians.shape[0] num_qois_return = good_sets_prev.shape[1] + 1 comm.Barrier() @@ -380,12 +475,13 @@ def find_good_sets(grad_tensor, good_sets_prev, unique_indices, optsingvals_tensor = np.zeros([num_centers, num_qois_return, num_optsets_return]) - # For each good set of size n - 1, find the possible sets of size n and + # For each good set of size (n - 1), find the possible sets of size n and # compute the average condition number of each count_qois = 0 for i in range(good_sets_prev.shape[0]): min_ind = np.max(good_sets_prev[i, :]) - # Find all possible combinations of QoIs that include this set of n - 1 + # Find all possible combinations of QoIs that include this set of + # (n - 1) if comm.rank == 0: inds_notin_set = util.fix_dimensions_vector_2darray(list(set(\ unique_indices) - set(good_sets_prev[i, :]))) @@ -403,27 +499,27 @@ def find_good_sets(grad_tensor, good_sets_prev, unique_indices, # Scatter them throughout the processors qoi_combs = comm.scatter(qoi_combs, root=0) - # For each combination, compute the average condition number and add the - # set to good_sets if it is less than cond_tol + # For each combination, compute the average measure(skewness) and add + # the set to good_sets if it is less than measskew_tol for qoi_set in range(len(qoi_combs)): count_qois += 1 curr_set = util.fix_dimensions_vector_2darray(qoi_combs[qoi_set])\ .transpose() - if volume == False: - (current_condnum, singvals) = calculate_avg_condnum(grad_tensor, + if measure == False: + (current_measskew, singvals) = calculate_avg_condnum(input_set, qoi_combs[qoi_set]) else: - (current_condnum, singvals) = calculate_avg_volume(grad_tensor, + (current_measskew, singvals) = calculate_avg_measure(input_set, qoi_combs[qoi_set]) # If its a good set, add it to good_sets - if current_condnum < cond_tol: + if current_measskew < measskew_tol: good_sets = np.append(good_sets, curr_set, axis=0) # If the average condition number is less than the max condition # number in our best_sets, add it to best_sets - if current_condnum < best_sets[-1, 0]: - best_sets[-1, :] = np.append(np.array([current_condnum]), + if current_measskew < best_sets[-1, 0]: + best_sets[-1, :] = np.append(np.array([current_measskew]), qoi_combs[qoi_set]) order = best_sets[:, 0].argsort() best_sets = best_sets[order] @@ -457,10 +553,10 @@ def find_good_sets(grad_tensor, good_sets_prev, unique_indices, good_sets_new = np.append(good_sets_new, each[1:], axis=0) good_sets = good_sets_new - print 'Possible sets of 
QoIs of size %i : '%good_sets.shape[1],\ - np.sum(count_qois) - print 'Good sets of QoIs of size %i : '%good_sets.shape[1],\ - good_sets.shape[0] - 1 + logging.info('Possible sets of QoIs of size {} : {}'.format(\ + good_sets.shape[1], np.sum(count_qois))) + logging.info('Good sets of QoIs of size {} : {}'.format(\ + good_sets.shape[1], good_sets.shape[0] - 1)) comm.Barrier() best_sets = comm.bcast(best_sets, root=0) @@ -468,54 +564,52 @@ def find_good_sets(grad_tensor, good_sets_prev, unique_indices, return (good_sets[1:].astype(int), best_sets, optsingvals_tensor) -def chooseOptQoIs_large(grad_tensor, qoiIndices=None, max_qois_return=None, - num_optsets_return=None, inner_prod_tol=None, cond_tol=None, - volume=False, remove_zeros=True): +def chooseOptQoIs_large(input_set, qoiIndices=None, max_qois_return=None, + num_optsets_return=None, inner_prod_tol=None, measskew_tol=None, + measure=False, remove_zeros=True): r""" - Given gradient vectors at some points (centers) in the parameter space, a - large set of QoIs to choose from, and the number of desired QoIs to return, - this method return the set of optimal QoIs of size 2, 3, ... max_qois_return - to use in the inverse problem by choosing the sets with the smallext average - condition number or volume. + Given gradient vectors at some points (centers) in the input space, a large + set of QoIs to choose from, and the number of desired QoIs to return, this + method returns the set of optimal QoIs of size 2, 3, ... max_qois_return + to use in the inverse problem by choosing the sets with the smallest + average measure(skewness). - :param grad_tensor: Gradient vectors at each point of interest in the - parameter space :math:`\Lambda` for each QoI map. - :type grad_tensor: :class:`np.ndarray` of shape (num_centers, num_qois, - Lambda_dim) where num_centers is the number of points in :math:`\Lambda` - we have approximated the gradient vectors and num_qois is the total - number of possible QoIs to choose from - :param qoiIndices: Set of QoIs to consider from grad_tensor. Default is - range(0, grad_tensor.shape[1]) + :param input_set: The input sample set. Make sure the attribute _jacobians + is not None. + :type input_set: :class:`~bet.sample.sample_set` + :param qoiIndices: Set of QoIs to consider from input_set._jacobians. + Default is range(0, input_set._jacobians.shape[1]) :type qoiIndices: :class:`np.ndarray` of size (1, num QoIs to consider) :param int max_qois_return: Maximum number of desired QoIs to use in the - inverse problem. Default is Lambda_dim + inverse problem. Default is input_dim :param int num_optsets_return: Number of best sets to return Default is 10 :param float inner_prod_tol: Maximum acceptable average inner product between two QoI maps. - :param float cond_tol: Throw out all sets of QoIs with average condition - number greater than this. - :param boolean volume: If volume is True, use ``calculate_avg_volume`` - to determine optimal QoIs + :param float measskew_tol: Throw out all sets of QoIs with average + measure(skewness) number greater than this. + :param boolean measure: If measure is True, use ``calculate_avg_measure`` + to determine optimal QoIs, else use ``calculate_avg_skewness`` :param boolean remove_zeros: If True, ``find_unique_vecs`` will remove any QoIs that have a zero gradient vector at atleast one point in :math:`\Lambda`. 
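A usage sketch for ``chooseOptQoIs_large``, assuming these functions live in ``bet.sensitivity.chooseQoIs``; the ``_jacobians`` attribute is filled directly with hypothetical data for brevity::

    import numpy as np
    import bet.sample as samp
    import bet.sensitivity.chooseQoIs as cqoi

    input_set = samp.sample_set(3)                     # 3 input parameters
    # Hypothetical Jacobians: 10 centers, 6 candidate QoIs, 3 inputs
    input_set._jacobians = np.random.random((10, 6, 3))

    # best_sets[k] holds the best QoI sets of size k+2 (sizes 2 .. input_dim);
    # each row is the averaged ranking value followed by the QoI indices
    best_sets = cqoi.chooseOptQoIs_large(input_set, num_optsets_return=5,
                                         measure=False)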
:rtype: tuple - :returns: (condnum_indices_mat, optsingvals) where condnum_indices_mat has - shape (num_optsets_return, num_qois_return+1) and optsingvals - has shape (num_centers, num_qois_return, num_optsets_return) + :returns: (measure_skewness_indices_mat, optsingvals) where + measure_skewness_indices_mat has shape (num_optsets_return, + num_qois_return+1) and optsingvals has shape (num_centers, + num_qois_return, num_optsets_return) """ - (best_sets, _) = chooseOptQoIs_large_verbose(grad_tensor, qoiIndices, - max_qois_return, num_optsets_return, inner_prod_tol, cond_tol, volume, + (best_sets, _) = chooseOptQoIs_large_verbose(input_set, qoiIndices, + max_qois_return, num_optsets_return, inner_prod_tol, measskew_tol, measure, remove_zeros) return best_sets -def chooseOptQoIs_large_verbose(grad_tensor, qoiIndices=None, +def chooseOptQoIs_large_verbose(input_set, qoiIndices=None, max_qois_return=None, num_optsets_return=None, inner_prod_tol=None, - cond_tol=None, volume=False, remove_zeros=True): + measskew_tol=None, measure=False, remove_zeros=True): r""" Given gradient vectors at some points (centers) in the parameter space, a large set of QoIs to choose from, and the number of desired QoIs to return, @@ -525,67 +619,66 @@ def chooseOptQoIs_large_verbose(grad_tensor, qoiIndices=None, matrices formed by the gradient vectors of the optimal QoIs at each center is returned. - :param grad_tensor: Gradient vectors at each point of interest in the - parameter space :math:`\Lambda` for each QoI map. - :type grad_tensor: :class:`np.ndarray` of shape (num_centers, num_qois, - Lambda_dim) where num_centers is the number of points in :math:`\Lambda` - we have approximated the gradient vectors and num_qois is the total - number of possible QoIs to choose from. - :param qoiIndices: Set of QoIs to consider from grad_tensor. Default is - range(0, grad_tensor.shape[1]). + :param input_set: The input sample set. Make sure the attribute _jacobians + is not None. + :type input_set: :class:`~bet.sample.sample_set` + :param qoiIndices: Set of QoIs to consider from G. Default is + range(0, G.shape[1]). :type qoiIndices: :class:`np.ndarray` of size (1, num QoIs to consider) :param int max_qois_return: Maximum number of desired QoIs to use in the - inverse problem. Default is Lambda_dim. + inverse problem. Default is input_dim. :param int num_optsets_return: Number of best sets to return. Default is 10. :param float inner_prod_tol: Throw out one vectors from each pair of QoIs that has average inner product greater than this. Default is 0.9. - :param float cond_tol: Throw out all sets of QoIs with average condition - number greater than this. Default is max_float. - :param boolean volume: If volume is True, use ``calculate_avg_volume`` - to determine optimal QoIs + :param float measskew_tol: Throw out all sets of QoIs with average + measure(skewness) number greater than this. Default is max_float. + :param boolean measure: If measure is True, use ``calculate_avg_measure`` + to determine optimal QoIs, else use ``calculate_avg_skewness`` :param boolean remove_zeros: If True, ``find_unique_vecs`` will remove any QoIs that have a zero gradient vector at atleast one point in :math:`\Lambda`. :rtype: tuple - :returns: (condnum_indices_mat, optsingvals) where condnum_indices_mat has - shape (num_optsets_return, num_qois_return+1) and optsingvals is a list - where each element has shape (num_centers, num_qois_return, - num_optsets_return). num_qois_return will change for each element of - the list. 
+ :returns: (measure_skewness_indices_mat, optsingvals) where + measure_skewness_indices_mat has shape (num_optsets_return, + num_qois_return+1) and optsingvals is a list where each element has + shape (num_centers, num_qois_return, num_optsets_return). + num_qois_return will change for each element of the list. """ - Lambda_dim = grad_tensor.shape[2] + input_dim = input_set._dim + if input_set._jacobians is None: + raise ValueError("You must have jacobians to use this method.") if qoiIndices is None: - qoiIndices = range(0, grad_tensor.shape[1]) + qoiIndices = range(0, input_set._jacobians.shape[1]) if max_qois_return is None: - max_qois_return = Lambda_dim + max_qois_return = input_dim if num_optsets_return is None: num_optsets_return = 10 if inner_prod_tol is None: inner_prod_tol = 1.0 - if cond_tol is None: - cond_tol = np.inf + if measskew_tol is None: + measskew_tol = np.inf # Find the unique QoIs to consider - unique_indices = find_unique_vecs(grad_tensor, inner_prod_tol, qoiIndices, + unique_indices = find_unique_vecs(input_set, inner_prod_tol, qoiIndices, remove_zeros) if comm.rank == 0: - print 'Unique Indices are : ', unique_indices + logging.info('Unique Indices are : {}'.format(unique_indices)) good_sets_curr = util.fix_dimensions_vector_2darray(unique_indices) best_sets = [] optsingvals_list = [] - # Given good sets of QoIs of size n - 1, find the good sets of size n + # Given good sets of QoIs of size (n - 1), find the good sets of size n for qois_return in range(2, max_qois_return + 1): (good_sets_curr, best_sets_curr, optsingvals_tensor_curr) = \ - find_good_sets(grad_tensor, good_sets_curr, unique_indices, - num_optsets_return, cond_tol, volume) + find_good_sets(input_set, good_sets_curr, unique_indices, + num_optsets_return, measskew_tol, measure) best_sets.append(best_sets_curr) optsingvals_list.append(optsingvals_tensor_curr) if comm.rank == 0: - print best_sets_curr + logging.info(best_sets_curr) return (best_sets, optsingvals_list) diff --git a/bet/sensitivity/gradients.py b/bet/sensitivity/gradients.py index 266965c7..b833bdb5 100644 --- a/bet/sensitivity/gradients.py +++ b/bet/sensitivity/gradients.py @@ -1,58 +1,62 @@ -# Copyright (C) 2014-2015 The BET Development Team +# Copyright (C) 2014-2016 The BET Development Team """ -This module contains functions for approximating gradient vectors of QoI maps. -All methods that cluster points around centers are written to return the samples -in the following order : CENTERS, FOLLOWED BY THE CLUSTER AROUND THE FIRST -CENTER, THEN THE CLUSTER AROUND THE SECOND CENTER AND SO ON. +This module contains functions for approximating jacobians of QoI maps. +All methods that cluster points around centers are written to return the +input_set._values in the following order : CENTERS, FOLLOWED BY THE CLUSTER +AROUND THE FIRST CENTER, THEN THE CLUSTER AROUND THE SECOND CENTER AND SO ON. """ import numpy as np import scipy.spatial as spatial import bet.util as util import sys -def sample_linf_ball(centers, num_close, rvec, lam_domain=None): +def sample_linf_ball(input_set, num_close, rvec): r""" Pick num_close points in a the l-infinity ball of length 2*rvec around a - point in :math:`\Lambda`, do this for each point in centers. If this box - extends outside of :math:`\Lambda`, we sample the intersection. + point in the input space, do this for each point in centers. If this box + extends outside of the domain of the input space, we sample the + intersection. 
- :param centers: Points in :math:`\Lambda` to cluster points around - :type centers: :class:`np.ndarray` of shape (num_centers, Lambda_dim) + :param input_set: The input sample set. Make sure the attribute _values is + not None. + :type input_set: :class:`~bet.sample.sample_set` :param int num_close: Number of points in each cluster :param rvec: Each side of the box will have length 2*rvec[i] - :type rvec: :class:`np.ndarray` of shape (Lambda_dim,) - :param lam_domain: The domain of the parameter space - :type lam_domain: :class:`np.ndarray` of shape (Lambda_dim, 2) + :type rvec: :class:`np.ndarray` of shape (input_dim,) - :rtype: :class:`np.ndarray` of shape ((num_close+1)*num_centers, Lambda_dim) + :rtype: :class:`np.ndarray` of shape ((num_close+1)*num_centers, input_dim) :returns: Centers and clusters of samples near each center """ - Lambda_dim = centers.shape[1] + if input_set._values is None: + raise ValueError("You must have values to use this method.") + input_dim = input_set._dim + centers = input_set._values num_centers = centers.shape[0] + input_domain = input_set._domain rvec = util.fix_dimensions_vector(rvec) #If no lam_domain, set domain large - if lam_domain is None: - lam_domain = np.zeros([Lambda_dim, 2]) - lam_domain[:, 0] = -sys.float_info[0] - lam_domain[:, 1] = sys.float_info[0] + if input_domain is None: + input_domain = np.zeros([input_dim, 2]) + input_domain[:, 0] = -sys.float_info[0] + input_domain[:, 1] = sys.float_info[0] # Define bounds for each box left = np.maximum( - centers - rvec, np.ones([num_centers, Lambda_dim]) * lam_domain[:, 0]) + centers - rvec, np.ones([num_centers, input_dim]) * input_domain[:, 0]) right = np.minimum( - centers + rvec, np.ones([num_centers, Lambda_dim]) * lam_domain[:, 1]) + centers + rvec, np.ones([num_centers, input_dim]) * input_domain[:, 1]) # Samples each box uniformly samples = np.repeat(right - left, num_close, axis=0) * np.random.random( - [num_centers * num_close, Lambda_dim]) + np.repeat(left, num_close, \ + [num_centers * num_close, input_dim]) + np.repeat(left, num_close, \ axis=0) return np.concatenate([centers, samples]) -def sample_l1_ball(centers, num_close, rvec): +def sample_l1_ball(input_set, num_close, rvec): r""" Uniformly sample the l1-ball (defined by 2^dim simplices). Then scale each dimension according to rvec and translate the center to centers. @@ -60,17 +64,21 @@ def sample_l1_ball(centers, num_close, rvec): samples to be placed outside of lam_domain. Please place your centers accordingly.* - :param centers: Points in :math:`\Lambda` to cluster samples around - :type centers: :class:`np.ndarray` of shape (num_centers, Ldim) + :param input_set: The input sample set. Make sure the attribute _values is + not None. 
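A brief usage sketch of ``sample_linf_ball`` above; the ``set_values`` accessor on ``bet.sample.sample_set`` is assumed here, while ``set_domain`` appears elsewhere in this changeset::

    import numpy as np
    import bet.sample as samp
    import bet.sensitivity.gradients as grad

    centers = samp.sample_set(2)
    centers.set_domain(np.array([[0.0, 1.0], [0.0, 1.0]]))
    centers.set_values(np.array([[0.2, 0.2], [0.8, 0.8]]))   # assumed setter

    # 3 extra points in the box of half-width 0.05 around each center,
    # clipped to the domain; result has shape ((3+1)*2, 2) = (8, 2)
    cluster = grad.sample_linf_ball(centers, 3, np.array([0.05, 0.05]))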
+ :type input_set: :class:`~bet.sample.sample_set` :param int num_close: Number of samples in each l1 ball :param rvec: The radius of the l1 ball, along each axis - :type rvec: :class:`np.ndarray` of shape (Lambda_dim) + :type rvec: :class:`np.ndarray` of shape (input_dim) - :rtype: :class:`np.ndarray` of shape ((num_close+1)*num_centers, Lambda_dim) + :rtype: :class:`np.ndarray` of shape ((num_close+1)*num_centers, input_dim) :returns: Uniform random samples from an l1 ball around each center """ - Lambda_dim = centers.shape[1] + if input_set._values is None: + raise ValueError("You must have values to use this method.") + input_dim = input_set._dim + centers = input_set._values rvec = util.fix_dimensions_vector(rvec) samples = np.zeros([(num_close + 1) * centers.shape[0], centers.shape[1]]) @@ -78,36 +86,36 @@ def sample_l1_ball(centers, num_close, rvec): # We choose weighted random distance from the center for each new sample random_dist = np.random.random([num_close, 1]) - weight_vec = random_dist**(1. / Lambda_dim) + weight_vec = random_dist**(1. / input_dim) # For each center, randomly sample the l1_ball for cen in range(centers.shape[0]): # Begin by uniformly sampling the unit simplex in the first quadrant - # Choose Lambda_dim-1 reals uniformly between 0 and weight_vec for each + # Choose input_dim-1 reals uniformly between 0 and weight_vec for each # new sample - random_mat = np.random.random([num_close, Lambda_dim - 1]) * \ - np.tile(weight_vec, (1, Lambda_dim - 1)) + random_mat = np.random.random([num_close, input_dim - 1]) * \ + np.tile(weight_vec, (1, input_dim - 1)) # Sort the random_mat random_mat = np.sort(random_mat, 1) # Contrust weight_mat so that the first column is zeros, the next - # Lambda_dim-1 columns are the sorted reals between 0 and weight_vec, + # input_dim-1 columns are the sorted reals between 0 and weight_vec, # and the last column is weight_vec. - weight_mat = np.zeros([num_close, Lambda_dim + 1]) - weight_mat[:, 1:Lambda_dim] = random_mat - weight_mat[:, Lambda_dim] = np.array(weight_vec).transpose() - - # The differences between the Lambda_dim+1 columns will give us - # random points in the unit simplex of dimension Lambda_dim. - samples_cen = np.zeros([num_close, Lambda_dim]) - for Ldim in range(Lambda_dim): + weight_mat = np.zeros([num_close, input_dim + 1]) + weight_mat[:, 1:input_dim] = random_mat + weight_mat[:, input_dim] = np.array(weight_vec).transpose() + + # The differences between the input_dim+1 columns will give us + # random points in the unit simplex of dimension input_dim. + samples_cen = np.zeros([num_close, input_dim]) + for Ldim in range(input_dim): samples_cen[:, Ldim] = weight_mat[:, Ldim + 1] - weight_mat[:, Ldim] # Assign a random sign to each element of each new sample # This give us samples in the l1_ball, not just the unit simplex in # the first quadrant - rand_sign = 2 * np.round(np.random.random([num_close, Lambda_dim])) - 1 + rand_sign = 2 * np.round(np.random.random([num_close, input_dim])) - 1 samples_cen = samples_cen * rand_sign # Scale each dimension according to rvec and translate to center @@ -119,39 +127,43 @@ def sample_l1_ball(centers, num_close, rvec): return samples -def pick_ffd_points(centers, rvec): +def pick_ffd_points(input_set, rvec): r""" - Pick Lambda_dim points, for each centers, for a forward finite + Pick input_dim points, for each centers, for a forward finite difference gradient approximation. 
The points are returned in the order: centers, followed by the cluster around the first center, then the cluster around the second center and so on. - :param centers: Points in :math:`\Lambda` the place stencil around - :type centers: :class:`np.ndarray` of shape (num_centers, Lambda_dim) + :param input_set: The input sample set. Make sure the attribute _values is + not None. + :type input_set: :class:`~bet.sample.sample_set` :param rvec: The radius of the stencil, along each axis - :type rvec: :class:`np.ndarray` of shape (Lambda_dim,) + :type rvec: :class:`np.ndarray` of shape (input_dim,) - :rtype: :class:`np.ndarray` of shape ((Lambda_dim+1)*num_centers, - Lambda_dim) + :rtype: :class:`np.ndarray` of shape ((input_dim+1)*num_centers, + input_dim) :returns: Samples for centered finite difference stencil for each point in centers. """ - Lambda_dim = centers.shape[1] + if input_set._values is None: + raise ValueError("You must have values to use this method.") + input_dim = input_set._dim + centers = input_set._values num_centers = centers.shape[0] - samples = np.repeat(centers, Lambda_dim, axis=0) + samples = np.repeat(centers, input_dim, axis=0) rvec = util.fix_dimensions_vector(rvec) - # Construct a [num_centers*(Lambda_dim+1), Lambda_dim] matrix that + # Construct a [num_centers*(input_dim+1), input_dim] matrix that # translates the centers to the FFD points. - translate = np.tile(np.eye(Lambda_dim) * rvec, (num_centers, 1)) + translate = np.tile(np.eye(input_dim) * rvec, (num_centers, 1)) samples = samples + translate return np.concatenate([centers, samples]) -def pick_cfd_points(centers, rvec): +def pick_cfd_points(input_set, rvec): r""" - Pick 2*Lambda_dim points, for each center, for centered finite difference + Pick 2*input_dim points, for each center, for centered finite difference gradient approximation. The center are not needed for the CFD gradient approximation, they are returned for consistency with the other methods and because of the common need to have not just the gradient but also the QoI @@ -159,25 +171,29 @@ def pick_cfd_points(centers, rvec): in the order: centers, followed by the cluster around the first center, then the cluster around the second center and so on. - :param centers: Points in :math:`\Lambda` to cluster points around - :type centers: :class:`np.ndarray` of shape (num_centers, Lambda_dim) + :param input_set: The input sample set. Make sure the attribute _values is + not None. + :type input_set: :class:`~bet.sample.sample_set` :param rvec: The radius of the stencil, along each axis - :type rvec: :class:`np.ndarray` of shape (Lambda_dim,) + :type rvec: :class:`np.ndarray` of shape (input_dim,) - :rtype: :class:`np.ndarray` of shape ((2*Lambda_dim+1)*num_centers, - Lambda_dim) + :rtype: :class:`np.ndarray` of shape ((2*input_dim+1)*num_centers, + input_dim) :returns: Samples for centered finite difference stencil for each point in centers. 
""" - Lambda_dim = centers.shape[1] + if input_set._values is None: + raise ValueError("You must have values to use this method.") + input_dim = input_set._dim + centers = input_set._values num_centers = centers.shape[0] - samples = np.repeat(centers, 2 * Lambda_dim, axis=0) + samples = np.repeat(centers, 2 * input_dim, axis=0) rvec = util.fix_dimensions_vector(rvec) - # Contstruct a [num_centers*2*Lambda_dim, Lambda_dim] matrix that + # Contstruct a [num_centers*2*input_dim, input_dim] array that # translates the centers to the CFD points - ident = np.eye(Lambda_dim) * rvec + ident = np.eye(input_dim) * rvec translate = np.tile(np.append(ident, -ident, axis=0), (num_centers, 1)) samples = samples + translate @@ -249,63 +265,72 @@ def radial_basis_function_dxi(r, xi, kernel=None, ep=None): return rbfdxi -def calculate_gradients_rbf(samples, data, centers=None, num_neighbors=None, - RBF=None, ep=None, normalize=True): +def calculate_gradients_rbf(input_set, output_set, input_set_centers=None, + num_neighbors=None, RBF=None, ep=None, normalize=True): r""" Approximate gradient vectors at ``num_centers, centers.shape[0]`` points in the parameter space for each QoI map using a radial basis function interpolation method. - :param samples: Samples for which the model has been solved. - :type samples: :class:`np.ndarray` of shape (num_samples, Lambda_dim) - :param data: QoI values corresponding to each sample. - :type data: :class:`np.ndarray` of shape (num_samples, Data_dim) - :param centers: Points in :math:`\Lambda` at which to approximate gradient - information. - :type centers: :class:`np.ndarray` of shape (num_exval, Lambda_dim) + :param input_set: The input sample set. Make sure the attribute _values is + not None. + :type input_set: :class:`~bet.sample.sample_set` + :param output_set: The output sample set. Make sure the attribute _values + is not None. + :type output_set: :class:`~bet.sample.sample_set` + :param input_set_centers: The input centers sample set. Make sure the + attribute _values is not None. + :type input_set_centers: :class:`~bet.sample.sample_set` :param int num_neighbors: Number of nearest neighbors to use in gradient - approximation. Default value is Lambda_dim + 2. + approximation. Default value is input_dim + 2. :param string RBF: Choice of radial basis function. Default is Gaussian :param float ep: Choice of shape parameter for radial basis function. 
Default value is 1.0 :param boolean normalize: If normalize is True, normalize each gradient vector - :rtype: :class:`np.ndarray` of shape (num_samples, Data_dim, Lambda_dim) + :rtype: :class:`np.ndarray` of shape (num_samples, output_dim, input_dim) :returns: Tensor representation of the gradient vectors of each QoI map at each point in centers """ + if input_set._values is None or output_set._values is None: + raise ValueError("You must have values to use this method.") + samples = input_set._values + data = output_set._values + data = util.fix_dimensions_vector_2darray(util.clean_data(data)) - Lambda_dim = samples.shape[1] + input_dim = samples.shape[1] num_model_samples = samples.shape[0] - Data_dim = data.shape[1] + output_dim = data.shape[1] if num_neighbors is None: - num_neighbors = Lambda_dim + 2 + num_neighbors = input_dim + 2 if ep is None: ep = 1.0 if RBF is None: RBF = 'Gaussian' # If centers is None we assume the user chose clusters of size - # Lambda_dim + 2 - if centers is None: - num_centers = num_model_samples / (Lambda_dim + 2) + # input_dim + 2 + if input_set_centers is None: + num_centers = num_model_samples / (input_dim + 2) centers = samples[:num_centers] else: + centers = input_set_centers._values num_centers = centers.shape[0] - rbf_tensor = np.zeros([num_centers, num_model_samples, Lambda_dim]) - gradient_tensor = np.zeros([num_centers, Data_dim, Lambda_dim]) + rbf_tensor = np.zeros([num_centers, num_model_samples, input_dim]) + gradient_tensor = np.zeros([num_centers, output_dim, input_dim]) tree = spatial.KDTree(samples) - # For each centers, interpolate the data using the rbf chosen and - # then evaluate the partial derivative of that rbf at the desired point. + # For each center, interpolate the data using the rbf chosen and + # then evaluate the partial derivative of that interpolant at the desired + # point. for c in range(num_centers): # Find the k nearest neighbors and their distances to centers[c,:] [r, nearest] = tree.query(centers[c, :], k=num_neighbors) - r = np.tile(r, (Lambda_dim, 1)) + r = np.tile(r, (input_dim, 1)) # Compute the linf distances to each of the nearest neighbors diffVec = (centers[c, :] - samples[nearest, :]).transpose() @@ -335,51 +360,58 @@ def calculate_gradients_rbf(samples, data, centers=None, num_neighbors=None, # Normalize each gradient vector gradient_tensor = gradient_tensor/np.tile(norm_gradient_tensor, - (Lambda_dim, 1, 1)).transpose(1, 2, 0) + (input_dim, 1, 1)).transpose(1, 2, 0) return gradient_tensor -def calculate_gradients_ffd(samples, data, normalize=True): +def calculate_gradients_ffd(input_set, output_set, normalize=True): """ Approximate gradient vectors at ``num_centers, centers.shape[0]`` points in the parameter space for each QoI map. THIS METHOD IS DEPENDENT ON USING :meth:~bet.sensitivity.gradients.pick_ffd_points TO CHOOSE SAMPLES FOR THE FFD STENCIL AROUND EACH CENTER. THE ORDERING MATTERS. - :param samples: Samples for which the model has been solved. - :type samples: :class:`np.ndarray` of shape (num_samples, Lambda_dim) - :param data: QoI values corresponding to each sample. - :type data: :class:`np.ndarray` of shape (num_samples, Data_dim) + :param input_set: The input sample set. Make sure the attribute _values is + not None. + :type input_set: :class:`~bet.sample.sample_set` + :param output_set: The output sample set. Make sure the attribute _values + is not None. 
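An end-to-end sketch of the RBF-based Jacobian approximation above, with a hypothetical two-QoI model; the ``set_values``/``get_values`` accessors on ``bet.sample.sample_set`` are assumed::

    import numpy as np
    import bet.sample as samp
    import bet.sensitivity.gradients as grad

    def model(x):                                # hypothetical QoI map: 2 inputs -> 2 outputs
        return np.column_stack([x[:, 0]**2, x[:, 0]*x[:, 1]])

    centers = samp.sample_set(2)
    centers.set_values(np.random.random((20, 2)))                        # assumed setter

    clusters = samp.sample_set(2)
    clusters.set_values(grad.sample_l1_ball(centers, 4, np.array([0.01, 0.01])))

    outputs = samp.sample_set(2)
    outputs.set_values(model(clusters.get_values()))                     # assumed accessor

    # (num_centers, output_dim, input_dim) array of normalized gradient vectors
    jacobians = grad.calculate_gradients_rbf(clusters, outputs, centers)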
+ :type output_set: :class:`~bet.sample.sample_set` :param boolean normalize: If normalize is True, normalize each gradient vector - :rtype: :class:`np.ndarray` of shape (num_samples, Data_dim, Lambda_dim) + :rtype: :class:`np.ndarray` of shape (num_samples, output_dim, input_dim) :returns: Tensor representation of the gradient vectors of each QoI map at each point in centers """ + if input_set._values is None or output_set._values is None: + raise ValueError("You must have values to use this method.") + samples = input_set._values + data = output_set._values + num_model_samples = samples.shape[0] - Lambda_dim = samples.shape[1] - num_centers = num_model_samples / (Lambda_dim + 1) + input_dim = samples.shape[1] + num_centers = num_model_samples / (input_dim + 1) # Find rvec from the first cluster of samples - rvec = samples[num_centers:num_centers + Lambda_dim, :] - samples[0, :] + rvec = samples[num_centers:num_centers + input_dim, :] - samples[0, :] rvec = util.fix_dimensions_vector_2darray(rvec.diagonal()) # Clean the data data = util.fix_dimensions_vector_2darray(util.clean_data(data)) num_qois = data.shape[1] - gradient_tensor = np.zeros([num_centers, num_qois, Lambda_dim]) + gradient_tensor = np.zeros([num_centers, num_qois, input_dim]) rvec = np.tile(np.repeat(rvec, num_qois, axis=1), [num_centers, 1]) # Compute the gradient vectors using the standard FFD stencil gradient_mat = (data[num_centers:] - np.repeat(data[0:num_centers], \ - Lambda_dim, axis=0)) * (1. / rvec) + input_dim, axis=0)) * (1. / rvec) # Reshape and organize gradient_tensor = np.reshape(gradient_mat.transpose(), [num_qois, - Lambda_dim, num_centers], order='F').transpose(2, 0, 1) + input_dim, num_centers], order='F').transpose(2, 0, 1) if normalize: # Compute the norm of each vector @@ -390,56 +422,62 @@ def calculate_gradients_ffd(samples, data, normalize=True): # Normalize each gradient vector gradient_tensor = gradient_tensor/np.tile(norm_gradient_tensor, - (Lambda_dim, 1, 1)).transpose(1, 2, 0) + (input_dim, 1, 1)).transpose(1, 2, 0) return gradient_tensor -def calculate_gradients_cfd(samples, data, normalize=True): +def calculate_gradients_cfd(input_set, output_set, normalize=True): """ Approximate gradient vectors at ``num_centers, centers.shape[0]`` points in the parameter space for each QoI map. THIS METHOD IS DEPENDENT ON USING :meth:~bet.sensitivity.pick_cfd_points TO CHOOSE SAMPLES FOR THE CFD STENCIL AROUND EACH CENTER. THE ORDERING MATTERS. - :param samples: Samples for which the model has been solved. - :type samples: :class:`np.ndarray` of shape - (2*Lambda_dim*num_centers, Lambda_dim) - :param data: QoI values corresponding to each sample. - :type data: :class:`np.ndarray` of shape (num_samples, Data_dim) + :param input_set: The input sample set. Make sure the attribute _values is + not None. + :type input_set: :class:`~bet.sample.sample_set` + :param output_set: The output sample set. Make sure the attribute _values + is not None. 
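A quick numeric check of the forward-difference arithmetic used above: for a linear map the FFD stencil recovers the Jacobian exactly (pure numpy, hypothetical matrix ``A``)::

    import numpy as np

    A = np.array([[1.0, 2.0],
                  [3.0, 4.0]])                    # Q(x) = A x, so the Jacobian is A
    center = np.array([0.5, 0.5])
    r = 1e-4                                      # FFD step along each axis
    stencil = center + r * np.eye(2)              # the input_dim forward points
    grad_mat = (stencil.dot(A.T) - center.dot(A.T)) / r
    print(grad_mat.T)                             # rows are the QoI gradients; equals A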
+ :type output_set: :class:`~bet.sample.sample_set` :param boolean normalize: If normalize is True, normalize each gradient vector - :rtype: :class:`np.ndarray` of shape (num_samples, Data_dim, Lambda_dim) + :rtype: :class:`np.ndarray` of shape (num_samples, output_dim, input_dim) :returns: Tensor representation of the gradient vectors of each QoI map at each point in centers """ + if input_set._values is None or output_set._values is None: + raise ValueError("You must have values to use this method.") + samples = input_set._values + data = output_set._values + num_model_samples = samples.shape[0] - Lambda_dim = samples.shape[1] - num_centers = num_model_samples / (2*Lambda_dim + 1) + input_dim = samples.shape[1] + num_centers = num_model_samples / (2*input_dim + 1) # Find rvec from the first cluster of samples - rvec = samples[num_centers:num_centers + Lambda_dim, :] - samples[0, :] + rvec = samples[num_centers:num_centers + input_dim, :] - samples[0, :] rvec = util.fix_dimensions_vector_2darray(rvec.diagonal()) # Clean the data data = util.fix_dimensions_vector_2darray(util.clean_data( data[num_centers:])) num_qois = data.shape[1] - gradient_tensor = np.zeros([num_centers, num_qois, Lambda_dim]) + gradient_tensor = np.zeros([num_centers, num_qois, input_dim]) rvec = np.tile(np.repeat(rvec, num_qois, axis=1), [num_centers, 1]) # Construct indices for CFD gradient approxiation - inds = np.repeat(range(0, 2 * Lambda_dim * num_centers, 2 * Lambda_dim), - Lambda_dim) + np.tile(range(0, Lambda_dim), num_centers) - inds = np.array([inds, inds+Lambda_dim]).transpose() + inds = np.repeat(range(0, 2 * input_dim * num_centers, 2 * input_dim), + input_dim) + np.tile(range(0, input_dim), num_centers) + inds = np.array([inds, inds+input_dim]).transpose() gradient_mat = (data[inds[:, 0]] - data[inds[:, 1]]) * (0.5 / rvec) # Reshape and organize gradient_tensor = np.reshape(gradient_mat.transpose(), [num_qois, - Lambda_dim, num_centers], order='F').transpose(2, 0, 1) + input_dim, num_centers], order='F').transpose(2, 0, 1) if normalize: # Compute the norm of each vector @@ -450,6 +488,6 @@ def calculate_gradients_cfd(samples, data, normalize=True): # Normalize each gradient vector gradient_tensor = gradient_tensor/np.tile(norm_gradient_tensor, - (Lambda_dim, 1, 1)).transpose(1, 2, 0) + (input_dim, 1, 1)).transpose(1, 2, 0) return gradient_tensor diff --git a/bet/util.py b/bet/util.py index afdac6f2..80ce94e5 100644 --- a/bet/util.py +++ b/bet/util.py @@ -1,7 +1,7 @@ # Copyright (C) 2014-2015 The BET Development Team """ -The module contains general tools for BET. +This module contains general tools for BET. """ import numpy as np diff --git a/doc/bet.rst b/doc/bet.rst index cf9783e1..31d46ab6 100644 --- a/doc/bet.rst +++ b/doc/bet.rst @@ -22,6 +22,14 @@ bet.Comm module :undoc-members: :show-inheritance: +bet.sample module +----------------- + +.. automodule:: bet.sample + :members: + :undoc-members: + :show-inheritance: + bet.util module --------------- diff --git a/doc/bet.sampling.rst b/doc/bet.sampling.rst index 7a53b1b9..b443ee02 100644 --- a/doc/bet.sampling.rst +++ b/doc/bet.sampling.rst @@ -4,6 +4,14 @@ bet.sampling package Submodules ---------- +bet.sampling.LpGeneralizedSamples module +---------------------------------------- + +.. 
automodule:: bet.sampling.LpGeneralizedSamples + :members: + :undoc-members: + :show-inheritance: + bet.sampling.adaptiveSampling module ------------------------------------ diff --git a/examples/FEniCS/BET_script.py b/examples/FEniCS/BET_script.py new file mode 100644 index 00000000..4df77161 --- /dev/null +++ b/examples/FEniCS/BET_script.py @@ -0,0 +1,163 @@ +#! /usr/bin/env python + +# Copyright (C) 2014-2016 The BET Development Team + +r""" +An installation of FEniCS using the same python as used for +installing BET is required to run this example. + +This example generates samples for a KL expansion associated with +a covariance defined by ``cov`` in myModel.py that on an L-shaped +mesh defining the permeability field for a Poisson equation. + +The quantities of interest (QoI) are defined as two spatial +averages of the solution to the PDE. + +The user defines the dimension of the parameter space (corresponding +to the number of KL terms) and the number of samples in this space. +""" + +import numpy as np +import bet.calculateP as calculateP +import bet.postProcess as postProcess +import bet.calculateP.simpleFunP as simpleFunP +import bet.calculateP.calculateP as calculateP +import bet.postProcess.plotP as plotP +import bet.postProcess.plotDomains as plotD +import bet.sample as samp +import bet.sampling.basicSampling as bsam +from myModel import my_model + +# Initialize input parameter sample set object +num_KL_terms = 2 +input_samples = samp.sample_set(2) + +# Set parameter domain +KL_term_min = -3.0 +KL_term_max = 3.0 +input_samples.set_domain(np.repeat([[KL_term_min, KL_term_max]], + num_KL_terms, + axis=0)) + +# Define the sampler that will be used to create the discretization +# object, which is the fundamental object used by BET to compute +# solutions to the stochastic inverse problem +sampler = bsam.sampler(my_model) + +''' +Suggested changes for user: + +Try with and without random sampling. + +If using random sampling, try num_samples = 1E3 and 1E4. +What happens when num_samples = 1E2? +Try using 'lhs' instead of 'random' in the random_sample_set. + +If using regular sampling, try different numbers of samples +per dimension. +''' +# Generate samples on the parameter space +randomSampling = False +if randomSampling is True: + sampler.random_sample_set('random', input_samples, num_samples=1E4) +else: + sampler.regular_sample_set(input_samples, num_samples_per_dim=[50, 50]) + +''' +Suggested changes for user: + +A standard Monte Carlo (MC) assumption is that every Voronoi cell +has the same volume. If a regular grid of samples was used, then +the standard MC assumption is true. + +See what happens if the MC assumption is not assumed to be true, and +if different numbers of points are used to estimate the volumes of +the Voronoi cells. +''' +MC_assumption = True +# Estimate volumes of Voronoi cells associated with the parameter samples +if MC_assumption is False: + input_samples.estimate_volume(n_mc_points=1E5) +else: + input_samples.estimate_volume_mc() + +# Create the discretization object using the input samples +my_discretization = sampler.compute_QoI_and_create_discretization(input_samples, + savefile='FEniCS_Example.txt.gz') + +''' +Suggested changes for user: + +Try different reference parameters. 
+''' +# Define the reference parameter +#param_ref = np.zeros((1,num_KL_terms)) +param_ref = np.ones((1,num_KL_terms)) + +# Compute the reference QoI +Q_ref = my_model(param_ref) + +# Create some plots of input and output discretizations +plotD.scatter_2D(input_samples, p_ref=param_ref[0,:], filename='FEniCS_ParameterSamples.eps') +if Q_ref.size == 2: + plotD.show_data(my_discretization, Q_ref=Q_ref[0,:]) + +''' +Suggested changes for user: + +Try different ways of discretizing the probability measure on D defined +as a uniform probability measure on a rectangle or interval depending +on choice of QoI_num in myModel.py. +''' +randomDataDiscretization = False +if randomDataDiscretization is False: + simpleFunP.regular_partition_uniform_distribution_rectangle_scaled( + data_set=my_discretization, Q_ref=Q_ref[0,:], rect_scale=0.1, + center_pts_per_edge=3) +else: + simpleFunP.uniform_partition_uniform_distribution_rectangle_scaled( + data_set=my_discretization, Q_ref=Q_ref[0,:], rect_scale=0.1, + M=50, num_d_emulate=1E5) + +# calculate probablities +calculateP.prob(my_discretization) + +######################################## +# Post-process the results +######################################## +''' +Suggested changes for user: + +At this point, the only thing that should change in the plotP.* inputs +should be either the nbins values or sigma (which influences the kernel +density estimation with smaller values implying a density estimate that +looks more like a histogram and larger values smoothing out the values +more). + +There are ways to determine "optimal" smoothing parameters (e.g., see CV, GCV, +and other similar methods), but we have not incorporated these into the code +as lower-dimensional marginal plots generally have limited value in understanding +the structure of a high dimensional non-parametric probability measure. 
+''' +# calculate 2d marginal probs +(bins, marginals2D) = plotP.calculate_2D_marginal_probs(input_samples, + nbins=20) +# smooth 2d marginals probs (optional) +marginals2D = plotP.smooth_marginals_2D(marginals2D, bins, sigma=0.5) + +# plot 2d marginals probs +plotP.plot_2D_marginal_probs(marginals2D, bins, input_samples, filename="FEniCS", + lam_ref=param_ref[0,:], file_extension=".eps", + plot_surface=False) + +# calculate 1d marginal probs +(bins, marginals1D) = plotP.calculate_1D_marginal_probs(input_samples, + nbins=20) +# smooth 1d marginal probs (optional) +marginals1D = plotP.smooth_marginals_1D(marginals1D, bins, sigma=0.5) +# plot 1d marginal probs +plotP.plot_1D_marginal_probs(marginals1D, bins, input_samples, filename="FEniCS", + lam_ref=param_ref[0,:], file_extension=".eps")
diff --git a/examples/FEniCS/Lshaped.xml b/examples/FEniCS/Lshaped.xml new file mode 100644 index 00000000..4cfe598b --- /dev/null +++ b/examples/FEniCS/Lshaped.xml @@ -0,0 +1,1275 @@
[1,275 lines of FEniCS mesh XML describing the L-shaped domain; the XML markup did not survive extraction and is omitted here]
diff --git
a/examples/FEniCS/meshDS.py b/examples/FEniCS/meshDS.py new file mode 100644 index 00000000..cadfccf1 --- /dev/null +++ b/examples/FEniCS/meshDS.py @@ -0,0 +1,83 @@ +#!/usr/bin/en python + +from dolfin import * +from numpy import * + + +class meshDS(object): + + """Docstring for meshDS. """ + + def __init__(self,mesh): + """TODO: to be defined1. + + :mesh: reads a fenics mesh object + + """ + self._mesh = mesh + self.node_elem = {} # empty dictionary of node to elements connectivity + self.edges_elem = {} # empty dictionary of edges to elements connectivity + + # initialize the mesh and read in the values + self._mesh.init() + self._dim = self._mesh.topology().dim() + self.num_nodes = self._mesh.num_vertices() + self.num_elements = self._mesh.num_cells() + self.num_edges = self._mesh.num_edges() + + def getNodes(self): + """TODO: Docstring for getNodes. + :returns: num of nodes in the mesh + + """ + return self.num_nodes + def getElements(self): + """TODO: Docstring for getElements. + :returns: number of elements in the mesh + + """ + return self.num_elements + + def getEdges(self): + """TODO: Docstring for getElements. + :returns: number of elements in the mesh + + """ + return self.num_edges + def getElemToNodes(self): + """TODO: Docstring for getElemToNodes. + :returns: Elements - Nodes Connectivity array of array + + """ + return self._mesh.cells() + def getNodesToElem(self): + """TODO: Docstring for getNodesToElem. + :returns: returns Nodes to Element connectivity as a dictionary + where nodes_elem[i] is an array of all the elements attached to node i + + """ + for nodes in entities(self._mesh,0): + self.node_elem[nodes.index()] = nodes.entities(self._dim) + return self.node_elem + def getElemVCArray(self): + + """TODO: Docstring for getElemVCArray. + :returns: array of element volume and and an array of element centroid object + Thus elem_centroid_array[i][0] means the x co-ordinate of the centroid for element number i + Thus elem_centroid_array[i][1] means the y co-ordinate of the centroid for element number i + """ + + elem_vol_array = empty((self.num_elements),dtype=float) + elem_centroid_array = empty((self.num_elements),dtype=object) + + cell_indx = 0 + for node_list in self._mesh.cells(): + # First get the cell object corresponding to the cell_indx + cell_obj = Cell(self._mesh,cell_indx) + # Find the cell volume and cell centroid + elem_vol_array[cell_indx] = cell_obj.volume() + elem_centroid_array[cell_indx] = cell_obj.midpoint() + # update cell index + cell_indx = cell_indx + 1 + return elem_vol_array,elem_centroid_array + diff --git a/examples/FEniCS/myModel.py b/examples/FEniCS/myModel.py new file mode 100644 index 00000000..d72f7d60 --- /dev/null +++ b/examples/FEniCS/myModel.py @@ -0,0 +1,182 @@ +# Copyright (C) 2016 The BET Development Team + +# -*- coding: utf-8 -*- +import numpy as np +from dolfin import * +from meshDS import meshDS +from projectKL import projectKL +from poissonRandField import solvePoissonRandomField +import scipy.io as sio + +def my_model(parameter_samples): + # number of parameter samples + numSamples = parameter_samples.shape[0] + + # number of KL expansion terms. + numKL = parameter_samples.shape[1] + + # the samples are the coefficients of the KL expansion typically denoted by xi_k + xi_k = parameter_samples + + ''' + ++++++++++++++++ Steps in Computing the Numerical KL Expansion ++++++++++ + We proceed by loading the mesh and defining the function space for which + the eigenfunctions are defined upon. 
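(For orientation: each eigenpair (lambda_k, phi_k) computed by projectKL enters the truncated expansion formed in the sampling loop further below,

    log k(x) ~ sum_{k=1}^{numKL} xi_k * sqrt(lambda_k) * phi_k(x),

so the coefficients xi_k supplied as parameter samples determine the log-permeability field.)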
+ + Then, we define the covariance kernel which requires correlation lengths + and a standard deviation. + + We then compute the truncated KL expansion. + +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + ''' + + # Step 1: Set up the Mesh and Function Space + + mesh = Mesh("Lshaped.xml") + #mesh = RectangleMesh(0,0,10,10,20,20) + + # Plot the mesh for visual check + #plot(mesh,interactive=True) + + # initialize the mesh to generate connectivity + mesh.init() + + # Random field is projected on the space of Hat functions in the mesh + V = FunctionSpace(mesh, "CG", 1) + + # Step 2: Project covariance in the mesh and get the eigenfunctions + + # Initialize the projectKL object with the mesh + Lmesh = projectKL(mesh) + + # Create the covariance expression to project on the mesh. + etaX = 10.0 + etaY = 10.0 + C = 1 + + # Pick your favorite covariance. Popular choices are Gaussian (of course), + # Exponential, triangular (has finite support which is nice). Check out + # Ghanem and Spanos' book for more classical options. + + # A Gaussian Covariance + ''' + cov = Expression("C*exp(-((x[0]-x[1]))*((x[0]-x[1]))/ex - \ + ((x[2]-x[3]))*((x[2]-x[3]))/ey)", + ex=etaX,ey=etaY, C=C) + ''' + # An Exponential Covariance + cov = Expression("C*exp(-fabs(x[0]-x[1])/ex - fabs(x[2]-x[3])/ey)",ex=etaX,ey=etaY, C=C) + + # Solve the discrete covariance relation on the mesh + Lmesh.projectCovToMesh(numKL,cov) + + # Get the eigenfunctions and eigenvalues + eigen_func = Lmesh.eigen_funcs + eigen_val = Lmesh.eigen_vals + + #print 'eigen_vals' + #print eigen_val + #print eigen_val.sum() + + ''' + ++++++++++++++++ Steps in Solving Poisson with the KL fields ++++++++++++ + First set up the necessary variables and boundary conditions for the + problem. + + Then create the QoI maps defined by average values over some part of the + physical domain. + + Loop through the sample fields and create the permeability defined by the + exponential of the KL field (i.e., the KL expansion represents the log of + the permeability). + + For each sample field, call the PoissonRandomField solver. + +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + ''' + # permeability + perm_k = Function(V) + + # Create Boundary Conditions -- Dirichlet on left and bottom boundary. + # Left Dirichlet Bc + def left_boundary(x,on_boundary): + """TODO: Docstring for left_boundary. + + :x: TODO + :on_boundary: TODO + :returns: TODO + + """ + tol = 1e-14 + return on_boundary and abs(x[0]) < tol + Gamma_0 = DirichletBC(V,Constant(0.0),left_boundary) + + def bottom_boundary(x,on_boundary): + """TODO: Docstring for left_boundary. + + :x: TODO + :on_boundary: TODO + :returns: TODO + + """ + tol = 1e-14 + return on_boundary and abs(x[1]) < tol + Gamma_1 = DirichletBC(V,Constant(0.0),bottom_boundary) + bcs = [Gamma_0,Gamma_1] + + # Setup adjoint boundary conditions. 
+ Gamma_adj_0 = DirichletBC(V,Constant(0.0),left_boundary) + Gamma_adj_1 = DirichletBC(V,Constant(0.0),bottom_boundary) + bcs_adj = [Gamma_adj_0, Gamma_adj_1] + + # Setup the QoI class + class CharFunc(Expression): + def __init__(self, region): + self.a = region[0] + self.b = region[1] + self.c = region[2] + self.d = region[3] + def eval(self, v, x): + v[0] = 0 + if (x[0] >= self.a) & (x[0] <= self.b) & (x[1] >= self.c) & (x[1] <= self.d): + v[0] = 1 + return v + + # Define the QoI maps + Chi_1 = CharFunc([0.75, 1.25, 7.75, 8.25]) + Chi_2 = CharFunc([7.75, 8.25, 0.75, 1.25]) + + QoI_samples = np.zeros([numSamples,2]) + + QoI_deriv_1 = np.zeros([numSamples,numKL]) + QoI_deriv_2 = np.zeros([numSamples,numKL]) + + # For each sample solve the PDE + f = Constant(-1.0) # forcing of Poisson + + for i in range(0, numSamples): + + print "Sample point number: %g" % i + + # create a temp array to store logPerm as sum of KL expansions + # logPerm is log permeability + logPerm = np.zeros((mesh.num_vertices()), dtype=float) + for kl in range(0, numKL): + logPerm += xi_k[i, kl] * \ + sqrt(eigen_val[kl]) * eigen_func[kl].vector().array() + + # permiability is the exponential of log permeability logPerm + perm_k_array = 0.1 + np.exp(logPerm) + # print "Mean value of the random field is: %g" % perm_k_array.mean() + + ## use dof_to_vertex map to map values to the function space + perm_k.vector()[:] = perm_k_array + + # solve Poisson with this random field using FEM + u = solvePoissonRandomField(perm_k, mesh, 1, f, bcs) + + # Compute QoI + QoI_samples[i, 0] = assemble(u * Chi_1 * dx) + QoI_samples[i, 1] = assemble(u * Chi_2 * dx) + + + return QoI_samples \ No newline at end of file diff --git a/examples/FEniCS/poissonRandField.py b/examples/FEniCS/poissonRandField.py new file mode 100644 index 00000000..626474a6 --- /dev/null +++ b/examples/FEniCS/poissonRandField.py @@ -0,0 +1,16 @@ +from dolfin import* + +def solvePoissonRandomField(rand_field,mesh,poly_order,f,bcs): + """ + Solves the poisson equation with a random field : + (\grad \dot (rand_field \grad(u)) = -f) + """ + # create the function space + V = FunctionSpace(mesh, "CG", poly_order) + u = TrialFunction(V) + v = TestFunction(V) + L = f*v*dx + a = inner(rand_field*nabla_grad(u),nabla_grad(v))*dx + u = Function(V) + solve(a == L,u,bcs) + return u diff --git a/examples/FEniCS/projectKL.py b/examples/FEniCS/projectKL.py new file mode 100644 index 00000000..3d57c88c --- /dev/null +++ b/examples/FEniCS/projectKL.py @@ -0,0 +1,168 @@ +from dolfin import * +import numpy as np +import petsc4py +from petsc4py import PETSc +from slepc4py import SLEPc +from meshDS import* +# initialize petsc +petsc4py.init() + +class projectKL(object): + + """Docstring for projectKL. """ + + def __init__(self,mesh): + """TODO: to be defined1. """ + # create meshDS obect + self._mesh = mesh + self.domain = meshDS(mesh) + self.c_volume_array,self.c_centroid_array = self.domain.getElemVCArray() + self.node_to_elem = self.domain.getNodesToElem() + self.flag = False + def getCovMat(self, cov_expr): + """TODO: Docstring for getCovMat. 
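Assemble the covariance matrix C node by node: for each pair of nodes (i, j), the double integral of cov over the supports of their hat functions is approximated with one-point (centroid) quadrature over every pair of elements attached to the two nodes, each contribution weighted by one third of each element's volume, as in the loop below.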
+ + :cov_expr: Expression (dolfin) as a function of + :returns: covariance PETSC matrix cov_mat + + """ + # store the expression + self.expr = cov_expr + # create a PETSC matrix for cov_mat + cov_mat = PETSc.Mat().create() + cov_mat.setType('aij') + cov_mat.setSizes(self.domain.getNodes(),self.domain.getNodes()) + cov_mat.setUp() + + cov_ij = np.empty((1),dtype=float) # scalar valued function is evaluated in this variable + xycor = np.empty((4),dtype=float) # the points to evalute the expression + + print '---------------------------' + print '---------------------------' + print ' Building Covariance Matrix' + print '---------------------------' + print '---------------------------' + # Loop through global nodes and build the matrix for i < j because of symmetric nature. + for node_i in range(0,self.domain.getNodes()): + # global node node_i + for node_j in range(node_i,self.domain.getNodes()): + # global node node_j + temp_cov_ij = 0 + for elem_i in self.node_to_elem[node_i]: + # elem_i : element attached to node_i + # x1 : x co-ordinate of the centroid of element elem_i + x1 = self.c_centroid_array[elem_i].x() + # y1 : x co-ordinate of the centroid of element elem_i + y1 = self.c_centroid_array[elem_i].y() + for elem_j in self.node_to_elem[node_j]: + # elem_j : element attached to node_j + # x2 : x co-ordinate for the centroid of element elem_j + x2 = self.c_centroid_array[elem_j].x() + # y2 : y co-ordinate for the centroid of element elem_j + y2 = self.c_centroid_array[elem_j].y() + xycor[0] = x1 + xycor[1] = x2 + xycor[2] = y1 + xycor[3] = y2 + # evaluate the expression + cov_expr.eval(cov_ij,xycor) + if cov_ij[0] > 0: + temp_cov_ij += (1.0/3)*(1.0/3)*cov_ij[0]*self.c_volume_array[elem_i]* \ + self.c_volume_array[elem_j] + cov_mat.setValue(node_i,node_j,temp_cov_ij) + cov_mat.setValue(node_j,node_i,temp_cov_ij) + cov_mat.assemblyBegin() + cov_mat.assemblyEnd() + print '---------------------------' + print '---------------------------' + print ' Finished Covariance Matrix' + print '---------------------------' + print '---------------------------' + + return cov_mat + + def _getBMat(self): + """TODO: Docstring for getBmat. We are solving for CX = BX where C is the covariance matrix + and B is just a mass matrix. Here we assemble B. This is a private function. DONT call this + unless debuging. + + :returns: PETScMatrix B + """ + + # B matrix is just a mass matrix, can be easily assembled through fenics + # however, the ordering in fenics is not the mesh ordering. 
so we build a temp matrix + # then use the vertex to dof map to get the right ordering interms of our mesh nodes + V = FunctionSpace(self._mesh, "CG", 1) + # Define basis and bilinear form + u = TrialFunction(V) + v = TestFunction(V) + a = u*v*dx + B_temp = assemble(a) + + B = PETSc.Mat().create() + B.setType('aij') + B.setSizes(self.domain.getNodes(),self.domain.getNodes()) + B.setUp() + + B_ij = B_temp.array() + + v_to_d_map = vertex_to_dof_map(V) + + print '---------------------------' + print '---------------------------' + print ' Building Mass Matrix ' + print '---------------------------' + print '---------------------------' + for node_i in range(0, self.domain.getNodes()): + for node_j in range(node_i, self.domain.getNodes()): + B_ij_nodes = B_ij[v_to_d_map[node_i],v_to_d_map[node_j]] + if B_ij_nodes > 0: + B.setValue(node_i,node_j,B_ij_nodes) + B.setValue(node_j,node_i,B_ij_nodes) + + B.assemblyBegin() + B.assemblyEnd() + print '---------------------------' + print '---------------------------' + print ' Finished Mass Matrix ' + print '---------------------------' + print '---------------------------' + return B + + def projectCovToMesh(self,num_kl,cov_expr): + """TODO: Docstring for projectCovToMesh. Solves CX = BX where C is the covariance matrix + :num_kl : number of kl exapansion terms needed + :returns: TODO + + """ + # turn the flag to true + self.flag = True + # get C,B matrices + C = PETScMatrix(self.getCovMat(cov_expr)) + B = PETScMatrix(self._getBMat()) + # Solve the generalized eigenvalue problem + eigensolver = SLEPcEigenSolver(C,B) + eigensolver.solve(num_kl) + # Get the number of eigen values that converged. + nconv = eigensolver.get_number_converged() + + # Get N eigenpairs where N is the number of KL expansion and check if N < nconv otherwise you had + # really bad matrix + + # create numpy array of vectors and eigenvalues + self.eigen_funcs = np.empty((num_kl),dtype=object) + self.eigen_vals = np.empty((num_kl),dtype=float) + + # store the eigenvalues and eigen functions + V = FunctionSpace(self._mesh, "CG", 1) + for eigen_pairs in range(0,num_kl): + lambda_r, lambda_c, x_real, x_complex = eigensolver.get_eigenpair(eigen_pairs) + self.eigen_funcs[eigen_pairs] = Function(V) + # use dof_to_vertex map to map values to the function space + self.eigen_funcs[eigen_pairs].vector()[:] = x_real[dof_to_vertex_map(V)]#*np.sqrt(lambda_r) + # divide by norm to make the unit norm again + self.eigen_funcs[eigen_pairs].vector()[:] = self.eigen_funcs[eigen_pairs].vector()[:] / \ + norm(self.eigen_funcs[eigen_pairs]) + self.eigen_vals[eigen_pairs] = lambda_r + + diff --git a/examples/contaminantTransport/contaminant.py b/examples/contaminantTransport/contaminant.py index f59f0d26..37329520 100644 --- a/examples/contaminantTransport/contaminant.py +++ b/examples/contaminantTransport/contaminant.py @@ -26,92 +26,103 @@ import bet.postProcess.plotP as plotP import bet.postProcess.plotDomains as plotD import bet.postProcess.postTools as postTools - +import bet.sample as samp # Labels and descriptions of the uncertain parameters labels = ['Source $y$ coordinate [L]', 'Source $x$ coordinate [L]', 'Dispersivity x [L]', 'Flow Angle [degrees]', 'Contaminant flux [M/T]'] # Load data from files -lam_domain = np.loadtxt("files/lam_domain.txt.gz") #parameter domain -ref_lam = np.loadtxt("files/lam_ref.txt.gz") #reference parameter set -Q_ref = np.loadtxt("files/Q_ref.txt.gz") #reference QoI set -samples = np.loadtxt("files/samples.txt.gz") # uniform samples in parameter domain -dataf = 
np.loadtxt("files/data.txt.gz") # data from model - -QoI_indices=[0,1,2,3] # Indices for output data with which you want to invert -bin_ratio = 0.25 #ratio of length of data region to invert - -data = dataf[:,QoI_indices] -Q_ref=Q_ref[QoI_indices] - -dmax = data.max(axis=0) -dmin = data.min(axis=0) -dscale = bin_ratio*(dmax-dmin) -Qmax = Q_ref + 0.5*dscale -Qmin = Q_ref -0.5*dscale -def rho_D(x): - return np.all(np.logical_and(np.greater(x,Qmin), np.less(x,Qmax)),axis=1) +# First obtain info on the parameter domain +parameter_domain = np.loadtxt("files/lam_domain.txt.gz") #parameter domain +parameter_dim = parameter_domain.shape[0] +# Create input sample set +input_samples = samp.sample_set(parameter_dim) +input_samples.set_domain(parameter_domain) +input_samples.set_values(np.loadtxt("files/samples.txt.gz")) +input_samples.estimate_volume_mc() # Use standard MC estimate of volumes +# Choose which QoI to use and create output sample set +QoI_indices_observe = np.array([0,1,2,3]) +output_samples = samp.sample_set(QoI_indices_observe.size) +output_samples.set_values(np.loadtxt("files/data.txt.gz")[:,QoI_indices_observe]) + +# Create discretization object +my_discretization = samp.discretization(input_sample_set=input_samples, + output_sample_set=output_samples) + +# Load the reference parameter and QoI values +param_ref = np.loadtxt("files/lam_ref.txt.gz") #reference parameter set +Q_ref = np.loadtxt("files/Q_ref.txt.gz")[QoI_indices_observe] #reference QoI set # Plot the data domain -plotD.show_data(data, Q_ref = Q_ref, rho_D=rho_D, showdim=2) +plotD.show_data(my_discretization, Q_ref = Q_ref, showdim=2) # Whether or not to use deterministic description of simple function approximation of # ouput probability deterministic_discretize_D = True if deterministic_discretize_D == True: - (d_distr_prob, d_distr_samples, d_Tree) = simpleFunP.uniform_hyperrectangle(data=data, - Q_ref=Q_ref, - bin_ratio=bin_ratio, - center_pts_per_edge = 1) + simpleFunP.regular_partition_uniform_distribution_rectangle_scaled(data_set=my_discretization, + Q_ref=Q_ref, + rect_scale=0.25, + center_pts_per_edge = 1) else: - (d_distr_prob, d_distr_samples, d_Tree) = simpleFunP.unif_unif(data=data, - Q_ref=Q_ref, - M=50, - bin_ratio=bin_ratio, - num_d_emulate=1E5) + simpleFunP.uniform_partition_uniform_distribution_rectangle_scaled(data_set=my_discretization, + Q_ref=Q_ref, + rect_scale=0.25, + M=50, + num_d_emulate=1E5) # calculate probablities making Monte Carlo assumption -(P, lam_vol, io_ptr) = calculateP.prob(samples=samples, - data=data, - rho_D_M=d_distr_prob, - d_distr_samples=d_distr_samples) +calculateP.prob(my_discretization) # calculate 2D marginal probabilities -(bins, marginals2D) = plotP.calculate_2D_marginal_probs(P_samples = P, samples = samples, lam_domain = lam_domain, nbins = 10) +(bins, marginals2D) = plotP.calculate_2D_marginal_probs(my_discretization, nbins = 10) # smooth 2D marginal probabilites for plotting (optional) -marginals2D = plotP.smooth_marginals_2D(marginals2D,bins, sigma=1.0) +marginals2D = plotP.smooth_marginals_2D(marginals2D, bins, sigma=1.0) # plot 2D marginal probabilities -plotP.plot_2D_marginal_probs(marginals2D, bins, lam_domain, filename = "contaminant_map", +plotP.plot_2D_marginal_probs(marginals2D, bins, my_discretization, filename = "contaminant_map", plot_surface=False, - lam_ref = ref_lam, + lam_ref = param_ref, lambda_label=labels, interactive=False) # calculate 1d marginal probs -(bins, marginals1D) = plotP.calculate_1D_marginal_probs(P_samples = P, samples = samples, 
lam_domain = lam_domain, nbins = 20) +(bins, marginals1D) = plotP.calculate_1D_marginal_probs(my_discretization, nbins = 20) # smooth 1d marginal probs (optional) marginals1D = plotP.smooth_marginals_1D(marginals1D, bins, sigma=1.0) # plot 1d marginal probs -plotP.plot_1D_marginal_probs(marginals1D, bins, lam_domain, filename = "contaminant_map", interactive=False, lam_ref=ref_lam, lambda_label=labels) +plotP.plot_1D_marginal_probs(marginals1D, bins, my_discretization, + filename = "contaminant_map", + interactive=False, + lam_ref=param_ref, + lambda_label=labels) percentile = 1.0 # Sort samples by highest probability density and sample highest percentile percent samples -(num_samples, P_high, samples_high, lam_vol_high, data_high)= postTools.sample_highest_prob(top_percentile=percentile, P_samples=P, samples=samples, lam_vol=lam_vol,data = data,sort=True) +(num_samples, my_discretization_highP, indices)= postTools.sample_highest_prob( + percentile, my_discretization, sort=True) # print the number of samples that make up the highest percentile percent samples and # ratio of the volume of the parameter domain they take up -print (num_samples, np.sum(lam_vol_high)) +print (num_samples, np.sum(my_discretization_highP._input_sample_set.get_volumes())) + +# Choose unused QoI as prediction QoI and propagate measure onto predicted QoI data space +QoI_indices_predict = np.array([7]) +output_samples_predict = samp.sample_set(QoI_indices_predict.size) +output_samples_predict.set_values(np.loadtxt("files/data.txt.gz")[:,QoI_indices_predict]) +output_samples_predict.set_probabilities(input_samples.get_probabilities()) -# Propogate the probability measure through a different QoI map -(_, P_pred, _, _ , data_pred)= postTools.sample_highest_prob(top_percentile=percentile, P_samples=P, samples=samples, lam_vol=lam_vol,data = dataf[:,7],sort=True) +# Determine range of predictions and store as domain for plotting purposes +output_samples_predict.set_domain(output_samples_predict.get_bounding_box()) # Plot 1D pdf of predicted QoI # calculate 1d marginal probs -(bins_pred, marginals1D_pred) = plotP.calculate_1D_marginal_probs(P_samples = P_pred, samples = data_pred, lam_domain = np.array([[np.min(data_pred),np.max(data_pred)]]), nbins = 20) +(bins_pred, marginals1D_pred) = plotP.calculate_1D_marginal_probs(output_samples_predict, + nbins = 20) # plot 1d pdf -plotP.plot_1D_marginal_probs(marginals1D_pred, bins_pred, lam_domain= np.array([[np.min(data_pred),np.max(data_pred)]]), filename = "contaminant_prediction", interactive=False) +plotP.plot_1D_marginal_probs(marginals1D_pred, bins_pred, output_samples_predict, + filename = "contaminant_prediction", interactive=False) diff --git a/examples/fromFile_ADCIRCMap/Q_1D_serial.py b/examples/fromFile_ADCIRCMap/Q_1D_serial.py index 6288d928..6447f465 100644 --- a/examples/fromFile_ADCIRCMap/Q_1D_serial.py +++ b/examples/fromFile_ADCIRCMap/Q_1D_serial.py @@ -4,16 +4,23 @@ import bet.calculateP.simpleFunP as sfun import numpy as np import scipy.io as sio +import bet.sample as sample # Import "Truth" -mdat = sio.loadmat('Q_2D') +mdat = sio.loadmat('../matfiles/Q_2D') Q = mdat['Q'] Q_ref = mdat['Q_true'] # Import Data -samples = mdat['points'].transpose() +points = mdat['points'] lam_domain = np.array([[0.07, .15], [0.1, 0.2]]) +# Create input, output, and discretization from data read from file +input_sample_set = sample.sample_set(points.shape[0]) +input_sample_set.set_values(points.transpose()) +input_sample_set.set_domain(lam_domain) + + print "Finished loading 
data" def postprocess(station_nums, ref_num): @@ -24,55 +31,41 @@ def postprocess(station_nums, ref_num): filename += '_ref_'+str(ref_num+1) data = Q[:, station_nums] + output_sample_set = sample.sample_set(data.shape[1]) + output_sample_set.set_values(data) q_ref = Q_ref[ref_num, station_nums] # Create Simple function approximation # Save points used to parition D for simple function approximation and the # approximation itself (this can be used to make close comparisions...) - (rho_D_M, d_distr_samples, d_Tree) = sfun.uniform_hyperrectangle(data, - q_ref, bin_ratio=0.15, + output_probability_set = sfun.regular_partition_uniform_distribution_rectangle_scaled(\ + output_sample_set, q_ref, rect_scale=0.15, center_pts_per_edge=np.ones((data.shape[1],))) - num_l_emulate = 1e6 - lambda_emulate = calcP.emulate_iid_lebesgue(lam_domain, num_l_emulate) - print "Finished emulating lambda samples" + num_l_emulate = 1e4 + set_emulated = calcP.emulate_iid_lebesgue(lam_domain, num_l_emulate) + my_disc = sample.discretization(input_sample_set, output_sample_set, + output_probability_set, emulated_input_sample_set=set_emulated) - mdict = dict() - mdict['rho_D_M'] = rho_D_M - mdict['d_distr_samples'] = d_distr_samples - mdict['num_l_emulate'] = num_l_emulate - mdict['lambda_emulate'] = lambda_emulate + print "Finished emulating lambda samples" # Calculate P on lambda emulate - (P0, lem0, io_ptr0, emulate_ptr0) = calcP.prob_emulated(samples, data, - rho_D_M, d_distr_samples, lambda_emulate, d_Tree) print "Calculating prob_emulated" - mdict['P0'] = P0 - mdict['lem0'] = lem0 - mdict['io_ptr0'] = io_ptr0 - mdict['emulate_ptr0'] = emulate_ptr0 + calcP.prob_emulated(my_disc) + sample.save_discretization(my_disc, filename, "prob_emulated_solution") # Calclate P on the actual samples with assumption that voronoi cells have # equal size - (P1, lam_vol1, io_ptr1) = calcP.prob(samples, data, - rho_D_M, d_distr_samples, d_Tree) + input_sample_set.estimate_volume_mc() print "Calculating prob" - mdict['P1'] = P1 - mdict['lam_vol1'] = lam_vol1 - mdict['lem1'] = samples - mdict['io_ptr1'] = io_ptr1 + calcP.prob(my_disc) + sample.save_discretization(my_disc, filename, "prob_solution") # Calculate P on the actual samples estimating voronoi cell volume with MC # integration - (P3, lam_vol3, lambda_emulate3, io_ptr3, emulate_ptr3) = calcP.prob_mc(samples, - data, rho_D_M, d_distr_samples, lambda_emulate, d_Tree) + calcP.prob_mc(my_disc) print "Calculating prob_mc" - mdict['P3'] = P3 - mdict['lam_vol3'] = lam_vol3 - mdict['io_ptr3'] = io_ptr3 - mdict['emulate_ptr3'] = emulate_ptr3 - # Export P - sio.savemat(filename, mdict, do_compression=True) + sample.save_discretization(my_disc, filename, "prob_mc_solution") # Post-process and save P and emulated points ref_nums = [6, 11, 15] # 7, 12, 16 diff --git a/examples/fromFile_ADCIRCMap/Q_2D_parallel.py b/examples/fromFile_ADCIRCMap/Q_2D_parallel.py index e05c8fac..bdad804f 100644 --- a/examples/fromFile_ADCIRCMap/Q_2D_parallel.py +++ b/examples/fromFile_ADCIRCMap/Q_2D_parallel.py @@ -6,16 +6,21 @@ import scipy.io as sio import bet.util as util from bet.Comm import comm +import bet.sample as sample # Import "Truth" -mdat = sio.loadmat('Q_2D') +mdat = sio.loadmat('../matfiles/Q_2D') Q = mdat['Q'] Q_ref = mdat['Q_true'] # Import Data -samples = mdat['points'].transpose() +points = mdat['points'] lam_domain = np.array([[0.07, .15], [0.1, 0.2]]) +# Create input, output, and discretization from data read from file +input_sample_set = sample.sample_set(points.shape[0]) 
+input_sample_set.set_values(points.transpose()) +input_sample_set.set_domain(lam_domain) print "Finished loading data" def postprocess(station_nums, ref_num): @@ -26,58 +31,44 @@ def postprocess(station_nums, ref_num): filename += '_ref_'+str(ref_num+1) data = Q[:, station_nums] + output_sample_set = sample.sample_set(data.shape[1]) + output_sample_set.set_values(data) q_ref = Q_ref[ref_num, station_nums] # Create Simple function approximation # Save points used to parition D for simple function approximation and the # approximation itself (this can be used to make close comparisions...) - (rho_D_M, d_distr_samples, d_Tree) = sfun.uniform_hyperrectangle(data, - q_ref, bin_ratio=0.15, + output_probability_set = sfun.regular_partition_uniform_distribution_rectangle_scaled(\ + output_sample_set, q_ref, rect_scale=0.15, center_pts_per_edge=np.ones((data.shape[1],))) num_l_emulate = 1e6 - lambda_emulate = calcP.emulate_iid_lebesgue(lam_domain, num_l_emulate) - - if comm.rank == 0: - print "Finished emulating lambda samples" - mdict = dict() - mdict['rho_D_M'] = rho_D_M - mdict['d_distr_samples'] = d_distr_samples - mdict['num_l_emulate'] = num_l_emulate + set_emulated = calcP.emulate_iid_lebesgue(lam_domain, num_l_emulate) + my_disc = sample.discretization(input_sample_set, output_sample_set, + output_probability_set, emulated_input_sample_set=set_emulated) + + print "Finished emulating lambda samples" # Calculate P on lambda emulate - (P0, lem0, io_ptr0, emulate_ptr0) = calcP.prob_emulated(samples, data, - rho_D_M, d_distr_samples, lambda_emulate, d_Tree) + print "Calculating prob_emulated" + calcP.prob_emulated(my_disc) if comm.rank == 0: - print "Calculating prob_emulated" - mdict['P0'] = P0 - mdict['lem0'] = lem0 - mdict['io_ptr0'] = io_ptr0 - mdict['emulate_ptr0'] = emulate_ptr0 + sample.save_discretization(my_disc, filename, "prob_emulated_solution") # Calclate P on the actual samples with assumption that voronoi cells have # equal size - (P1, lam_vol1, io_ptr1) = calcP.prob(samples, data, - rho_D_M, d_distr_samples, d_Tree) + input_sample_set.estimate_volume_mc() + print "Calculating prob" + calcP.prob(my_disc) if comm.rank == 0: - print "Calculating prob" - mdict['P1'] = P1 - mdict['lam_vol1'] = lam_vol1 - mdict['lem1'] = samples - mdict['io_ptr1'] = io_ptr1 + sample.save_discretization(my_disc, filename, "prob_solution") # Calculate P on the actual samples estimating voronoi cell volume with MC # integration - (P3, lam_vol3, lambda_emulate3, io_ptr3, emulate_ptr3) = calcP.prob_mc(samples, - data, rho_D_M, d_distr_samples, lambda_emulate, d_Tree) + calcP.prob_mc(my_disc) + print "Calculating prob_mc" if comm.rank == 0: - print "Calculating prob_mc" - mdict['P3'] = P3 - mdict['lam_vol3'] = lam_vol3 - mdict['io_ptr3'] = io_ptr3 - mdict['emulate_ptr3'] = emulate_ptr3 - # Export P - sio.savemat(filename, mdict, do_compression=True) + sample.save_discretization(my_disc, filename, "prob_mc_solution") # Post-process and save P and emulated points ref_nums = [6, 11, 15] # 7, 12, 16 diff --git a/examples/fromFile_ADCIRCMap/Q_2D_serial.py b/examples/fromFile_ADCIRCMap/Q_2D_serial.py index d3d50c3c..85e4eab0 100644 --- a/examples/fromFile_ADCIRCMap/Q_2D_serial.py +++ b/examples/fromFile_ADCIRCMap/Q_2D_serial.py @@ -4,16 +4,21 @@ import bet.calculateP.simpleFunP as sfun import numpy as np import scipy.io as sio +import bet.sample as sample # Import "Truth" -mdat = sio.loadmat('Q_2D') +mdat = sio.loadmat('../matfiles/Q_2D') Q = mdat['Q'] Q_ref = mdat['Q_true'] # Import Data -samples = 
mdat['points'].transpose() +points = mdat['points'] lam_domain = np.array([[0.07, .15], [0.1, 0.2]]) +# Create input, output, and discretization from data read from file +input_sample_set = sample.sample_set(points.shape[0]) +input_sample_set.set_values(points.transpose()) +input_sample_set.set_domain(lam_domain) print "Finished loading data" def postprocess(station_nums, ref_num): @@ -24,55 +29,41 @@ def postprocess(station_nums, ref_num): filename += '_ref_'+str(ref_num+1) data = Q[:, station_nums] + output_sample_set = sample.sample_set(data.shape[1]) + output_sample_set.set_values(data) q_ref = Q_ref[ref_num, station_nums] # Create Simple function approximation # Save points used to parition D for simple function approximation and the # approximation itself (this can be used to make close comparisions...) - (rho_D_M, d_distr_samples, d_Tree) = sfun.uniform_hyperrectangle(data, - q_ref, bin_ratio=0.15, + output_probability_set = sfun.regular_partition_uniform_distribution_rectangle_scaled(\ + output_sample_set, q_ref, rect_scale=0.15, center_pts_per_edge=np.ones((data.shape[1],))) - num_l_emulate = 1e6 - lambda_emulate = calcP.emulate_iid_lebesgue(lam_domain, num_l_emulate) - print "Finished emulating lambda samples" + num_l_emulate = 1e4 + set_emulated = calcP.emulate_iid_lebesgue(lam_domain, num_l_emulate) + my_disc = sample.discretization(input_sample_set, output_sample_set, + output_probability_set, emulated_input_sample_set=set_emulated) - mdict = dict() - mdict['rho_D_M'] = rho_D_M - mdict['d_distr_samples'] = d_distr_samples - mdict['num_l_emulate'] = num_l_emulate - mdict['lambda_emulate'] = lambda_emulate + print "Finished emulating lambda samples" # Calculate P on lambda emulate - (P0, lem0, io_ptr0, emulate_ptr0) = calcP.prob_emulated(samples, data, - rho_D_M, d_distr_samples, lambda_emulate, d_Tree) print "Calculating prob_emulated" - mdict['P0'] = P0 - mdict['lem0'] = lem0 - mdict['io_ptr0'] = io_ptr0 - mdict['emulate_ptr0'] = emulate_ptr0 + calcP.prob_emulated(my_disc) + sample.save_discretization(my_disc, filename, "prob_emulated_solution") # Calclate P on the actual samples with assumption that voronoi cells have # equal size - (P1, lam_vol1, io_ptr1) = calcP.prob(samples, data, - rho_D_M, d_distr_samples, d_Tree) + input_sample_set.estimate_volume_mc() print "Calculating prob" - mdict['P1'] = P1 - mdict['lam_vol1'] = lam_vol1 - mdict['lem1'] = samples - mdict['io_ptr1'] = io_ptr1 + calcP.prob(my_disc) + sample.save_discretization(my_disc, filename, "prob_solution") # Calculate P on the actual samples estimating voronoi cell volume with MC # integration - (P3, lam_vol3, lambda_emulate3, io_ptr3, emulate_ptr3) = calcP.prob_mc(samples, - data, rho_D_M, d_distr_samples, lambda_emulate, d_Tree) + calcP.prob_mc(my_disc) print "Calculating prob_mc" - mdict['P3'] = P3 - mdict['lam_vol3'] = lam_vol3 - mdict['io_ptr3'] = io_ptr3 - mdict['emulate_ptr3'] = emulate_ptr3 - # Export P - sio.savemat(filename, mdict, do_compression=True) + sample.save_discretization(my_disc, filename, "prob_mc_solution") # Post-process and save P and emulated points ref_nums = [6, 11, 15] # 7, 12, 16 diff --git a/examples/fromFile_ADCIRCMap/Q_3D_serial.py b/examples/fromFile_ADCIRCMap/Q_3D_serial.py index b9656cf3..d4dfd8c5 100644 --- a/examples/fromFile_ADCIRCMap/Q_3D_serial.py +++ b/examples/fromFile_ADCIRCMap/Q_3D_serial.py @@ -4,9 +4,10 @@ import bet.calculateP.simpleFunP as sfun import numpy as np import scipy.io as sio +import bet.sample as sample # Import "Truth" -mdat = sio.loadmat('Q_3D') +mdat 
= sio.loadmat('../matfiles/Q_3D') Q = mdat['Q'] Q_ref = mdat['Q_true'] @@ -14,6 +15,11 @@ samples = mdat['points'].transpose() lam_domain = np.array([[-900, 1200], [0.07, .15], [0.1, 0.2]]) +# Create input, output, and discretization from data read from file +points = mdat['points'] +input_sample_set = sample.sample_set(points.shape[0]) +input_sample_set.set_values(points.transpose()) +input_sample_set.set_domain(lam_domain) print "Finished loading data" def postprocess(station_nums, ref_num): @@ -24,30 +30,26 @@ def postprocess(station_nums, ref_num): filename += '_ref_'+str(ref_num+1) data = Q[:, station_nums] + output_sample_set = sample.sample_set(data.shape[1]) + output_sample_set.set_values(data) q_ref = Q_ref[ref_num, station_nums] # Create Simple function approximation # Save points used to parition D for simple function approximation and the # approximation itself (this can be used to make close comparisions...) - (rho_D_M, d_distr_samples, d_Tree) = sfun.uniform_hyperrectangle(data, - q_ref, bin_ratio=0.15, + output_probability_set = sfun.regular_partition_uniform_distribution_rectangle_scaled(\ + output_sample_set, q_ref, rect_scale=0.15, center_pts_per_edge=np.ones((data.shape[1],))) - mdict = dict() - mdict['rho_D_M'] = rho_D_M - mdict['d_distr_samples'] = d_distr_samples + + my_disc = sample.discretization(input_sample_set, output_sample_set, + output_probability_set) # Calclate P on the actual samples with assumption that voronoi cells have # equal size - (P1, lam_vol1, io_ptr1) = calcP.prob(samples, data, rho_D_M, - d_distr_samples, d_Tree) + input_sample_set.estimate_volume_mc() print "Calculating prob" - mdict['P1'] = P1 - mdict['lam_vol1'] = lam_vol1 - mdict['lem1'] = samples - mdict['io_ptr1'] = io_ptr1 - - # Export P and compare to MATLAB solution visually - sio.savemat(filename, mdict, do_compression=True) + calcP.prob(my_disc) + sample.save_discretization(my_disc, filename, "prob_solution") # Post-process and save P and emulated points ref_num = 14 diff --git a/examples/fromFile_ADCIRCMap/fromFile2D.py b/examples/fromFile_ADCIRCMap/fromFile2D.py index c3b6e628..180a2e18 100644 --- a/examples/fromFile_ADCIRCMap/fromFile2D.py +++ b/examples/fromFile_ADCIRCMap/fromFile2D.py @@ -12,8 +12,6 @@ # Set minima and maxima lam_domain = np.array([[.07, .15], [.1, .2]]) -param_min = lam_domain[:, 0] -param_max = lam_domain[:, 1] # Select only the stations I care about this will lead to better sampling station_nums = [0, 5] # 1, 6 @@ -22,7 +20,7 @@ transition_set = asam.transition_set(.5, .5**5, 1.0) # Read in Q_ref and Q to create the appropriate rho_D -mdat = sio.loadmat('Q_2D') +mdat = sio.loadmat('../matfiles/Q_2D') Q = mdat['Q'] Q = Q[:, station_nums] Q_ref = mdat['Q_true'] @@ -60,7 +58,7 @@ def rho_D(outputs): # Get samples inital_sample_type = "lhs" -(samples, data, all_step_ratios) = sampler.generalized_chains(param_min, param_max, +(my_disc, all_step_ratios) = sampler.generalized_chains(lam_domain, transition_set, kernel_rD, sample_save_file, inital_sample_type) # Read in points_ref and plot results diff --git a/examples/fromFile_ADCIRCMap/fromFile3D.py b/examples/fromFile_ADCIRCMap/fromFile3D.py index c662b246..e5ae925f 100644 --- a/examples/fromFile_ADCIRCMap/fromFile3D.py +++ b/examples/fromFile_ADCIRCMap/fromFile3D.py @@ -18,8 +18,6 @@ ymax = 1500 wall_height = -2.5 -param_min = param_domain[:, 0] -param_max = param_domain[:, 1] # Select only the stations I care about this will lead to better # sampling @@ -29,7 +27,7 @@ transition_set = asam.transition_set(.5, .5**5, 
0.5) # Read in Q_ref and Q to create the appropriate rho_D -mdat = sio.loadmat('Q_3D') +mdat = sio.loadmat('../matfiles/Q_3D') Q = mdat['Q'] Q = Q[:, station_nums] Q_ref = mdat['Q_true'] @@ -67,7 +65,7 @@ def rho_D(outputs): # Get samples inital_sample_type = "lhs" -(samples, data, all_step_ratios) = sampler.generalized_chains(param_min, param_max, +(my_disc, all_step_ratios) = sampler.generalized_chains(param_domain, transition_set, kernel_rD, sample_save_file, inital_sample_type) # Read in points_ref and plot results diff --git a/examples/fromFile_ADCIRCMap/plotDomains2D.py b/examples/fromFile_ADCIRCMap/plotDomains2D.py index 5dbf13b4..e4a00e63 100644 --- a/examples/fromFile_ADCIRCMap/plotDomains2D.py +++ b/examples/fromFile_ADCIRCMap/plotDomains2D.py @@ -6,17 +6,16 @@ import numpy as np import bet.postProcess.plotDomains as pDom import scipy.io as sio +import bet.sample as sample # Set minima and maxima lam_domain = np.array([[.07, .15], [.1, .2]]) -param_min = lam_domain[:, 0] -param_max = lam_domain[:, 1] # Select only the stations I care about this will lead to better sampling station_nums = [0, 5] # 1, 6 # Read in Q_ref and Q to create the appropriate rho_D -mdat = sio.loadmat('Q_2D') +mdat = sio.loadmat('../matfiles/Q_2D.mat') Q = mdat['Q'] Q = Q[:, station_nums] Q_ref = mdat['Q_true'] @@ -39,15 +38,24 @@ def rho_D(outputs): p_ref = mdat['points_true'] p_ref = p_ref[5:7, 15] +# Create input, output, and discretization from data read from file +points = mdat['points'] +input_sample_set = sample.sample_set(points.shape[0]) +input_sample_set.set_values(points.transpose()) +input_sample_set.set_domain(lam_domain) +output_sample_set = sample.sample_set(Q.shape[1]) +output_sample_set.set_values(Q) +my_disc = sample.discretization(input_sample_set, output_sample_set) + # Show the samples in the parameter space -pDom.show_param(samples=points.transpose(), data=Q, rho_D=rho_D, p_ref=p_ref) +pDom.show_param(my_disc, rho_D=rho_D, p_ref=p_ref) # Show the corresponding samples in the data space -pDom.show_data(data=Q, rho_D=rho_D, Q_ref=Q_ref) +pDom.show_data(output_sample_set, rho_D=rho_D, Q_ref=Q_ref) # Show the data domain that corresponds with the convex hull of samples in the # parameter space -pDom.show_data_domain_2D(samples=points.transpose(), data=Q, Q_ref=Q_ref) +pDom.show_data_domain_2D(my_disc, Q_ref=Q_ref) # Show multiple data domains that correspond with the convex hull of samples in # the parameter space -pDom.show_data_domain_multi(samples=points.transpose(), data=mdat['Q'], - Q_ref=mdat['Q_true'][15], Q_nums=[1,2,5], showdim='all') +pDom.show_data_domain_multi(my_disc, Q_ref=mdat['Q_true'][15], + showdim='all') diff --git a/examples/fromFile_ADCIRCMap/plotDomains3D.py b/examples/fromFile_ADCIRCMap/plotDomains3D.py index b9bdec86..2b15b5ac 100644 --- a/examples/fromFile_ADCIRCMap/plotDomains3D.py +++ b/examples/fromFile_ADCIRCMap/plotDomains3D.py @@ -7,6 +7,7 @@ import bet.postProcess.plotDomains as pDom import scipy.io as sio from scipy.interpolate import griddata +import bet.sample as sample # Set minima and maxima param_domain = np.array([[-900, 1500], [.07, .15], [.1, .2]]) @@ -15,15 +16,13 @@ xmax = 1580 ymax = 1500 -param_min = param_domain[:, 0] -param_max = param_domain[:, 1] # Select only the stations I care about this will lead to better # sampling station_nums = [0, 4, 1] # 1, 5, 2 # Read in Q_ref and Q to create the appropriate rho_D -mdat = sio.loadmat('Q_3D') +mdat = sio.loadmat('../matfiles/Q_3D') Q = mdat['Q'] Q = Q[:, station_nums] Q_ref = mdat['Q_true'] @@ 
-48,12 +47,21 @@ def rho_D(outputs): p_ref = mdat['points_true'] p_ref = p_ref[:, 14] + +# Create input, output, and discretization from data read from file +input_sample_set = sample.sample_set(points.shape[0]) +input_sample_set.set_values(points.transpose()) +input_sample_set.set_domain(param_domain) +output_sample_set = sample.sample_set(Q.shape[1]) +output_sample_set.set_values(Q) +my_disc = sample.discretization(input_sample_set, output_sample_set) + # Show the samples in the parameter space -pDom.show_param(samples=points.transpose(), data=Q, rho_D=rho_D, p_ref=p_ref) +pDom.show_param(my_disc, rho_D=rho_D, p_ref=p_ref) # Show the corresponding samples in the data space -pDom.show_data(data=Q, rho_D=rho_D, Q_ref=Q_ref) +pDom.show_data(output_sample_set, rho_D=rho_D, Q_ref=Q_ref) # Show multiple data domains that correspond with the convex hull of samples in # the parameter space -pDom.show_data_domain_multi(samples=points.transpose(), data=mdat['Q'], - Q_ref=mdat['Q_true'][15], Q_nums=[1,2,5], showdim='all') +pDom.show_data_domain_multi(my_disc, Q_ref=mdat['Q_true'][15], + showdim='all') diff --git a/examples/fromFile_ADCIRCMap/sandbox_test_2D.py b/examples/fromFile_ADCIRCMap/sandbox_test_2D.py index 1c097e0a..9d65d91f 100644 --- a/examples/fromFile_ADCIRCMap/sandbox_test_2D.py +++ b/examples/fromFile_ADCIRCMap/sandbox_test_2D.py @@ -21,9 +21,6 @@ ymax = 1500 wall_height = -2.5 -param_min = lam_domain[:, 0] -param_max = lam_domain[:, 1] - # Select only the stations I care about this will lead to better sampling station_nums = [0, 5] # 1, 6 @@ -33,7 +30,7 @@ transition_set = asam.transition_set(.5, .5**5, 1.0) # Read in Q_ref and Q to create the appropriate rho_D -mdat = sio.loadmat('Q_2D') +mdat = sio.loadmat('../matfiles/Q_2D') Q = mdat['Q'] Q = Q[:, station_nums] Q_ref = mdat['Q_true'] @@ -75,24 +72,24 @@ def rho_D(outputs): # Get samples # Run with varying kernels -gen_results = sampler.run_gen(kern_list, rho_D, maximum, param_min, - param_max, transition_set, sample_save_file) -#run_reseed_results = sampler.run_gen(kern_list, rho_D, maximum, param_min, -# param_max, t_kernel, sample_save_file, reseed=3) +gen_results = sampler.run_gen(kern_list, rho_D, maximum, lam_domain, + transition_set, sample_save_file) +#run_reseed_results = sampler.run_gen(kern_list, rho_D, maximum, lam_domain, +# t_kernel, sample_save_file, reseed=3) # Run with varying transition sets bounds init_ratio = [0.1, 0.25, 0.5] min_ratio = [2e-3, 2e-5, 2e-8] max_ratio = [.5, .75, 1.0] tk_results = sampler.run_tk(init_ratio, min_ratio, max_ratio, rho_D, - maximum, param_min, param_max, kernel_rD, sample_save_file) + maximum, lam_domain, kernel_rD, sample_save_file) # Run with varying increase/decrease ratios and tolerances for a rhoD_kernel increase = [1.0, 2.0, 4.0] decrease = [0.5, 0.5e2, 0.5e3] tolerance = [1e-4, 1e-6, 1e-8] incdec_results = sampler.run_inc_dec(increase, decrease, tolerance, rho_D, - maximum, param_min, param_max, transition_set, sample_save_file) + maximum, lam_domain, transition_set, sample_save_file) # Compare the quality of several sets of samples print "Compare yield of sample sets with various kernels" diff --git a/examples/fromFile_ADCIRCMap/sandbox_test_3D.py b/examples/fromFile_ADCIRCMap/sandbox_test_3D.py index 61a7de2f..75fd0fcb 100644 --- a/examples/fromFile_ADCIRCMap/sandbox_test_3D.py +++ b/examples/fromFile_ADCIRCMap/sandbox_test_3D.py @@ -21,8 +21,6 @@ ymax = 1500 wall_height = -2.5 -param_min = param_domain[:, 0] -param_max = param_domain[:, 1] # Select only the stations I care 
about this will lead to better sampling station_nums = [0, 4, 1] # 1, 5, 2 @@ -31,7 +29,7 @@ transition_set = asam.transition_set(.5, .5**5, 0.5) # Read in Q_ref and Q to create the appropriate rho_D -mdat = sio.loadmat('Q_3D') +mdat = sio.loadmat('../matfiles/Q_3D') Q = mdat['Q'] Q = Q[:, station_nums] Q_ref = mdat['Q_true'] @@ -73,24 +71,24 @@ def rho_D(outputs): # Get samples # Run with varying kernels -gen_results = sampler.run_gen(heur_list, rho_D, maximum, param_min, - param_max, transition_set, sample_save_file) -#run_reseed_results = sampler.run_gen(heur_list, rho_D, maximum, param_min, -# param_max, t_kernel, sample_save_file, reseed=3) +gen_results = sampler.run_gen(heur_list, rho_D, maximum, param_domain, + transition_set, sample_save_file) +#run_reseed_results = sampler.run_gen(heur_list, rho_D, maximum, param_domain, +# t_kernel, sample_save_file, reseed=3) # Run with varying transition sets bounds init_ratio = [0.1, 0.25, 0.5] min_ratio = [2e-3, 2e-5, 2e-8] max_ratio = [.5, .75, 1.0] tk_results = sampler.run_tk(init_ratio, min_ratio, max_ratio, rho_D, - maximum, param_min, param_max, kernel_rD, sample_save_file) + maximum, param_domain, kernel_rD, sample_save_file) # Run with varying increase/decrease ratios and tolerances for a rhoD_kernel increase = [1.0, 2.0, 4.0] decrease = [0.5, 0.5e2, 0.5e3] tolerance = [1e-4, 1e-6, 1e-8] incdec_results = sampler.run_inc_dec(increase, decrease, tolerance, rho_D, - maximum, param_min, param_max, transition_set, sample_save_file) + maximum, param_domain, transition_set, sample_save_file) # Compare the quality of several sets of samples result_list = [gen_results, tk_results, incdec_results] diff --git a/examples/linearMap/linearMapUniformSampling.py b/examples/linearMap/linearMapUniformSampling.py index ee1e23eb..b578d18b 100644 --- a/examples/linearMap/linearMapUniformSampling.py +++ b/examples/linearMap/linearMapUniformSampling.py @@ -1,11 +1,27 @@ #! /usr/bin/env python -# Copyright (C) 2014-2015 The BET Development Team +# Copyright (C) 2014-2016 The BET Development Team """ -This example generates uniform samples on a 3D grid -and evaluates a linear map to a 2d space. Probabilities -in the paramter space are calculated using emulated points. +This example solves a stochastic inverse problem for a +linear 3-to-2 map. We refer to the map as the QoI map, +or just a QoI. We refer to the range of the QoI map as +the data space. +The 3-D input space is discretized with i.i.d. uniform +random samples or a regular grid of samples. +We refer to the input space as the +parameter space, and use parameter to refer to a particular +point (e.g., a particular random sample) in this space. +A reference parameter is used to define a reference QoI datum +and a uniform probability measure is defined on a small box +centered at this datum. +The measure on the data space is discretized either randomly +or deterministically, and this discretized measure is then +inverted by BET to determine a probability measure on the +parameter space whose support contains the measurable sets +of probable parameters. +We use emulation to estimate the measures of sets defined by +the random discretizations. 1D and 2D marginals are calculated, smoothed, and plotted. 
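(The linear QoI map itself is supplied by my_model, imported from myModel.py; for reference, the earlier inline version of this script used the 3x2 matrix

    Q_map = np.array([[0.506, 0.463], [0.253, 0.918], [0.085, 0.496]])

applied as data = np.dot(samples, Q_map), so each 3-D parameter sample maps to a 2-D datum.)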
""" @@ -15,157 +31,137 @@ import bet.calculateP.simpleFunP as simpleFunP import bet.calculateP.calculateP as calculateP import bet.postProcess.plotP as plotP +import bet.postProcess.plotDomains as plotD +import bet.sample as samp +import bet.sampling.basicSampling as bsam +from myModel import my_model -# parameter domain -lam_domain= np.array([[0.0, 1.0], - [0.0, 1.0], - [0.0, 1.0]]) +# Initialize 3-dimensional input parameter sample set object +input_samples = samp.sample_set(3) -# reference parameters -ref_lam = [0.5, 0.5, 0.5] +# Set parameter domain +input_samples.set_domain(np.repeat([[0.0, 1.0]], 3, axis=0)) + +# Define the sampler that will be used to create the discretization +# object, which is the fundamental object used by BET to compute +# solutions to the stochastic inverse problem +sampler = bsam.sampler(my_model) ''' Suggested changes for user: - -Try setting n0, n1, and n2 all to 10 and compare the results. - -Also, we can do uniform random sampling by setting - - random_sample = True - -If random_sample = True, consider defining - - n_samples = 1E3 - -Then also try n_samples = 1E4. What happens when n_samples = 1E2? -''' -random_sample = True -if random_sample == False: - n0 = 30 # number of samples in lam0 direction - n1 = 30 # number of samples in lam1 direction - n2 = 30 # number of samples in lam2 direction - n_samples = n0*n1*n2 -else: - n_samples = 2E3 - -#set up samples -if random_sample == False: - vec0=list(np.linspace(lam_domain[0][0], lam_domain[0][1], n0)) - vec1 = list(np.linspace(lam_domain[1][0], lam_domain[1][1], n1)) - vec2 = list(np.linspace(lam_domain[2][0], lam_domain[2][1], n2)) - vecv0, vecv1, vecv2 = np.meshgrid(vec0, vec1, vec2, indexing='ij') - samples=np.vstack((vecv0.flat[:], vecv1.flat[:], vecv2.flat[:])).transpose() +Try with and without random sampling. + +If using random sampling, try num_samples = 1E3 and 1E4. +What happens when num_samples = 1E2? +Try using 'lhs' instead of 'random' in the random_sample_set. + +If using regular sampling, try different numbers of samples +per dimension. +''' +# Generate samples on the parameter space +randomSampling = False +if randomSampling is True: + sampler.random_sample_set('random', input_samples, num_samples=1E3) else: - samples = calculateP.emulate_iid_lebesgue(lam_domain=lam_domain, - num_l_emulate = n_samples) + sampler.regular_sample_set(input_samples, num_samples_per_dim=[15, 15, 10]) -# QoI map -Q_map = np.array([[0.506, 0.463],[0.253, 0.918], [0.085, 0.496]]) +''' +Suggested changes for user: + +A standard Monte Carlo (MC) assumption is that every Voronoi cell +has the same volume. If a regular grid of samples was used, then +the standard MC assumption is true. -# reference QoI -Q_ref = np.array([0.422, 0.9385]) +See what happens if the MC assumption is not assumed to be true, and +if different numbers of points are used to estimate the volumes of +the Voronoi cells. 
+''' +MC_assumption = True +# Estimate volumes of Voronoi cells associated with the parameter samples +if MC_assumption is False: + input_samples.estimate_volume(n_mc_points=1E5) +else: + input_samples.estimate_volume_mc() -# calc data -data= np.dot(samples,Q_map) -np.savetxt('3to2_samples.txt.gz', samples) -np.savetxt('3to2_data.txt.gz', data) +# Create the discretization object using the input samples +my_discretization = sampler.compute_QoI_and_create_discretization(input_samples, + savefile = '3to2_discretization.txt.gz') ''' Suggested changes for user: - -Try different ways of discretizing the probability measure on D defined as a uniform -probability measure on a rectangle (since D is 2-dimensional). - -unif_unif creates a uniform measure on a hyperbox with dimensions relative to the -size of the circumscribed hyperbox of the set D using the bin_ratio. A total of M samples -are drawn within a slightly larger scaled hyperbox to discretize this measure defining -M total generalized contour events in Lambda. The reason a slightly larger scaled hyperbox -is used to draw the samples to discretize D is because otherwise every generalized contour -event will have non-zero probability which obviously defeats the purpose of "localizing" -the probability within a subset of D. - -uniform_hyperrectangle uses the same measure defined in the same way as unif_unif, but the -difference is in the discretization which is on a regular grid defined by center_pts_per_edge. -If center_pts_per_edge = 1, then the contour event corresponding to the entire support of rho_D -is approximated as a single event. This is done by carefully placing a regular 3x3 grid (since D=2 in -this case) of points in D with the center point of the grid in the center of the support of -the measure and the other points placed outside of the rectangle defining the support to define -a total of 9 contour events with 8 of them having exactly zero probability. + +Try different reference parameters. ''' -deterministic_discretize_D = False +# Define the reference parameter +param_ref = np.array([0.5, 0.5, 0.5]) +#param_ref = np.array([0.75, 0.75, 0.5]) +#param_ref = np.array([0.75, 0.75, 0.75]) +#param_ref = np.array([0.5, 0.5, 0.75]) -if deterministic_discretize_D == True: - (d_distr_prob, d_distr_samples, d_Tree) = simpleFunP.uniform_hyperrectangle(data=data, - Q_ref=Q_ref, bin_ratio=0.2, center_pts_per_edge = 1) -else: - (d_distr_prob, d_distr_samples, d_Tree) = simpleFunP.unif_unif(data=data, - Q_ref=Q_ref, M=50, bin_ratio=0.2, num_d_emulate=1E5) +# Compute the reference QoI +Q_ref = my_model(param_ref) + +# Create some plots of input and output discretizations +plotD.scatter2D_multi(input_samples, p_ref = param_ref, showdim = 'all', filename = 'linearMapParameterSamples') +plotD.show_data(my_discretization, Q_ref = Q_ref) ''' Suggested changes for user: - -If using a regular grid of sampling (if random_sample = False), we set - - lambda_emulate = samples - -Otherwise, play around with num_l_emulate. A value of 1E2 will probably -give poor results while results become fairly consistent with values -that are approximately 10x the number of samples. - -Note that you can always use - - lambda_emulate = samples - -and this simply will imply that a standard Monte Carlo assumption is -being used, which in a measure-theoretic context implies that each -Voronoi cell is assumed to have the same measure. 
This type of -approximation is more reasonable for large n_samples due to the slow -convergence rate of Monte Carlo (it converges like 1/sqrt(n_samples)). + +Try different ways of discretizing the probability measure on D defined as a uniform +probability measure on a rectangle (since D is 2-dimensional) centered at Q_ref whose +size is determined by scaling the circumscribing box of D. ''' -if random_sample == False: - lambda_emulate = samples +randomDataDiscretization = False +if randomDataDiscretization is False: + simpleFunP.regular_partition_uniform_distribution_rectangle_scaled( + data_set=my_discretization, Q_ref=Q_ref, rect_scale=0.25, + center_pts_per_edge = 3) else: - lambda_emulate = calculateP.emulate_iid_lebesgue(lam_domain=lam_domain, num_l_emulate = 1E5) - + simpleFunP.uniform_partition_uniform_distribution_rectangle_scaled( + data_set=my_discretization, Q_ref=Q_ref, rect_scale=0.25, + M=50, num_d_emulate=1E5) # calculate probablities -(P, lambda_emulate, io_ptr, emulate_ptr) = calculateP.prob_emulated(samples=samples, - data=data, - rho_D_M=d_distr_prob, - d_distr_samples=d_distr_samples, - lambda_emulate=lambda_emulate, - d_Tree=d_Tree) -# calculate 2d marginal probs +calculateP.prob(my_discretization) + +######################################## +# Post-process the results +######################################## ''' Suggested changes for user: - + At this point, the only thing that should change in the plotP.* inputs should be either the nbins values or sigma (which influences the kernel density estimation with smaller values implying a density estimate that looks more like a histogram and larger values smoothing out the values more). - + There are ways to determine "optimal" smoothing parameters (e.g., see CV, GCV, and other similar methods), but we have not incorporated these into the code -as lower-dimensional marginal plots have limited value in understanding the -structure of a high dimensional non-parametric probability measure. +as lower-dimensional marginal plots generally have limited value in understanding +the structure of a high dimensional non-parametric probability measure. 
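Since the text above recommends experimenting with sigma, here is a small sketch comparing two smoothing levels for the 2D marginals. It assumes the `input_samples` object populated by `calculateP.prob` above, that `smooth_marginals_2D` returns a new set of marginals rather than modifying its argument, and the output filenames are hypothetical:

    (bins, marginals2D) = plotP.calculate_2D_marginal_probs(input_samples,
                                                            nbins=[10, 10, 10])
    rough = plotP.smooth_marginals_2D(marginals2D, bins, sigma=0.05)
    smooth = plotP.smooth_marginals_2D(marginals2D, bins, sigma=1.0)
    plotP.plot_2D_marginal_probs(rough, bins, input_samples,
        filename="linearMap_sigma005", lam_ref=param_ref,
        file_extension=".eps", plot_surface=False)
    plotP.plot_2D_marginal_probs(smooth, bins, input_samples,
        filename="linearMap_sigma1", lam_ref=param_ref,
        file_extension=".eps", plot_surface=False)

Smaller sigma keeps the histogram-like structure of the marginals; larger sigma smears it out.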
''' -(bins, marginals2D) = plotP.calculate_2D_marginal_probs(P_samples = P, samples = lambda_emulate, lam_domain = lam_domain, nbins = [10, 10, 10]) +# calculate 2d marginal probs +(bins, marginals2D) = plotP.calculate_2D_marginal_probs(input_samples, + nbins = [10, 10, 10]) + # smooth 2d marginals probs (optional) -marginals2D = plotP.smooth_marginals_2D(marginals2D,bins, sigma=0.1) +marginals2D = plotP.smooth_marginals_2D(marginals2D, bins, sigma=0.2) # plot 2d marginals probs -plotP.plot_2D_marginal_probs(marginals2D, bins, lam_domain, filename = "linearMap", - plot_surface=False) +plotP.plot_2D_marginal_probs(marginals2D, bins, input_samples, filename = "linearMap", + lam_ref=param_ref, file_extension = ".eps", plot_surface=False) # calculate 1d marginal probs -(bins, marginals1D) = plotP.calculate_1D_marginal_probs(P_samples = P, samples = lambda_emulate, lam_domain = lam_domain, nbins = [10, 10, 10]) +(bins, marginals1D) = plotP.calculate_1D_marginal_probs(input_samples, + nbins = [10, 10, 10]) # smooth 1d marginal probs (optional) -marginals1D = plotP.smooth_marginals_1D(marginals1D, bins, sigma=0.1) +marginals1D = plotP.smooth_marginals_1D(marginals1D, bins, sigma=0.2) # plot 2d marginal probs -plotP.plot_1D_marginal_probs(marginals1D, bins, lam_domain, filename = "linearMap") - +plotP.plot_1D_marginal_probs(marginals1D, bins, input_samples, filename = "linearMap", + lam_ref=param_ref, file_extension = ".eps") diff --git a/examples/nonlinearMap/myModel.py b/examples/nonlinearMap/myModel.py new file mode 100644 index 00000000..819257aa --- /dev/null +++ b/examples/nonlinearMap/myModel.py @@ -0,0 +1,70 @@ +# Copyright (C) 2016 The BET Development Team + +# -*- coding: utf-8 -*- +import numpy as np +import math as m + +''' +Suggested changes for user: + +Try setting QoI_num = 2. + +Play around with the x1, y1, and/or, x2, y2 values to try and +"optimize" the QoI to give the highest probability region +on the reference parameter above. + +Hint: Try using QoI_num = 1 and systematically varying the +x1 and y1 values to find QoI with contour structures (as inferred +through the 2D marginal plots) that are nearly orthogonal. 
+ +Some interesting pairs of QoI to compare are: +(x1,y1)=(0.5,0.5) and (x2,y2)=(0.25,0.25) +(x1,y1)=(0.5,0.5) and (x2,y2)=(0.15,0.15) +(x1,y1)=(0.5,0.5) and (x2,y2)=(0.25,0.15) +''' +# Choose the number of QoI +QoI_num = 1 + +# Specify the spatial points to take measurements of solution defining the QoI +if QoI_num == 1: + x1 = 0.5 + y1 = 0.5 + x = np.array([x1]) + y = np.array([y1]) +else: + x1 = 0.5 + y1 = 0.15 + x2 = 0.15 + y2 = 0.25 + x = np.array([x1, x2]) + y = np.array([y1, y2]) + +class QoI_component(object): + def __init__(self, x, y): + self.x = x + self.y = y + def eval(self, parameter_samples): + if parameter_samples.shape == (2,): + lam1 = parameter_samples[0] + lam2 = parameter_samples[1] + else: + lam1 = parameter_samples[:,0] + lam2 = parameter_samples[:,1] + z = np.sin(m.pi * self.x * lam1) * np.sin(m.pi * self.y * lam2) + return z + +# Specify the QoI maps +if QoI_num == 1: + def QoI_map(parameter_samples): + Q1 = QoI_component(x[0], y[0]) + return np.array([Q1.eval(parameter_samples)]).transpose() +else: + def QoI_map(parameter_samples): + Q1 = QoI_component(x[0], y[0]) + Q2 = QoI_component(x[1], y[1]) + return np.array([Q1.eval(parameter_samples), Q2.eval(parameter_samples)]).transpose() + +# Define a model that is the QoI map +def my_model(parameter_samples): + QoI_samples = QoI_map(parameter_samples) + return QoI_samples \ No newline at end of file diff --git a/examples/nonlinearMap/nonlinearMapUniformSampling.py b/examples/nonlinearMap/nonlinearMapUniformSampling.py index 0c3f4673..97cb3f37 100644 --- a/examples/nonlinearMap/nonlinearMapUniformSampling.py +++ b/examples/nonlinearMap/nonlinearMapUniformSampling.py @@ -1,6 +1,6 @@ #! /usr/bin/env python -# Copyright (C) 2014-2015 The BET Development Team +# Copyright (C) 2014-2016 The BET Development Team r""" This example generates samples on a 2D grid and evaluates @@ -19,216 +19,149 @@ and :math:`\Omega=[0,1]\times[0,1]`. Probabilities -in the paramter space are calculated using emulated points. +in the parameter space are calculated using emulated points. 1D and 2D marginals are calculated, smoothed, and plotted. """ + import numpy as np -import math as m import bet.calculateP as calculateP import bet.postProcess as postProcess import bet.calculateP.simpleFunP as simpleFunP import bet.calculateP.calculateP as calculateP import bet.postProcess.plotP as plotP +import bet.postProcess.plotDomains as plotD +import bet.sample as samp +import bet.sampling.basicSampling as bsam +from myModel import my_model + + +# Initialize 3-dimensional input parameter sample set object +input_samples = samp.sample_set(2) -# parameter domain -lam_domain= np.array([[3.0, 6.0], - [1.0, 5.0]]) +# Set parameter domain +input_samples.set_domain(np.array([[3.0, 6.0], + [1.0, 5.0]])) -# reference parameters -ref_lam = [5.5, 4.5] +# Define the sampler that will be used to create the discretization +# object, which is the fundamental object used by BET to compute +# solutions to the stochastic inverse problem +sampler = bsam.sampler(my_model) ''' Suggested changes for user: - -Try setting n0 and n1 both to 10 and compare the results. - -Also, we can do uniform random sampling by setting - random_sample = True - -If random_sample = True, consider defining - - n_samples = 1E3 - -Then also try n_samples = 1E4. What happens when n_samples = 1E2? -''' -random_sample = False +Try with and without random sampling. 
-if random_sample == False: - n0 = 50 # number of samples in lam0 direction - n1 = 50 # number of samples in lam1 direction - n_samples = n0*n1 -else: - n_samples = 1E3 - -#set up samples -if random_sample == False: - vec0 = list(np.linspace(lam_domain[0][0], lam_domain[0][1], n0)) - vec1 = list(np.linspace(lam_domain[1][0], lam_domain[1][1], n1)) - vecv0, vecv1 = np.meshgrid(vec0, vec1, indexing='ij') - samples = np.vstack((vecv0.flat[:], vecv1.flat[:])).transpose() -else: - samples = calculateP.emulate_iid_lebesgue(lam_domain=lam_domain, - num_l_emulate = n_samples) +If using random sampling, try num_samples = 1E3 and 1E4. +What happens when num_samples = 1E2? +Try using 'lhs' instead of 'random' in the random_sample_set. -# QoI function -def QoI(x,y,lam1,lam2): - z = np.sin(m.pi*x*lam1)*np.sin(m.pi*y*lam2) - return z #np.vstack(z.flat[:]).transpose() +If using regular sampling, try different numbers of samples +per dimension. +''' +# Generate samples on the parameter space +randomSampling = False +if randomSampling is True: + sampler.random_sample_set('random', input_samples, num_samples=1E4) +else: + sampler.regular_sample_set(input_samples, num_samples_per_dim=[50, 50]) ''' Suggested changes for user: -Try setting QoI_num = 2. - -Play around with the x1, y1, and/or, x2, y2 values to try and -"optimize" the QoI to give the highest probability region -on the reference parameter above. +A standard Monte Carlo (MC) assumption is that every Voronoi cell +has the same volume. If a regular grid of samples was used, then +the standard MC assumption is true. -Hint: Try using QoI_num = 1 and systematically varying the -x1 and y1 values to find QoI with contour structures (as inferred -through the 2D marginal plots) that are nearly orthogonal. - -Some interesting pairs of QoI to compare are: -(x1,y1)=(0.5,0.5) and (x2,y2)=(0.25,0.25) -(x1,y1)=(0.5,0.5) and (x2,y2)=(0.15,0.15) -(x1,y1)=(0.5,0.5) and (x2,y2)=(0.25,0.15) +See what happens if the MC assumption is not assumed to be true, and +if different numbers of points are used to estimate the volumes of +the Voronoi cells. ''' -# Choose the QoI and define Q_ref -QoI_num = 1 - -if QoI_num == 1: - x1 = 0.5 - y1 = 0.5 - x = np.array([x1]) - y = np.array([y1]) - Q_ref = np.array([QoI(x[0],y[0],ref_lam[0],ref_lam[1])]) -else: - x1 = 0.5 - y1 = 0.15 - x2 = 0.15 - y2 = 0.25 - x = np.array([x1,x2]) - y = np.array([y1,y2]) - Q_ref = np.array([QoI(x[0],y[0],ref_lam[0],ref_lam[1]), - QoI(x[1],y[1],ref_lam[0],ref_lam[1])]) - -if QoI_num == 1: - def QoI_map(x,y,lam1,lam2): - Q1 = QoI(x[0],y[0],lam1,lam2) - z = np.array([Q1]).transpose() - return z +MC_assumption = True +# Estimate volumes of Voronoi cells associated with the parameter samples +if MC_assumption is False: + input_samples.estimate_volume(n_mc_points=1E5) else: - def QoI_map(x,y,lam1,lam2): - Q1 = QoI(x[0],y[0],lam1,lam2) - Q2 = QoI(x[1],y[1],lam1,lam2) - z = np.array([Q1,Q2]).transpose() - return z - -# calc data -data = QoI_map(x,y,samples[:,0],samples[:,1]) + input_samples.estimate_volume_mc() + +# Create the discretization object using the input samples +my_discretization = sampler.compute_QoI_and_create_discretization(input_samples, + savefile = 'NonlinearExample.txt.gz') ''' Suggested changes for user: - -Try different ways of discretizing the probability measure on D defined -as a uniform probability measure on a rectangle (if QoI_num = 2) or on -an interval (if QoI_num = 1). 
- -unif_unif creates a uniform measure on a hyperbox with dimensions -relative to the size of the circumscribed hyperbox of the set D using -the bin_ratio. A total of M samples are drawn within a slightly larger -scaled hyperbox to discretize this measure defining M total generalized -contour events in Lambda. The reason a slightly larger scaled hyperbox -is used to draw the samples to discretize D is because otherwise every -generalized contour event will have non-zero probability which obviously -defeats the purpose of "localizing" the probability within a subset of D. - -uniform_hyperrectangle uses the same measure defined in the same way as -unif_unif, but the difference is in the discretization which is on a -regular grid defined by center_pts_per_edge. If center_pts_per_edge = 1, -then the contour event corresponding to the entire support of rho_D is -approximated as a single event. This is done by carefully placing a -regular 3x3 grid (for the D=2 case) of points in D with the center -point of the grid in the center of the support of the measure and the -other points placed outside of the rectangle defining the support to -define a total of 9 contour events with 8 of them with zero probability. + +Try different reference parameters. ''' -deterministic_discretize_D = True +# Define the reference parameter +param_ref = np.array([5.5, 4.5]) +#param_ref = np.array([4.5, 3.0]) +#param_ref = np.array([3.5, 1.5]) -if deterministic_discretize_D == True: - (d_distr_prob, d_distr_samples, d_Tree) = simpleFunP.uniform_hyperrectangle(data=data, - Q_ref=Q_ref, bin_ratio=0.2, center_pts_per_edge = 1) -else: - (d_distr_prob, d_distr_samples, d_Tree) = simpleFunP.unif_unif(data=data, - Q_ref=Q_ref, M=50, bin_ratio=0.2, num_d_emulate=1E5) +# Compute the reference QoI +Q_ref = my_model(param_ref) +# Create some plots of input and output discretizations +plotD.scatter_2D(input_samples, p_ref = param_ref, filename = 'nonlinearMapParameterSamples.eps') +if Q_ref.size == 2: + plotD.show_data(my_discretization, Q_ref = Q_ref) -# create emulated points ''' Suggested changes for user: -If using a regular grid of sampling (if random_sample = False), we set - - lambda_emulate = samples - -Otherwise, play around with num_l_emulate. A value of 1E2 will probably -give poor results while results become fairly consistent with values -that are approximately 10x the number of samples. - -Note that you can always use - - lambda_emulate = samples - -and this simply will imply that a standard Monte Carlo assumption is -being used, which in a measure-theoretic context implies that each -Voronoi cell is assumed to have the same measure. This type of -approximation is more reasonable for large n_samples due to the slow -convergence rate of Monte Carlo (it converges like 1/sqrt(n_samples)). +Try different ways of discretizing the probability measure on D defined +as a uniform probability measure on a rectangle or interval depending +on choice of QoI_num in myModel.py. 
''' -if random_sample == False: - lambda_emulate = samples +randomDataDiscretization = False +if randomDataDiscretization is False: + simpleFunP.regular_partition_uniform_distribution_rectangle_scaled( + data_set=my_discretization, Q_ref=Q_ref, rect_scale=0.25, + center_pts_per_edge = 3) else: - lambda_emulate = calculateP.emulate_iid_lebesgue(lam_domain=lam_domain, num_l_emulate = 1E5) - + simpleFunP.uniform_partition_uniform_distribution_rectangle_scaled( + data_set=my_discretization, Q_ref=Q_ref, rect_scale=0.25, + M=50, num_d_emulate=1E5) # calculate probablities -(P, lambda_emulate, io_ptr, emulate_ptr) = calculateP.prob_emulated(samples=samples, - data=data, rho_D_M = d_distr_prob, d_distr_samples = d_distr_samples, - lambda_emulate=lambda_emulate, d_Tree=d_Tree) -# calculate 2d marginal probs +calculateP.prob(my_discretization) + +######################################## +# Post-process the results +######################################## ''' Suggested changes for user: - + At this point, the only thing that should change in the plotP.* inputs should be either the nbins values or sigma (which influences the kernel density estimation with smaller values implying a density estimate that looks more like a histogram and larger values smoothing out the values more). - + There are ways to determine "optimal" smoothing parameters (e.g., see CV, GCV, and other similar methods), but we have not incorporated these into the code -as lower-dimensional marginal plots have limited value in understanding the -structure of a high dimensional non-parametric probability measure. +as lower-dimensional marginal plots generally have limited value in understanding +the structure of a high dimensional non-parametric probability measure. ''' -(bins, marginals2D) = plotP.calculate_2D_marginal_probs(P_samples = P, samples = lambda_emulate, lam_domain = lam_domain, nbins = [20, 20]) +# calculate 2d marginal probs +(bins, marginals2D) = plotP.calculate_2D_marginal_probs(input_samples, + nbins = [20, 20]) # smooth 2d marginals probs (optional) -marginals2D = plotP.smooth_marginals_2D(marginals2D,bins, sigma=0.5) +marginals2D = plotP.smooth_marginals_2D(marginals2D, bins, sigma=0.5) # plot 2d marginals probs -plotP.plot_2D_marginal_probs(marginals2D, bins, lam_domain, filename = "nonlinearMap", - plot_surface=False) +plotP.plot_2D_marginal_probs(marginals2D, bins, input_samples, filename = "nomlinearMap", + lam_ref = param_ref, file_extension = ".eps", plot_surface=False) - # calculate 1d marginal probs -(bins, marginals1D) = plotP.calculate_1D_marginal_probs(P_samples = P, samples = lambda_emulate, lam_domain = lam_domain, nbins = [20, 20]) +(bins, marginals1D) = plotP.calculate_1D_marginal_probs(input_samples, + nbins = [20, 20]) # smooth 1d marginal probs (optional) marginals1D = plotP.smooth_marginals_1D(marginals1D, bins, sigma=0.5) -# plot 1d marginal probs -plotP.plot_1D_marginal_probs(marginals1D, bins, lam_domain, filename = "nonlinearMap") - - +# plot 2d marginal probs +plotP.plot_1D_marginal_probs(marginals1D, bins, input_samples, filename = "nonlinearMap", + lam_ref = param_ref, file_extension = ".eps") diff --git a/examples/parallel_and_serial_sampling/parallel_model.py b/examples/parallel_and_serial_sampling/parallel_model.py new file mode 100644 index 00000000..d9839eb6 --- /dev/null +++ b/examples/parallel_and_serial_sampling/parallel_model.py @@ -0,0 +1,35 @@ +# Copyright (C) 2016 The BET Development Team + +# -*- coding: utf-8 -*- + +import numpy as np +import os, sys +import scipy.io as sio 
+import bet.util as util +from bet.Comm import comm + +# Parameter space is nD +# Data space is n/2 D + +def my_model(io_file_name): + # read in input from file + io_mdat = sio.loadmat(io_file_name) + input = io_mdat['input'] + # localize input + input_local = np.array_split(input, comm.size)[comm.rank] + # model is y = x[:, 0:dim/2 ] + x[:, dim/2:] + output_local = sum(np.split(input_local, 2, 1)) + # save output to file + io_mdat['output'] = util.get_global_values(output_local) + comm.barrier() + if comm.rank == 0: + sio.savemat(io_file_name, io_mdat) + +def usage(): + print "usage: [io_file]" + +if __name__ == "__main__": + if len(sys.argv) == 2: + my_model(sys.argv[1]) + else: + usage() diff --git a/examples/parallel_and_serial_sampling/parallel_parallel.py b/examples/parallel_and_serial_sampling/parallel_parallel.py new file mode 100644 index 00000000..953b602b --- /dev/null +++ b/examples/parallel_and_serial_sampling/parallel_parallel.py @@ -0,0 +1,34 @@ +# Copyright (C) 2016 The BET Development Team + +# -*- coding: utf-8 -*- + +# TODO THIS MIGHT NOT WORK +# This demonstrates how to use BET in parallel to sample a parallel external model. +# run by calling "mpirun -np nprocs python parallel_parallel.py" + +import os, subprocess +import scipy.io as sio +import bet.sampling.basicSampling as bsam +from bet.Comm import comm + +def lb_model(input_data, nprocs=2): + io_file_name = "io_file_"+str(comm.rank) + io_mdat = dict() + io_mdat['input'] = input_data + + # save the input to file + sio.savemat(io_file_name, io_mdat) + + # run the model + subprocess.call(['mpirun', '-np', nprocs, 'python', 'parallel_model.py', + io_file_name]) + + # read the output from file + io_mdat = sio.loadmat(io_file_name) + output_data = io_mdat['output'] + return output_data + +my_sampler = bsam.sampler(lb_model) +my_discretization = my_sampler.create_random_discretization(sample_type='r', + input_obj=4, savefile="parallel_parallel_example", num_samples=100, + parallel=True) diff --git a/examples/parallel_and_serial_sampling/parallel_serial.py b/examples/parallel_and_serial_sampling/parallel_serial.py new file mode 100644 index 00000000..9e2103b6 --- /dev/null +++ b/examples/parallel_and_serial_sampling/parallel_serial.py @@ -0,0 +1,32 @@ +# Copyright (C) 2016 The BET Development Team + +# -*- coding: utf-8 -*- + +# This demonstrates how to use BET in parallel to sample a serial external model. 
+# run by calling "mpirun -np nprocs python parallel_serial.py" + +import os, subprocess +import scipy.io as sio +import bet.sampling.basicSampling as bsam +from bet.Comm import comm + +def lb_model(input_data): + io_file_name = "io_file_"+str(comm.rank) + io_mdat = dict() + io_mdat['input'] = input_data + + # save the input to file + sio.savemat(io_file_name, io_mdat) + + # run the model + subprocess.call(['python', 'serial_model.py', io_file_name]) + + # read the output from file + io_mdat = sio.loadmat(io_file_name) + output_data = io_mdat['output'] + return output_data + +my_sampler = bsam.sampler(lb_model) +my_discretization = my_sampler.create_random_discretization(sample_type='r', + input_obj=4, savefile="parallel_serial_example", num_samples=100, + parallel=True) diff --git a/examples/parallel_and_serial_sampling/serial_model.py b/examples/parallel_and_serial_sampling/serial_model.py new file mode 100644 index 00000000..84d5e6f2 --- /dev/null +++ b/examples/parallel_and_serial_sampling/serial_model.py @@ -0,0 +1,29 @@ +# Copyright (C) 2016 The BET Development Team + +# -*- coding: utf-8 -*- + +import numpy as np +import sys +import scipy.io as sio + +# Parameter space is nD +# Data space is n/2 D + +def my_model(io_file_name): + # read in input from file + io_mdat = sio.loadmat(io_file_name) + input_samples = io_mdat['input'] + # model is y = x[:, 0:dim/2 ] + x[:, dim/2:] + output_samples = sum(np.split(input_samples, 2, 1)) + # save output to file + io_mdat['output'] = output_samples + sio.savemat(io_file_name, io_mdat) + +def usage(): + print "usage: [io_file]" + +if __name__ == "__main__": + if len(sys.argv) == 2: + my_model(sys.argv[1]) + else: + usage() diff --git a/examples/parallel_and_serial_sampling/serial_parallel.py b/examples/parallel_and_serial_sampling/serial_parallel.py new file mode 100644 index 00000000..d114060d --- /dev/null +++ b/examples/parallel_and_serial_sampling/serial_parallel.py @@ -0,0 +1,31 @@ +# Copyright (C) 2016 The BET Development Team + +# -*- coding: utf-8 -*- + +# This demonstrates how to use BET in serial to sample a parallel external model. +# run by calling "python serial_parallel.py" + +import os, subprocess +import scipy.io as sio +import bet.sampling.basicSampling as bsam + +def lb_model(input_data, nprocs=2): + io_file_name = "io_file" + io_mdat = dict() + io_mdat['input'] = input_data + + # save the input to file + sio.savemat(io_file_name, io_mdat) + + # run the model + subprocess.call(['mpirun', '-np', str(nprocs), 'python', 'parallel_model.py', + io_file_name]) + + # read the output from file + io_mdat = sio.loadmat(io_file_name) + output_data = io_mdat['output'] + return output_data + +my_sampler = bsam.sampler(lb_model) +my_discretization = my_sampler.create_random_discretization(sample_type='r', + input_obj=4, savefile="serial_parallel_example", num_samples=100) diff --git a/examples/parallel_and_serial_sampling/serial_serial.py b/examples/parallel_and_serial_sampling/serial_serial.py new file mode 100644 index 00000000..c47690d6 --- /dev/null +++ b/examples/parallel_and_serial_sampling/serial_serial.py @@ -0,0 +1,30 @@ +# Copyright (C) 2016 The BET Development Team + +# -*- coding: utf-8 -*- + +# This demonstrates how to use BET in serial to sample a serial external model. 
+# run by calling "python serial_serial.py" + +import os, subprocess +import scipy.io as sio +import bet.sampling.basicSampling as bsam + +def lb_model(input_data): + io_file_name = "io_file" + io_mdat = dict() + io_mdat['input'] = input_data + + # save the input to file + sio.savemat(io_file_name, io_mdat) + + # run the model + subprocess.call(['python', 'serial_model.py', io_file_name]) + + # read the output from file + io_mdat = sio.loadmat(io_file_name) + output_data = io_mdat['output'] + return output_data + +my_sampler = bsam.sampler(lb_model) +my_discretization = my_sampler.create_random_discretization(sample_type='r', + input_obj=4, savefile="serial_serial_example", num_samples=100) diff --git a/examples/sensitivity/heatplate/chooseOptQoIs_2d.py b/examples/sensitivity/heatplate/chooseOptQoIs_2d.py index 9e89f193..e36a2c6a 100644 --- a/examples/sensitivity/heatplate/chooseOptQoIs_2d.py +++ b/examples/sensitivity/heatplate/chooseOptQoIs_2d.py @@ -14,19 +14,22 @@ import bet.Comm as comm import scipy.io as sio import numpy as np +import bet.sample as sample # Import the data from the FEniCS simulation (RBF or FFD or CFD clusters) matfile = sio.loadmat('heatplate_2d_16clustersRBF_1000qoi.mat') #matfile = sio.loadmat('heatplate_2d_16clustersFFD_1000qoi.mat') #matfile = sio.loadmat('heatplate_2d_16clustersCFD_1000qoi.mat') -samples = matfile['samples'] -data = matfile['data'] -Lambda_dim = samples.shape[1] +input_set = sample.sample_set(2) +output_set = sample.sample_set(1000) + +input_set._values = matfile['samples'] +output_set._values = matfile['data'] # Calculate the gradient vectors at each of the 16 centers for each of the # QoI maps -G = grad.calculate_gradients_rbf(samples, data) +input_set._jacobians = grad.calculate_gradients_rbf(input_set, output_set) #G = grad.calculate_gradients_ffd(samples, data) #G = grad.calculate_gradients_cfd(samples, data) @@ -35,7 +38,7 @@ indexstart = 0 indexstop = 20 qoiIndices = range(indexstart, indexstop) -condnum_indices_mat = cQoIs.chooseOptQoIs(G, qoiIndices) +condnum_indices_mat = cQoIs.chooseOptQoIs(input_set, qoiIndices) qoi1 = condnum_indices_mat[0, 1] qoi2 = condnum_indices_mat[0, 2] @@ -47,5 +50,5 @@ # Choose a specific set of QoIs to check the condition number of index1 = 0 index2 = 4 -singvals = np.linalg.svd(G[:, [index1, index2], :], compute_uv=False) +singvals = np.linalg.svd(input_set._jacobians[:, [index1, index2], :], compute_uv=False) spec_condnum = np.sum(singvals[:,0]/singvals[:,-1], axis=0)/16 diff --git a/examples/sensitivity/linear/linear_condnum_binratio.py b/examples/sensitivity/linear/linear_condnum_binratio.py index 845b48ab..9503a3c2 100644 --- a/examples/sensitivity/linear/linear_condnum_binratio.py +++ b/examples/sensitivity/linear/linear_condnum_binratio.py @@ -4,14 +4,14 @@ This example generates uniform random samples in the unit hypercube and corresponding QoIs (data) generated by a linear map Q. We then calculate the gradients using an RBF scheme and use the gradient information to choose the -optimal set of 2 (3, 4, ... Lambda_dim) QoIs to use in the inverse problem. +optimal set of 2 (3, 4, ... input_dim) QoIs to use in the inverse problem. Every real world problem requires special attention regarding how we choose *optimal QoIs*. This set of examples (examples/sensitivity/linear) covers some of the more common scenarios using easy to understand linear maps. 
In this *condnum_binratio* example we choose *optimal QoIs* to be the set of QoIs -of size Lambda_dim that has optimal skewness properties which will yield an +of size input_dim that has optimal skewness properties which will yield an inverse solution that can be approximated well. The uncertainty in our data is relative to the range of data measured in each QoI (bin_ratio). """ @@ -23,36 +23,42 @@ import bet.calculateP.calculateP as calculateP import bet.postProcess.postTools as postTools import bet.Comm as comm +import bet.sample as sample # Let Lambda be a 5 dimensional hypercube -Lambda_dim = 5 -Data_dim = 10 +input_dim = 5 +output_dim = 10 num_samples = 1E5 num_centers = 10 -# Let the map Q be a random matrix of size (Data_dim, Lambda_dim) +# Let the map Q be a random matrix of size (output_dim, input_dim) np.random.seed(0) -Q = np.random.random([Data_dim, Lambda_dim]) +Q = np.random.random([output_dim, input_dim]) # Choose random samples in parameter space to solve the model -samples = np.random.random([num_samples, Lambda_dim]) -data = Q.dot(samples.transpose()).transpose() +input_set = sample.sample_set(input_dim) +input_set_centers = sample.sample_set(input_dim) +output_set = sample.sample_set(output_dim) + +input_set._values = np.random.random([num_samples, input_dim]) +input_set_centers._values = input_set._values[:num_centers] +output_set._values = Q.dot(input_set._values.transpose()).transpose() # Calculate the gradient vectors at some subset of the samples. Here the # *normalize* argument is set to *True* because we are using bin_ratio to # determine the uncertainty in our data. -G = grad.calculate_gradients_rbf(samples, data, centers=samples[:num_centers, :], - normalize=True) +input_set._jacobians = grad.calculate_gradients_rbf(input_set, output_set, + input_set_centers, normalize=True) # With these gradient vectors, we are now ready to choose an optimal set of # QoIs to use in the inverse problem, based on optimal skewness properites of # QoI vectors. The most robust method for this is # :meth:~bet.sensitivity.chooseQoIs.chooseOptQoIs_large which returns the -# best set of 2, 3, 4 ... until Lambda_dim. This method returns a list of +# best set of 2, 3, 4 ... until input_dim. This method returns a list of # matrices. Each matrix has 10 rows, the first column representing the # average condition number of the Jacobian of Q, and the rest of the columns # the corresponding QoI indices. -best_sets = cQoI.chooseOptQoIs_large(G, volume=False) +best_sets = cQoI.chooseOptQoIs_large(input_set, volume=False) ############################################################################### @@ -61,7 +67,7 @@ # different sets of these QoIs. We set Q_ref to correspond to the center of # the parameter space. We choose the set of QoIs to consider. 
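Before fixing a particular pair, it can help to look at what `chooseOptQoIs_large` returned. A hypothetical inspection that relies only on the layout described in the comments above (first column the average condition number, remaining columns the QoI indices, one matrix per set size starting at 2):

    for k, mat in enumerate(best_sets):
        print('best set of size %d: indices %s, condition number %.3f'
              % (k + 2, mat[0, 1:].astype(int), mat[0, 0]))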
-QoI_indices = [3, 4] # choose up to Lambda_dim +QoI_indices = [3, 4] # choose up to input_dim #QoI_indices = [3, 6] #QoI_indices = [0, 3] #QoI_indices = [3, 5, 6, 8, 9] @@ -70,26 +76,27 @@ #QoI_indices = [2, 3, 5, 6, 9] # Restrict the data to have just QoI_indices -data = data[:, QoI_indices] -Q_ref = Q[QoI_indices, :].dot(0.5 * np.ones(Lambda_dim)) +output_set._values = output_set._values[:, QoI_indices] +Q_ref = Q[QoI_indices, :].dot(0.5 * np.ones(input_dim)) # bin_ratio defines the uncertainty in our data bin_ratio = 0.25 # Find the simple function approximation (d_distr_prob, d_distr_samples, d_Tree) = simpleFunP.uniform_hyperrectangle(\ - data=data, Q_ref=Q_ref, bin_ratio=bin_ratio, center_pts_per_edge = 1) + data=output_set._values, Q_ref=Q_ref, bin_ratio=bin_ratio, center_pts_per_edge = 1) # Calculate probablities making the Monte Carlo assumption -(P, lam_vol, io_ptr) = calculateP.prob(samples=samples, data=data, - rho_D_M=d_distr_prob, d_distr_samples=d_distr_samples) +(P, lam_vol, io_ptr) = calculateP.prob(samples=input_set._values, + data=output_set._values, rho_D_M=d_distr_prob, + d_distr_samples=d_distr_samples) percentile = 1.0 # Sort samples by highest probability density and find how many samples lie in # the support of the inverse solution. With the Monte Carlo assumption, this # also tells us the approximate volume of this support. -(num_samples, P_high, samples_high, lam_vol_high, data_high) =\ +(num_samples, P_high, samples_high, lam_vol_high, data_high, sort) =\ postTools.sample_highest_prob(top_percentile=percentile, P_samples=P, - samples=samples, lam_vol=lam_vol,data = data,sort=True) + samples=input_set._values, lam_vol=lam_vol,data=output_set._values,sort=True) # Print the number of samples that make up the highest percentile percent # samples and ratio of the volume of the parameter domain they take up diff --git a/examples/sensitivity/linear/linear_measure_binratio.py b/examples/sensitivity/linear/linear_measure_binratio.py new file mode 100644 index 00000000..32576841 --- /dev/null +++ b/examples/sensitivity/linear/linear_measure_binratio.py @@ -0,0 +1,115 @@ +# Copyright (C) 2014-2015 The BET Development Team + +""" +This example generates uniform random samples in the unit hypercube and +corresponding QoIs (data) generated by a linear map Q. We then calculate the +gradients using an RBF scheme and use the gradient information to choose the +optimal set of 2 (3, 4, ... input_dim) QoIs to use in the inverse problem. + +Every real world problem requires special attention regarding how we choose +*optimal QoIs*. This set of examples (examples/sensitivity/linear) covers +some of the more common scenarios using easy to understand linear maps. + +In this *measure_binratio* example we choose *optimal QoIs* to be the set of +QoIs of size input_dim that produces the smallest measure of the support of the +inverse solution, assuming we define the uncertainty in our data relative to +the range of data measured in each QoI (bin_ratio). 
+""" + +import numpy as np +import bet.sensitivity.gradients as grad +import bet.sensitivity.chooseQoIs as cQoI +import bet.calculateP.simpleFunP as simpleFunP +import bet.calculateP.calculateP as calculateP +import bet.postProcess.postTools as postTools +import bet.Comm as comm +import bet.sample as sample + +# Let Lambda be a 5 dimensional hypercube +input_dim = 5 +output_dim = 10 +num_samples = 1E5 +num_centers = 10 + +# Let the map Q be a random matrix of size (output_dim, input_dim) +np.random.seed(0) +Q = np.random.random([output_dim, input_dim]) + +# Choose random samples in parameter space to solve the model +input_set = sample.sample_set(input_dim) +input_set_centers = sample.sample_set(input_dim) +output_set = sample.sample_set(output_dim) + +input_set._values = np.random.random([num_samples, input_dim]) +input_set_centers._values = input_set._values[:num_centers] +output_set._values = Q.dot(input_set._values.transpose()).transpose() + +# Calculate the gradient vectors at some subset of the samples. Here the +# *normalize* argument is set to *True* because we are using *bin_ratio* to +# determine the uncertainty in our data. +input_set._jacobians = grad.calculate_gradients_rbf(input_set, output_set, + input_set_centers, normalize=True) + +# With these gradient vectors, we are now ready to choose an optimal set of +# QoIs to use in the inverse problem, based on minimizing the mesure of the +# inverse solution. The most robust method for this is +# :meth:~bet.sensitivity.chooseQoIs.chooseOptQoIs_large which returns the +# best set of 2, 3, 4 ... until input_dim. This method returns a list of +# matrices. Each matrix has 10 rows, the first column representing the +# expected inverse measure ratio, and the rest of the columns the corresponding +# QoI indices. +best_sets = cQoI.chooseOptQoIs_large(input_set, measure=True) + +############################################################################### + +# At this point we have determined the optimal set of QoIs to use in the inverse +# problem. Now we compare the support of the inverse solution using +# different sets of these QoIs. We set Q_ref to correspond to the center of +# the parameter space. We choose the set of QoIs to consider. + +QoI_indices = [3, 6] # choose up to input_dim +#QoI_indices = [3, 4] +#QoI_indices = [8, 9] +#QoI_indices = [3, 5, 6, 8, 9] +#QoI_indices = [3, 4, 5, 8, 9] +#QoI_indices = [2, 3, 6, 8, 9] +#QoI_indices = [3, 5, 6, 7, 8] +#QoI_indices = [0, 1, 2, 3, 4] + +''' +In this linear case we expect our ordering of sets of QoIs to be very good. But +we see in this example that the set [3, 4, 5, 8, 9] (set 1) has a smaller +expected measure ratio than the set [2, 3, 6, 8, 9] (set 2), however the inverse +solution yields larger measure of support for set 1 than set 2. This is likely +due to the fact that we restrict ourselves to the parameter space [0, 1]^5, and +the actual support of the inverse solution may extend out of this space. The +expected measure ratio is computed assuming an unbounded parameter space. 
+''' + +# Restrict the data to have just QoI_indices +output_set._values = output_set._values[:, QoI_indices] +Q_ref = Q[QoI_indices, :].dot(0.5 * np.ones(input_dim)) + +# bin_ratio defines the uncertainty in our data +bin_ratio = 0.25 + +# Find the simple function approximation +(d_distr_prob, d_distr_samples, d_Tree) = simpleFunP.uniform_hyperrectangle(\ + data=output_set._values, Q_ref=Q_ref, bin_ratio=bin_ratio, center_pts_per_edge = 1) + +# Calculate probablities making the Monte Carlo assumption +(P, lam_vol, io_ptr) = calculateP.prob(samples=input_set._values, + data=output_set._values,rho_D_M=d_distr_prob, d_distr_samples=d_distr_samples) + +percentile = 1.0 +# Sort samples by highest probability density and find how many samples lie in +# the support of the inverse solution. With the Monte Carlo assumption, this +# also tells us the approximate measure of this support. +(num_samples, P_high, samples_high, lam_vol_high, data_high, sort) =\ + postTools.sample_highest_prob(top_percentile=percentile, P_samples=P, + samples=input_set._values, lam_vol=lam_vol,data=output_set._values,sort=True) + +# Print the number of samples that make up the highest percentile percent +# samples and ratio of the measure of the parameter domain they take up +if comm.rank == 0: + print (num_samples, np.sum(lam_vol_high)) diff --git a/examples/sensitivity/linear/linear_measure_binsize.py b/examples/sensitivity/linear/linear_measure_binsize.py new file mode 100644 index 00000000..4ed25150 --- /dev/null +++ b/examples/sensitivity/linear/linear_measure_binsize.py @@ -0,0 +1,114 @@ +# Copyright (C) 2014-2015 The BET Development Team + +""" +This example generates uniform random samples in the unit hypercube and +corresponding QoIs (data) generated by a linear map Q. We then calculate the +gradients using an RBF scheme and use the gradient information to choose the +optimal set of 2 (3, 4, ... input_dim) QoIs to use in the inverse problem. + +Every real world problem requires special attention regarding how we choose +*optimal QoIs*. This set of examples (examples/sensitivity/linear) covers +some of the more common scenarios using easy to understand linear maps. + +In this *measure_binsize_large* example we choose *optimal QoIs* to be the set of +QoIs of size input_dim that produces the smallest measure of the inverse +solution, assuming we define the uncertainty in our data to be fixed, i.e., +independent of the range of data maesured for each QoI (bin_size). +""" + +import numpy as np +import bet.sensitivity.gradients as grad +import bet.sensitivity.chooseQoIs as cQoI +import bet.calculateP.simpleFunP as simpleFunP +import bet.calculateP.calculateP as calculateP +import bet.postProcess.postTools as postTools +import bet.Comm as comm +import bet.sample as sample + +# Let Lambda be a 5 dimensional hypercube +input_dim = 10 +output_dim = 100 +num_samples = 1E5 +num_centers = 10 + +# Let the map Q be a random matrix of size (output_dim, input_dim) +np.random.seed(0) +Q = np.random.random([output_dim, input_dim]) + +# Choose random samples in parameter space to solve the model +input_set = sample.sample_set(input_dim) +input_set_centers = sample.sample_set(input_dim) +output_set = sample.sample_set(output_dim) + +input_set._values = np.random.random([num_samples, input_dim]) +input_set_centers._values = input_set._values[:num_centers] +output_set._values = Q.dot(input_set._values.transpose()).transpose() + +# Calculate the gradient vectors at some subset of the samples. 
Here the +# *normalize* argument is set to *False* because we are using bin_size to +# determine the uncertainty in our data. +input_set._jacobians = grad.calculate_gradients_rbf(input_set, output_set, + input_set_centers, normalize=False) + +# With these gradient vectors, we are now ready to choose an optimal set of +# QoIs to use in the inverse problem, based on minimizing the mesure of the +# inverse solution. The most robust method for this is +# :meth:~bet.sensitivity.chooseQoIs.chooseOptQoIs_large which returns the +# best set of 2, 3, 4 ... until input_dim. This method returns a list of +# matrices. Each matrix has 10 rows, the first column representing the +# expected inverse measure ratio, and the rest of the columns the corresponding +# QoI indices. +best_sets = cQoI.chooseOptQoIs_large(input_set, max_qois_return=5, + num_optsets_return=2, inner_prod_tol=0.9, cond_tol=1E2, measure=True) + +''' +We see here the expected measure ratios are small. This number represents the +expected measure of the inverse image of a unit hypercube in the data space. +With the bin_size definition of the uncertainty in the data, here we expect to +see inverse solutions that have a smaller measure (expected measure ratio < 1) +than the original measure of the hypercube in the data space. + +This interpretation of the expected measure ratios is only valid for inverting +from a data space that has the same dimensions as the paramter space. When +inverting into a higher dimensional space, this expected measure ratio is the +expected measure of the cross section of the inverse solution. +''' +############################################################################### + +# At this point we have determined the optimal set of QoIs to use in the inverse +# problem. Now we compare the measure of the inverse solution using +# different sets of these QoIs. We set Q_ref to correspond to the center of +# the parameter space. We choose the set of QoIs to consider. + +QoI_indices = [0, 7] # choose up to input_dim +#QoI_indices = [0, 1] +#QoI_indices = [0, 7, 34, 39, 90] +#QoI_indices = [0, 1, 2, 3, 4] + +# Restrict the data to have just QoI_indices +output_set._values = output_set._values[:, QoI_indices] +Q_ref = Q[QoI_indices, :].dot(0.5 * np.ones(input_dim)) +# bin_size defines the uncertainty in our data +bin_size = 0.25 + +# Find the simple function approximation +(d_distr_prob, d_distr_samples, d_Tree) =\ + simpleFunP.uniform_hyperrectangle_binsize(data=output_set._values, Q_ref=Q_ref, + bin_size=bin_size, center_pts_per_edge = 1) + +# Calculate probablities making the Monte Carlo assumption +(P, lam_vol, io_ptr) = calculateP.prob(samples=input_set._values, + data=output_set._values, rho_D_M=d_distr_prob, d_distr_samples=d_distr_samples) + +percentile = 1.0 +# Sort samples by highest probability density and find how many samples lie in +# the measure of the inverse solution. With the Monte Carlo assumption, this +# also tells us the approximate measure of this measure. 
+(num_samples, P_high, samples_high, lam_vol_high, data_high, sort) =\ + postTools.sample_highest_prob(top_percentile=percentile, P_samples=P, + samples=input_set._values, lam_vol=lam_vol,data=output_set._values,sort=True) + +# Print the number of samples that make up the highest percentile percent +# samples and ratio of the measure of the parameter domain they take up +if comm.rank == 0: + print (num_samples, np.sum(lam_vol_high)) diff --git a/examples/sensitivity/linear/linear_skewness_binratio.py b/examples/sensitivity/linear/linear_skewness_binratio.py new file mode 100644 index 00000000..c031572d --- /dev/null +++ b/examples/sensitivity/linear/linear_skewness_binratio.py @@ -0,0 +1,104 @@ +# Copyright (C) 2014-2015 The BET Development Team + +""" +This example generates uniform random samples in the unit hypercube and +corresponding QoIs (data) generated by a linear map Q. We then calculate the +gradients using an RBF scheme and use the gradient information to choose the +optimal set of 2 (3, 4, ... input_dim) QoIs to use in the inverse problem. + +Every real world problem requires special attention regarding how we choose +*optimal QoIs*. This set of examples (examples/sensitivity/linear) covers +some of the more common scenarios using easy to understand linear maps. + +In this *skweness_binratio* example we choose *optimal QoIs* to be the set of QoIs +of size input_dim that has optimal skewness properties which will yield an +inverse solution that can be approximated well. The uncertainty in our data is +relative to the range of data measured in each QoI (bin_ratio). +""" + +import numpy as np +import bet.sensitivity.gradients as grad +import bet.sensitivity.chooseQoIs as cQoI +import bet.calculateP.simpleFunP as simpleFunP +import bet.calculateP.calculateP as calculateP +import bet.postProcess.postTools as postTools +import bet.Comm as comm +import bet.sample as sample + +# Let Lambda be a 5 dimensional hypercube +input_dim = 5 +output_dim = 10 +num_samples = 1E5 +num_centers = 10 + +# Let the map Q be a random matrix of size (output_dim, input_dim) +np.random.seed(0) +Q = np.random.random([output_dim, input_dim]) + +# Choose random samples in parameter space to solve the model +input_set = sample.sample_set(input_dim) +input_set_centers = sample.sample_set(input_dim) +output_set = sample.sample_set(output_dim) + +input_set._values = np.random.random([num_samples, input_dim]) +input_set_centers._values = input_set._values[:num_centers] +output_set._values = Q.dot(input_set._values.transpose()).transpose() + +# Calculate the gradient vectors at some subset of the samples. Here the +# *normalize* argument is set to *True* because we are using bin_ratio to +# determine the uncertainty in our data. +input_set._jacobians = grad.calculate_gradients_rbf(input_set, output_set, + input_set_centers, normalize=True) + +# With these gradient vectors, we are now ready to choose an optimal set of +# QoIs to use in the inverse problem, based on optimal skewness properites of +# QoI vectors. The most robust method for this is +# :meth:~bet.sensitivity.chooseQoIs.chooseOptQoIs_large which returns the +# best set of 2, 3, 4 ... until input_dim. This method returns a list of +# matrices. Each matrix has 10 rows, the first column representing the +# average skewness of the Jacobian of Q, and the rest of the columns +# the corresponding QoI indices. 
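A related spot check for a single candidate pair, mirroring the SVD-based condition-number computation in examples/sensitivity/heatplate/chooseOptQoIs_2d.py; it assumes the Jacobians stored on `input_set` above and that they are shaped (num_centers, output_dim, input_dim):

    index1, index2 = 3, 4
    singvals = np.linalg.svd(input_set._jacobians[:, [index1, index2], :],
                             compute_uv=False)
    avg_condnum = np.sum(singvals[:, 0] / singvals[:, -1], axis=0) / singvals.shape[0]
    print('average condition number for QoIs (%d, %d): %.3f'
          % (index1, index2, avg_condnum))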
+best_sets = cQoI.chooseOptQoIs_large(input_set, measure=False) + +############################################################################### + +# At this point we have determined the optimal set of QoIs to use in the inverse +# problem. Now we compare the support of the inverse solution using +# different sets of these QoIs. We set Q_ref to correspond to the center of +# the parameter space. We choose the set of QoIs to consider. + +QoI_indices = [3, 4] # choose up to input_dim +#QoI_indices = [3, 6] +#QoI_indices = [0, 3] +#QoI_indices = [3, 5, 6, 8, 9] +#QoI_indices = [0, 3, 5, 8, 9] +#QoI_indices = [3, 4, 5, 8, 9] +#QoI_indices = [2, 3, 5, 6, 9] + +# Restrict the data to have just QoI_indices +output_set._values = output_set._values[:, QoI_indices] +Q_ref = Q[QoI_indices, :].dot(0.5 * np.ones(input_dim)) +# bin_ratio defines the uncertainty in our data +bin_ratio = 0.25 + +# Find the simple function approximation +(d_distr_prob, d_distr_samples, d_Tree) = simpleFunP.uniform_hyperrectangle(\ + data=output_set._values, Q_ref=Q_ref, bin_ratio=bin_ratio, center_pts_per_edge = 1) + +# Calculate probablities making the Monte Carlo assumption +(P, lam_vol, io_ptr) = calculateP.prob(samples=input_set._values, + data=output_set._values, rho_D_M=d_distr_prob, + d_distr_samples=d_distr_samples) + +percentile = 1.0 +# Sort samples by highest probability density and find how many samples lie in +# the support of the inverse solution. With the Monte Carlo assumption, this +# also tells us the approximate measure of this support. +(num_samples, P_high, samples_high, lam_vol_high, data_high, sort) =\ + postTools.sample_highest_prob(top_percentile=percentile, P_samples=P, + samples=input_set._values, lam_vol=lam_vol,data=output_set._values,sort=True) + +# Print the number of samples that make up the highest percentile percent +# samples and ratio of the measure of the parameter domain they take up +if comm.rank == 0: + print (num_samples, np.sum(lam_vol_high)) diff --git a/examples/sensitivity/linear/linear_volume_binratio.py b/examples/sensitivity/linear/linear_volume_binratio.py index 0b785c46..7dfdcf11 100644 --- a/examples/sensitivity/linear/linear_volume_binratio.py +++ b/examples/sensitivity/linear/linear_volume_binratio.py @@ -4,14 +4,14 @@ This example generates uniform random samples in the unit hypercube and corresponding QoIs (data) generated by a linear map Q. We then calculate the gradients using an RBF scheme and use the gradient information to choose the -optimal set of 2 (3, 4, ... Lambda_dim) QoIs to use in the inverse problem. +optimal set of 2 (3, 4, ... input_dim) QoIs to use in the inverse problem. Every real world problem requires special attention regarding how we choose *optimal QoIs*. This set of examples (examples/sensitivity/linear) covers some of the more common scenarios using easy to understand linear maps. In this *volume_binratio* example we choose *optimal QoIs* to be the set of QoIs -of size Lambda_dim that produces the smallest support of the inverse solution, +of size input_dim that produces the smallest support of the inverse solution, assuming we define the uncertainty in our data relative to the range of data measured in each QoI (bin_ratio). 
""" @@ -23,36 +23,42 @@ import bet.calculateP.calculateP as calculateP import bet.postProcess.postTools as postTools import bet.Comm as comm +import bet.sample as sample # Let Lambda be a 5 dimensional hypercube -Lambda_dim = 5 -Data_dim = 10 +input_dim = 5 +output_dim = 10 num_samples = 1E5 num_centers = 10 -# Let the map Q be a random matrix of size (Data_dim, Lambda_dim) +# Let the map Q be a random matrix of size (output_dim, input_dim) np.random.seed(0) -Q = np.random.random([Data_dim, Lambda_dim]) +Q = np.random.random([output_dim, input_dim]) # Choose random samples in parameter space to solve the model -samples = np.random.random([num_samples, Lambda_dim]) -data = Q.dot(samples.transpose()).transpose() +input_set = sample.sample_set(input_dim) +input_set_centers = sample.sample_set(input_dim) +output_set = sample.sample_set(output_dim) + +input_set._values = np.random.random([num_samples, input_dim]) +input_set_centers._values = input_set._values[:num_centers] +output_set._values = Q.dot(input_set._values.transpose()).transpose() # Calculate the gradient vectors at some subset of the samples. Here the # *normalize* argument is set to *True* because we are using *bin_ratio* to # determine the uncertainty in our data. -G = grad.calculate_gradients_rbf(samples, data, centers=samples[:num_centers, :], - normalize=True) +input_set._jacobians = grad.calculate_gradients_rbf(input_set, output_set, + input_set_centers, normalize=True) # With these gradient vectors, we are now ready to choose an optimal set of # QoIs to use in the inverse problem, based on minimizing the support of the # inverse solution (volume). The most robust method for this is # :meth:~bet.sensitivity.chooseQoIs.chooseOptQoIs_large which returns the -# best set of 2, 3, 4 ... until Lambda_dim. This method returns a list of +# best set of 2, 3, 4 ... until input_dim. This method returns a list of # matrices. Each matrix has 10 rows, the first column representing the # expected inverse volume ratio, and the rest of the columns the corresponding # QoI indices. -best_sets = cQoI.chooseOptQoIs_large(G, volume=True) +best_sets = cQoI.chooseOptQoIs_large(input_set, volume=True) ############################################################################### @@ -61,7 +67,7 @@ # different sets of these QoIs. We set Q_ref to correspond to the center of # the parameter space. We choose the set of QoIs to consider. 
-QoI_indices = [3, 6] # choose up to Lambda_dim +QoI_indices = [3, 6] # choose up to input_dim #QoI_indices = [3, 4] #QoI_indices = [8, 9] #QoI_indices = [3, 5, 6, 8, 9] @@ -81,27 +87,27 @@ ''' # Restrict the data to have just QoI_indices -data = data[:, QoI_indices] -Q_ref = Q[QoI_indices, :].dot(0.5 * np.ones(Lambda_dim)) +output_set._values = output_set._values[:, QoI_indices] +Q_ref = Q[QoI_indices, :].dot(0.5 * np.ones(input_dim)) # bin_ratio defines the uncertainty in our data bin_ratio = 0.25 # Find the simple function approximation (d_distr_prob, d_distr_samples, d_Tree) = simpleFunP.uniform_hyperrectangle(\ - data=data, Q_ref=Q_ref, bin_ratio=bin_ratio, center_pts_per_edge = 1) + data=output_set._values, Q_ref=Q_ref, bin_ratio=bin_ratio, center_pts_per_edge = 1) # Calculate probablities making the Monte Carlo assumption -(P, lam_vol, io_ptr) = calculateP.prob(samples=samples, data=data, - rho_D_M=d_distr_prob, d_distr_samples=d_distr_samples) +(P, lam_vol, io_ptr) = calculateP.prob(samples=input_set._values, + data=output_set._values,rho_D_M=d_distr_prob, d_distr_samples=d_distr_samples) percentile = 1.0 # Sort samples by highest probability density and find how many samples lie in # the support of the inverse solution. With the Monte Carlo assumption, this # also tells us the approximate volume of this support. -(num_samples, P_high, samples_high, lam_vol_high, data_high) =\ +(num_samples, P_high, samples_high, lam_vol_high, data_high, sort) =\ postTools.sample_highest_prob(top_percentile=percentile, P_samples=P, - samples=samples, lam_vol=lam_vol,data = data,sort=True) + samples=input_set._values, lam_vol=lam_vol,data=output_set._values,sort=True) # Print the number of samples that make up the highest percentile percent # samples and ratio of the volume of the parameter domain they take up diff --git a/examples/sensitivity/linear/linear_volume_binsize_large.py b/examples/sensitivity/linear/linear_volume_binsize_large.py index 3d8f18eb..b3090b0d 100644 --- a/examples/sensitivity/linear/linear_volume_binsize_large.py +++ b/examples/sensitivity/linear/linear_volume_binsize_large.py @@ -4,14 +4,14 @@ This example generates uniform random samples in the unit hypercube and corresponding QoIs (data) generated by a linear map Q. We then calculate the gradients using an RBF scheme and use the gradient information to choose the -optimal set of 2 (3, 4, ... Lambda_dim) QoIs to use in the inverse problem. +optimal set of 2 (3, 4, ... input_dim) QoIs to use in the inverse problem. Every real world problem requires special attention regarding how we choose *optimal QoIs*. This set of examples (examples/sensitivity/linear) covers some of the more common scenarios using easy to understand linear maps. In this *volume_binsize_large* example we choose *optimal QoIs* to be the set of -QoIs of size Lambda_dim that produces the smallest support of the inverse +QoIs of size input_dim that produces the smallest support of the inverse solution, assuming we define the uncertainty in our data to be fixed, i.e., independent of the range of data maesured for each QoI (bin_size). 
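To make the bin_ratio versus bin_size distinction above concrete, here is a small hypothetical comparison of the half-widths of the two uncertainty boxes around a reference value; the exact construction inside simpleFunP may differ, and `output_set` refers to the data populated later in this script:

    data_range = output_set._values.max(axis=0) - output_set._values.min(axis=0)
    half_width_ratio = 0.5 * 0.25 * data_range   # bin_ratio: scales with the data range
    half_width_size = 0.5 * 0.25                 # bin_size: fixed width per QoI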
""" @@ -23,36 +23,42 @@ import bet.calculateP.calculateP as calculateP import bet.postProcess.postTools as postTools import bet.Comm as comm +import bet.sample as sample # Let Lambda be a 5 dimensional hypercube -Lambda_dim = 10 -Data_dim = 100 +input_dim = 10 +output_dim = 100 num_samples = 1E5 num_centers = 10 -# Let the map Q be a random matrix of size (Data_dim, Lambda_dim) +# Let the map Q be a random matrix of size (output_dim, input_dim) np.random.seed(0) -Q = np.random.random([Data_dim, Lambda_dim]) +Q = np.random.random([output_dim, input_dim]) # Choose random samples in parameter space to solve the model -samples = np.random.random([num_samples, Lambda_dim]) -data = Q.dot(samples.transpose()).transpose() +input_set = sample.sample_set(input_dim) +input_set_centers = sample.sample_set(input_dim) +output_set = sample.sample_set(output_dim) + +input_set._values = np.random.random([num_samples, input_dim]) +input_set_centers._values = input_set._values[:num_centers] +output_set._values = Q.dot(input_set._values.transpose()).transpose() # Calculate the gradient vectors at some subset of the samples. Here the # *normalize* argument is set to *False* because we are using bin_size to # determine the uncertainty in our data. -G = grad.calculate_gradients_rbf(samples, data, centers=samples[:num_centers, :], - normalize=False) +input_set._jacobians = grad.calculate_gradients_rbf(input_set, output_set, + input_set_centers, normalize=False) # With these gradient vectors, we are now ready to choose an optimal set of # QoIs to use in the inverse problem, based on minimizing the support of the # inverse solution (volume). The most robust method for this is # :meth:~bet.sensitivity.chooseQoIs.chooseOptQoIs_large which returns the -# best set of 2, 3, 4 ... until Lambda_dim. This method returns a list of +# best set of 2, 3, 4 ... until input_dim. This method returns a list of # matrices. Each matrix has 10 rows, the first column representing the # expected inverse volume ratio, and the rest of the columns the corresponding # QoI indices. -best_sets = cQoI.chooseOptQoIs_large(G, max_qois_return=5, +best_sets = cQoI.chooseOptQoIs_large(input_set, max_qois_return=5, num_optsets_return=2, inner_prod_tol=0.9, cond_tol=1E2, volume=True) ''' @@ -74,33 +80,33 @@ # different sets of these QoIs. We set Q_ref to correspond to the center of # the parameter space. We choose the set of QoIs to consider. 
-QoI_indices = [0, 7] # choose up to Lambda_dim +QoI_indices = [0, 7] # choose up to input_dim #QoI_indices = [0, 1] #QoI_indices = [0, 7, 34, 39, 90] #QoI_indices = [0, 1, 2, 3, 4] # Restrict the data to have just QoI_indices -data = data[:, QoI_indices] -Q_ref = Q[QoI_indices, :].dot(0.5 * np.ones(Lambda_dim)) +output_set._values = output_set._values[:, QoI_indices] +Q_ref = Q[QoI_indices, :].dot(0.5 * np.ones(input_dim)) # bin_size defines the uncertainty in our data bin_size = 0.25 # Find the simple function approximation (d_distr_prob, d_distr_samples, d_Tree) =\ - simpleFunP.uniform_hyperrectangle_binsize(data=data, Q_ref=Q_ref, + simpleFunP.uniform_hyperrectangle_binsize(data=output_set._values, Q_ref=Q_ref, bin_size=bin_size, center_pts_per_edge = 1) # Calculate probablities making the Monte Carlo assumption -(P, lam_vol, io_ptr) = calculateP.prob(samples=samples, data=data, - rho_D_M=d_distr_prob, d_distr_samples=d_distr_samples) +(P, lam_vol, io_ptr) = calculateP.prob(samples=input_set._values, + data=output_set._values, rho_D_M=d_distr_prob, d_distr_samples=d_distr_samples) percentile = 1.0 # Sort samples by highest probability density and find how many samples lie in # the support of the inverse solution. With the Monte Carlo assumption, this # also tells us the approximate volume of this support. -(num_samples, P_high, samples_high, lam_vol_high, data_high) =\ +(num_samples, P_high, samples_high, lam_vol_high, data_high, sort) =\ postTools.sample_highest_prob(top_percentile=percentile, P_samples=P, - samples=samples, lam_vol=lam_vol,data = data,sort=True) + samples=input_set._values, lam_vol=lam_vol,data=output_set._values,sort=True) # Print the number of samples that make up the highest percentile percent # samples and ratio of the volume of the parameter domain they take up diff --git a/examples/validationExample/linearMap.py b/examples/validationExample/linearMap.py index a6d41cdc..389b9c92 100644 --- a/examples/validationExample/linearMap.py +++ b/examples/validationExample/linearMap.py @@ -1,6 +1,6 @@ #! /usr/bin/env python -# Copyright (C) 2014-2015 Lindley Graham and Steven Mattis +# Copyright (C) 2014-2016 The BET Development Team """ This 2D linear example verifies that geometrically distinct QoI can @@ -8,170 +8,171 @@ used to define the output probability measure. """ +from bet.Comm import comm, MPI import numpy as np import bet.calculateP as calculateP import bet.postProcess as postProcess import bet.calculateP.simpleFunP as simpleFunP import bet.calculateP.calculateP as calculateP import bet.postProcess.plotP as plotP +import bet.postProcess.plotDomains as plotD +import bet.sample as samp +import bet.sampling.basicSampling as bsam import scipy.spatial as spatial +from myModel import my_model -# parameter domain -lam_domain= np.array([[0.0, 1.0], - [0.0, 1.0]]) +# Initialize 3-dimensional input parameter sample set object +input_samples = samp.sample_set(2) + +# Set parameter domain +input_samples.set_domain(np.repeat([[0.0, 1.0]], 2, axis=0)) + +# Define the sampler that will be used to create the discretization +# object, which is the fundamental object used by BET to compute +# solutions to the stochastic inverse problem +sampler = bsam.sampler(my_model) ''' Suggested changes for user: - -Try setting n0 and n1 all to 10 and compare the results. - -Also, we can do uniform random sampling by setting - - random_sample = True - -If random_sample = True, consider defining - - n_samples = 2.5E3 - -Then also try n_samples = 1E4. What happens when n_samples = 1E2? 
-''' -random_sample = False -if random_sample == False: - n0 = 50 # number of samples in lam0 direction - n1 = 50 # number of samples in lam1 direction - n_samples = n0*n1 -else: - n_samples = 2.5E3 +Try with and without random sampling. +If using random sampling, try num_samples = 1E3 and 1E4. +What happens when num_samples = 1E2? +Try using 'lhs' instead of 'random' in the random_sample_set. -#set up samples -if random_sample == False: - vec0=list(np.linspace(lam_domain[0][0], lam_domain[0][1], n0)) - vec1 = list(np.linspace(lam_domain[1][0], lam_domain[1][1], n1)) - vecv0, vecv1 = np.meshgrid(vec0, vec1, indexing='ij') - samples=np.vstack((vecv0.flat[:], vecv1.flat[:])).transpose() +If using regular sampling, try different numbers of samples +per dimension. +''' +# Generate samples on the parameter space +randomSampling = False +if randomSampling is True: + sampler.random_sample_set('random', input_samples, num_samples=1E3) else: - samples = calculateP.emulate_iid_lebesgue(lam_domain=lam_domain, - num_l_emulate = n_samples) + sampler.regular_sample_set(input_samples, num_samples_per_dim=[30, 30]) -# QoI map -Q_map = np.array([[0.506, 0.463],[0.253, 0.918]]) +''' +Suggested changes for user: +A standard Monte Carlo (MC) assumption is that every Voronoi cell +has the same volume. If a regular grid of samples was used, then +the standard MC assumption is true. -# calc data -data = np.dot(samples,Q_map) +See what happens if the MC assumption is not assumed to be true, and +if different numbers of points are used to estimate the volumes of +the Voronoi cells. +''' +MC_assumption = True +# Estimate volumes of Voronoi cells associated with the parameter samples +if MC_assumption is False: + input_samples.estimate_volume(n_mc_points=1E5) +else: + input_samples.estimate_volume_mc() + +# Create the discretization object using the input samples +my_discretization = sampler.compute_QoI_and_create_discretization(input_samples, + savefile = 'Validation_discretization.txt.gz') ''' Compute the output distribution simple function approximation by propagating a different set of samples to implicitly define a Voronoi -discretization of the data space, and then propagating i.i.d. uniform -samples to bin into these cells. +discretization of the data space, corresponding to an implicitly defined +set of contour events defining a discretization of the input parameter +space. The probabilities of the Voronoi cells in the data space (and +thus the probabilities of the corresponding contour events in the +input parameter space) are determined by Monte Carlo sampling using +a set of i.i.d. uniform samples to bin into these cells. Suggested changes for user: -See the effect of using different values for d_distr_samples_num. -Choosing +See the effect of using different values for num_samples_discretize_D. +Choosing + + num_samples_discretize_D = 1 - d_distr_samples_num = 1 - produces exactly the right answer and is equivalent to assigning a -uniform probability to each data sample above (why?). +uniform probability to each data sample above (why?). -Try setting this to 2, 5, 10, 50, and 100. Can you explain what you +Try setting this to 2, 5, 10, 50, and 100. Can you explain what you are seeing? To see an exaggerated effect, try using random sampling -above with n_samples set to 1E2. +above with n_samples set to 1E2. 
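The binning idea described above (and delegated to simpleFunP.user_partition_user_distribution in the code below) can be sketched in a few lines of plain numpy/scipy. This is a hypothetical, standalone sketch with made-up array sizes, not the BET API; it mirrors the KDTree-based binning that this patch removes further down::

    import numpy as np
    import scipy.spatial as spatial

    # A few partition points in the data space (the role played by
    # num_samples_discretize_D points) and many i.i.d. Monte Carlo points
    # (the role played by num_iid_samples points).
    partition_pts = np.random.random((5, 2))
    mc_pts = np.random.random((100000, 2))

    # Bin every Monte Carlo point to its nearest partition point (Voronoi cells).
    (_, ptr) = spatial.KDTree(partition_pts).query(mc_pts)

    # The probability of each cell is the fraction of Monte Carlo points it
    # captures; these sum to 1 and define the simple function approximation.
    prob = np.array([np.sum(ptr == i) for i in range(partition_pts.shape[0])],
                    dtype=float) / mc_pts.shape[0]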
''' -d_distr_samples_num = 1 - -samples_discretize = calculateP.emulate_iid_lebesgue(lam_domain=lam_domain, - num_l_emulate = d_distr_samples_num) +num_samples_discretize_D = 1 +num_iid_samples = 1E5 -d_distr_samples = np.dot(samples_discretize, Q_map) +Partition_set = samp.sample_set(2) +Monte_Carlo_set = samp.sample_set(2) -d_Tree = spatial.KDTree(d_distr_samples) +Partition_set.set_domain(np.repeat([[0.0, 1.0]], 2, axis=0)) +Monte_Carlo_set.set_domain(np.repeat([[0.0, 1.0]], 2, axis=0)) -samples_distr_prob_num = d_distr_samples_num*1E3 +Partition_discretization = sampler.create_random_discretization('random', + Partition_set, + num_samples=num_samples_discretize_D) -samples_distr_prob = calculateP.emulate_iid_lebesgue(lam_domain=lam_domain, - num_l_emulate = samples_distr_prob_num) +Monte_Carlo_discretization = sampler.create_random_discretization('random', + Monte_Carlo_set, + num_samples=num_iid_samples) -data_prob = np.dot(samples_distr_prob, Q_map) +# Compute the simple function approximation to the distribution on the data space +simpleFunP.user_partition_user_distribution(my_discretization, + Partition_discretization, + Monte_Carlo_discretization) -# Determine which data samples go to which d_distr_samples_num bins using the QoI -(_, oo_ptr) = d_Tree.query(data_prob) +# Calculate probabilities +calculateP.prob(my_discretization) -# Calculate Probabilities of the d_distr_samples defined Voronoi cells -d_distr_prob = np.zeros((d_distr_samples_num,)) -for i in range(d_distr_samples_num): - Itemp = np.equal(oo_ptr, i) - Itemp_sum = float(np.sum(Itemp)) - d_distr_prob[i] = Itemp_sum / samples_distr_prob_num +# Show some plots of the different sample sets +plotD.scatter_2D(my_discretization._input_sample_set, filename = 'Parameter_Samples.eps') +plotD.scatter_2D(my_discretization._output_sample_set, filename = 'QoI_Samples.eps') +plotD.scatter_2D(my_discretization._output_probability_set, filename = 'Data_Space_Discretization.eps') +######################################## +# Post-process the results +######################################## ''' Suggested changes for user: - -If using a regular grid of sampling (if random_sample = False), we set - - lambda_emulate = samples - -Otherwise, play around with num_l_emulate. A value of 1E2 will probably -give poor results while results become fairly consistent with values -that are approximately 10x the number of samples. - -Note that you can always use - - lambda_emulate = samples - -and this simply will imply that a standard Monte Carlo assumption is -being used, which in a measure-theoretic context implies that each -Voronoi cell is assumed to have the same measure. This type of -approximation is more reasonable for large n_samples due to the slow -convergence rate of Monte Carlo (it converges like 1/sqrt(n_samples)). 
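The standard MC assumption referred to in the removed note above amounts to assigning every Voronoi cell in the input space the same measure; a minimal numpy sketch of that assumption, with hypothetical variable names (not the BET API)::

    import numpy as np

    # With N parameter samples, the MC assumption gives each Voronoi cell
    # volume 1/N, so the volumes sum to 1 by construction.
    N = 30 * 30                    # e.g. the 30 x 30 regular grid used above
    lam_vol = np.ones(N) / N
    assert abs(lam_vol.sum() - 1.0) < 1e-12

    # Estimating the volumes by Monte Carlo instead converges slowly,
    # roughly like 1/sqrt(number of emulated points), as noted above.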
-''' -if random_sample == False: - lambda_emulate = samples -else: - lambda_emulate = calculateP.emulate_iid_lebesgue(lam_domain=lam_domain, num_l_emulate = 1E5) - -# calculate probablities -(P, lambda_emulate, io_ptr, emulate_ptr) = calculateP.prob_emulated(samples=samples, - data=data, - rho_D_M=d_distr_prob, - d_distr_samples=d_distr_samples, - lambda_emulate=lambda_emulate, - d_Tree=d_Tree) -# calculate 2d marginal probs -''' -Suggested changes for user: - At this point, the only thing that should change in the plotP.* inputs should be either the nbins values or sigma (which influences the kernel density estimation with smaller values implying a density estimate that looks more like a histogram and larger values smoothing out the values more). - + There are ways to determine "optimal" smoothing parameters (e.g., see CV, GCV, and other similar methods), but we have not incorporated these into the code -as lower-dimensional marginal plots have limited value in understanding the -structure of a high dimensional non-parametric probability measure. +as lower-dimensional marginal plots generally have limited value in understanding +the structure of a high dimensional non-parametric probability measure. ''' -(bins, marginals2D) = plotP.calculate_2D_marginal_probs(P_samples = P, samples = lambda_emulate, lam_domain = lam_domain, nbins = [10, 10]) +# calculate 2d marginal probs +(bins, marginals2D) = plotP.calculate_2D_marginal_probs(input_samples, + nbins = [30, 30]) + +# plot 2d marginals probs +plotP.plot_2D_marginal_probs(marginals2D, bins, input_samples, filename = "validation_raw", + file_extension = ".eps", plot_surface=False) + # smooth 2d marginals probs (optional) -#marginals2D = plotP.smooth_marginals_2D(marginals2D,bins, sigma=0.01) +marginals2D = plotP.smooth_marginals_2D(marginals2D, bins, sigma=0.1) # plot 2d marginals probs -plotP.plot_2D_marginal_probs(marginals2D, bins, lam_domain, filename = "linearMapValidation", - plot_surface=False) +plotP.plot_2D_marginal_probs(marginals2D, bins, input_samples, filename = "validation_smooth", + file_extension = ".eps", plot_surface=False) # calculate 1d marginal probs -(bins, marginals1D) = plotP.calculate_1D_marginal_probs(P_samples = P, samples = lambda_emulate, lam_domain = lam_domain, nbins = [10, 10]) +(bins, marginals1D) = plotP.calculate_1D_marginal_probs(input_samples, + nbins = [30, 30]) + +# plot 1d marginal probs +plotP.plot_1D_marginal_probs(marginals1D, bins, input_samples, filename = "validation_raw", + file_extension = ".eps") + # smooth 1d marginal probs (optional) -#marginals1D = plotP.smooth_marginals_1D(marginals1D, bins, sigma=0.01) -# plot 1d marginal probs -plotP.plot_1D_marginal_probs(marginals1D, bins, lam_domain, filename = "linearMapValidation") +marginals1D = plotP.smooth_marginals_1D(marginals1D, bins, sigma=0.1) +# plot 1d marginal probs +plotP.plot_1D_marginal_probs(marginals1D, bins, input_samples, filename = "validation_smooth", + file_extension = ".eps") diff --git a/examples/validationExample/myModel.py b/examples/validationExample/myModel.py new file mode 100644 index 00000000..60c8b36f --- /dev/null +++ b/examples/validationExample/myModel.py @@ -0,0 +1,11 @@ +# Copyright (C) 2016 The BET Development Team + +# -*- coding: utf-8 -*- +import numpy as np + +# Define a model that is a linear QoI map +def my_model(parameter_samples): + Q_map = np.array([[0.506, 0.463], [0.253, 0.918]]) + QoI_samples = np.dot(parameter_samples, Q_map) + return QoI_samples + diff --git a/test/__init__.py
b/test/__init__.py index e34fa14c..6ece5408 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -4,4 +4,5 @@ This package contains all of the tests for :program:`BET`. The package structure mirrors the ``bet`` package structure. """ -__all__ = ['test_calculateP', 'test_postProcess', 'test_sampling', 'test_sensitivity'] +__all__ = ['test_calculateP', 'test_postProcess', 'test_sampling', +'test_sensitivity', 'test_util', 'test_Comm', 'test_sample'] diff --git a/test/test_calculateP/test_calculateP.py b/test/test_calculateP/test_calculateP.py index f743f5d0..00ae0d95 100644 --- a/test/test_calculateP/test_calculateP.py +++ b/test/test_calculateP/test_calculateP.py @@ -1,6 +1,7 @@ -# Copyright (C) 2014-2015 The BET Development Team +# Copyright (C) 2014-2016 The BET Development Team # Steven Mattis and Lindley Graham 04/06/2015 +# Steven Mattis 03/24/2016 """ This module contains tests for :module:`bet.calculateP.calculateP`. @@ -13,6 +14,7 @@ import bet import bet.calculateP.calculateP as calcP import bet.calculateP.simpleFunP as simpleFunP +import bet.sample as samp import numpy as np import numpy.testing as nptest import bet.util as util @@ -31,132 +33,82 @@ def setUp(self): lambda_domain. """ + self.dim = 3 + self.num_l_emulate = 1000001 lam_left = np.array([0.0, .25, .4]) lam_right = np.array([1.0, 4.0, .5]) - self.lam_domain = np.zeros((3, 3)) - self.lam_domain[:, 0] = lam_left - self.lam_domain[:, 1] = lam_right - - self.num_l_emulate = 1000001 - - self.lambda_emulate = calcP.emulate_iid_lebesgue(self.lam_domain, - self.num_l_emulate) + lam_domain = np.zeros((self.dim, 2)) + lam_domain[:, 0] = lam_left + lam_domain[:, 1] = lam_right + + self.s_set_emulated = calcP.emulate_iid_lebesgue(lam_domain, + self.num_l_emulate, + globalize=True) def test_dimension(self): """ Check the dimension. """ - nptest.assert_array_equal(self.lambda_emulate.shape, - ((self.num_l_emulate/comm.size) + (comm.rank < \ - self.num_l_emulate%comm.size), 3)) + self.s_set_emulated.local_to_global() + self.assertEqual(self.s_set_emulated._values.shape, (self.num_l_emulate, self.dim)) def test_bounds(self): """ Check that the samples are all within the correct bounds """ - self.assertGreaterEqual(np.min(self.lambda_emulate[:, 0]), 0.0) - self.assertGreaterEqual(np.min(self.lambda_emulate[:, 1]), 0.25) - self.assertGreaterEqual(np.min(self.lambda_emulate[:, 2]), 0.4) - self.assertLessEqual(np.max(self.lambda_emulate[:, 0]), 1.0) - self.assertLessEqual(np.max(self.lambda_emulate[:, 1]), 4.0) - self.assertLessEqual(np.max(self.lambda_emulate[:, 2]), 0.5) - -class TestEstimateVolume(unittest.TestCase): - """ - Test :meth:`bet.calculateP.calculateP.estimate_volulme`. - """ - - def setUp(self): - """ - Test dimension, number of samples, and that all the samples are within - lambda_domain. 
- - """ - lam_left = np.array([0.0, .25, .4]) - lam_right = np.array([1.0, 4.0, .5]) - lam_width = lam_right-lam_left - - self.lam_domain = np.zeros((3, 3)) - self.lam_domain[:, 0] = lam_left - self.lam_domain[:, 1] = lam_right - - num_samples_dim = 2 - start = lam_left+lam_width/(2*num_samples_dim) - stop = lam_right-lam_width/(2*num_samples_dim) - d1_arrays = [] - - for l, r in zip(start, stop): - d1_arrays.append(np.linspace(l, r, num_samples_dim)) - - self.num_l_emulate = 1000001 + self.assertGreaterEqual(np.min(self.s_set_emulated._values[:, 0]), 0.0) + self.assertGreaterEqual(np.min(self.s_set_emulated._values[:, 1]), 0.25) + self.assertGreaterEqual(np.min(self.s_set_emulated._values[:, 2]), 0.4) + self.assertLessEqual(np.max(self.s_set_emulated._values[:, 0]), 1.0) + self.assertLessEqual(np.max(self.s_set_emulated._values[:, 1]), 4.0) + self.assertLessEqual(np.max(self.s_set_emulated._values[:, 2]), 0.5) - self.lambda_emulate = calcP.emulate_iid_lebesgue(self.lam_domain, - self.num_l_emulate) - self.samples = util.meshgrid_ndim(d1_arrays) - self.volume_exact = 1.0/self.samples.shape[0] - self.lam_vol, self.lam_vol_local, self.local_index = calcP.\ - estimate_volume(self.samples, self.lambda_emulate) - - def test_dimension(self): - """ - Check the dimension. - """ - nptest.assert_array_equal(self.lam_vol.shape, (len(self.samples), )) - nptest.assert_array_equal(self.lam_vol_local.shape, - (len(self.samples)/comm.size, )) - nptest.assert_array_equal(self.lam_vol_local.shape, - len(self.local_index)) - def test_volumes(self): - """ - Check that the volumes are within a tolerance for a regular grid of - samples. - """ - nptest.assert_array_almost_equal(self.lam_vol, self.volume_exact, 3) - nptest.assert_array_equal(self.lam_vol_local, - self.lam_vol[self.local_index]) class prob: def test_prob_sum_to_1(self): """ Test to see if the prob. sums to 1. """ - nptest.assert_almost_equal(np.sum(self.P), 1.0) + nptest.assert_almost_equal(np.sum(self.inputs._probabilities), 1.0) #@unittest.skipIf(comm.size > 1, 'Only run in serial') def test_P_matches_true(self): """ Test against reference probs. (Only in serial) """ - nptest.assert_almost_equal(self.P_ref, self.P) + nptest.assert_almost_equal(self.P_ref, self.inputs._probabilities) def test_vol_sum_to_1(self): """ Test that volume ratios sum to 1. """ - nptest.assert_almost_equal(np.sum(self.lam_vol), 1.0) + nptest.assert_almost_equal(np.sum(self.inputs._volumes), 1.0) def test_prob_pos(self): """ Test that all probs are non-negative. """ - self.assertEqual(np.sum(np.less(self.P, 0)), 0) + self.assertEqual(np.sum(np.less(self.inputs._probabilities, 0)), 0) class prob_emulated: def test_P_sum_to_1(self): """ Test that prob. sums to 1. """ - nptest.assert_almost_equal(np.sum(self.P_emulate), 1.0) + self.inputs_emulated.local_to_global() + nptest.assert_almost_equal(np.sum(self.inputs_emulated._probabilities), 1.0) def test_P_matches_true(self): """ Test that probabilites match reference values. """ + self.inputs_emulated.local_to_global() if comm.size == 1: - nptest.assert_almost_equal(self.P_emulate_ref, self.P_emulate) + nptest.assert_almost_equal(self.P_emulate_ref, self.inputs_emulated._probabilities) def test_prob_pos(self): """ Test that all probabilites are non-negative. 
""" - self.assertEqual(np.sum(np.less(self.P_emulate, 0)), 0) + self.inputs_emulated.local_to_global() + self.assertEqual(np.sum(np.less(self.inputs_emulated._probabilities, 0)), 0) class prob_mc: @@ -164,23 +116,23 @@ def test_P_sum_to_1(self): """ Test that probs sum to 1. """ - nptest.assert_almost_equal(np.sum(self.P), 1.0) + nptest.assert_almost_equal(np.sum(self.inputs._probabilities), 1.0) def test_P_matches_true(self): """ Test the probs. match reference values. """ if comm.size == 1: - nptest.assert_almost_equal(self.P_ref, self.P) + nptest.assert_almost_equal(self.P_ref, self.inputs._probabilities) def test_vol_sum_to_1(self): """ Test that volume ratios sum to 1. """ - nptest.assert_almost_equal(np.sum(self.lam_vol), 1.0) + nptest.assert_almost_equal(np.sum(self.inputs._volumes), 1.0) def test_prob_pos(self): """ Test that all probs are non-negative. """ - self.assertEqual(np.sum(np.less(self.P, 0)), 0) + self.assertEqual(np.sum(np.less(self.inputs._probabilities, 0)), 0) class TestProbMethod_3to2(unittest.TestCase): @@ -188,20 +140,24 @@ class TestProbMethod_3to2(unittest.TestCase): Sets up 3 to 2 map problem. """ def setUp(self): - self.samples = np.loadtxt(data_path + "/3to2_samples.txt.gz") - self.data = np.loadtxt(data_path + "/3to2_data.txt.gz") + self.inputs = samp.sample_set(3) + self.outputs = samp.sample_set(2) + self.inputs.set_values(np.loadtxt(data_path + "/3to2_samples.txt.gz")) + self.outputs.set_values(np.loadtxt(data_path + "/3to2_data.txt.gz")) Q_ref = np.array([0.422, 0.9385]) - (self.d_distr_prob, self.d_distr_samples, self.d_Tree) = simpleFunP.\ - uniform_hyperrectangle(data=self.data, Q_ref=Q_ref, - bin_ratio=0.2, center_pts_per_edge=1) - self.lam_domain = np.array([[0.0, 1.0], - [0.0, 1.0], - [0.0, 1.0]]) + self.output_prob = simpleFunP.regular_partition_uniform_distribution_rectangle_scaled( + self.outputs, Q_ref = Q_ref, rect_scale=0.2, center_pts_per_edge=1) + + self.inputs.set_domain(np.array([[0.0, 1.0], + [0.0, 1.0], + [0.0, 1.0]])) import numpy.random as rnd rnd.seed(1) - self.lambda_emulate = calcP.emulate_iid_lebesgue(\ - lam_domain=self.lam_domain, num_l_emulate=1001) - + self.inputs_emulated = calcP.emulate_iid_lebesgue(self.inputs.get_domain(), num_l_emulate=1001, globalize=True) + self.disc = samp.discretization(input_sample_set=self.inputs, + output_sample_set=self.outputs, + output_probability_set=self.output_prob, + emulated_input_sample_set=self.inputs_emulated) class Test_prob_3to2(TestProbMethod_3to2, prob): """ @@ -212,9 +168,8 @@ def setUp(self): Set up problem. """ super(Test_prob_3to2, self).setUp() - (self.P, self.lam_vol, _) = calcP.prob(samples=self.samples, - data=self.data, rho_D_M=self.d_distr_prob, - d_distr_samples=self.d_distr_samples, d_Tree=self.d_Tree) + self.disc._input_sample_set.estimate_volume_mc() + calcP.prob(self.disc) self.P_ref = np.loadtxt(data_path + "/3to2_prob.txt.gz") @@ -227,14 +182,10 @@ def setUp(self): Set up 3 to 2 map. 
""" super(Test_prob_emulated_3to2, self).setUp() - (self.P_emulate, self.lambda_emulate, _, _) = calcP.prob_emulated(\ - samples=self.samples, data=self.data, - rho_D_M=self.d_distr_prob, - d_distr_samples=self.d_distr_samples, - lambda_emulate=self.lambda_emulate, d_Tree=self.d_Tree) + calcP.prob_emulated(self.disc) self.P_emulate_ref = np.loadtxt(data_path+"/3to2_prob_emulated.txt.gz") - self.P_emulate = util.get_global_values(self.P_emulate) - + #self.P_emulate = util.get_global_values(self.P_emulate) + class Test_prob_mc_3to2(TestProbMethod_3to2, prob_mc): """ @@ -245,32 +196,33 @@ def setUp(self): Set up 3 to 2 problem. """ super(Test_prob_mc_3to2, self).setUp() - (self.P, self.lam_vol, _, _, _) = calcP.prob_mc(samples=self.samples, - data=self.data, rho_D_M=self.d_distr_prob, - d_distr_samples=self.d_distr_samples, - lambda_emulate=self.lambda_emulate, d_Tree=self.d_Tree) + calcP.prob_mc(self.disc) self.P_ref = np.loadtxt(data_path + "/3to2_prob_mc.txt.gz") class TestProbMethod_3to1(unittest.TestCase): """ - Set up 3 to 1 map problem. + Sets up 3 to 1 map problem. """ def setUp(self): - """ - Set up problem. - """ - self.samples = np.loadtxt(data_path + "/3to2_samples.txt.gz") - self.data = np.loadtxt(data_path + "/3to2_data.txt.gz")[:, 0] + self.inputs = samp.sample_set(3) + self.outputs = samp.sample_set(1) + self.inputs.set_values(np.loadtxt(data_path + "/3to2_samples.txt.gz")) + self.outputs.set_values(np.loadtxt(data_path + "/3to2_data.txt.gz")[:,0]) Q_ref = np.array([0.422]) - (self.d_distr_prob, self.d_distr_samples, self.d_Tree) = simpleFunP.\ - uniform_hyperrectangle(data=self.data, Q_ref=Q_ref, - bin_ratio=0.2, center_pts_per_edge=1) - self.lam_domain = np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0]]) + self.output_prob = simpleFunP.regular_partition_uniform_distribution_rectangle_scaled( + self.outputs, Q_ref = Q_ref, rect_scale=0.2, center_pts_per_edge=1) + + self.inputs.set_domain(np.array([[0.0, 1.0], + [0.0, 1.0], + [0.0, 1.0]])) import numpy.random as rnd rnd.seed(1) - self.lambda_emulate = calcP.emulate_iid_lebesgue(lam_domain=\ - self.lam_domain, num_l_emulate=1001) + self.inputs_emulated = calcP.emulate_iid_lebesgue(self.inputs.get_domain(), num_l_emulate=1001, globalize=True) + self.disc = samp.discretization(input_sample_set=self.inputs, + output_sample_set=self.outputs, + output_probability_set=self.output_prob, + emulated_input_sample_set=self.inputs_emulated) class Test_prob_3to1(TestProbMethod_3to1, prob): """ @@ -281,9 +233,8 @@ def setUp(self): Set up problem. """ super(Test_prob_3to1, self).setUp() - (self.P, self.lam_vol, _) = calcP.prob(samples=self.samples, - data=self.data, rho_D_M=self.d_distr_prob, - d_distr_samples=self.d_distr_samples, d_Tree=self.d_Tree) + self.disc._input_sample_set.estimate_volume_mc() + calcP.prob(self.disc) self.P_ref = np.loadtxt(data_path + "/3to1_prob.txt.gz") @@ -296,13 +247,8 @@ def setUp(self): Set up problem. """ super(Test_prob_emulated_3to1, self).setUp() - (self.P_emulate, self.lambda_emulate, _, _) = calcP.prob_emulated(\ - samples=self.samples, data=self.data, - rho_D_M=self.d_distr_prob, - d_distr_samples=self.d_distr_samples, - lambda_emulate=self.lambda_emulate, d_Tree=self.d_Tree) + calcP.prob_emulated(self.disc) self.P_emulate_ref = np.loadtxt(data_path+"/3to1_prob_emulated.txt.gz") - self.P_emulate = util.get_global_values(self.P_emulate) class Test_prob_mc_3to1(TestProbMethod_3to1, prob_mc): @@ -314,10 +260,7 @@ def setUp(self): Set up problem. 
""" super(Test_prob_mc_3to1, self).setUp() - (self.P, self.lam_vol, _, _, _) = calcP.prob_mc(samples=self.samples, - data=self.data, rho_D_M=self.d_distr_prob, - d_distr_samples=self.d_distr_samples, - lambda_emulate=self.lambda_emulate, d_Tree=self.d_Tree) + calcP.prob_mc(self.disc) self.P_ref = np.loadtxt(data_path + "/3to1_prob_mc.txt.gz") @@ -331,18 +274,22 @@ def setUp(self): """ import numpy.random as rnd rnd.seed(1) + self.inputs = samp.sample_set(10) + self.outputs = samp.sample_set(4) self.lam_domain = np.zeros((10, 2)) self.lam_domain[:, 0] = 0.0 self.lam_domain[:, 1] = 1.0 - self.num_l_emulate = 1001 - self.lambda_emulate = calcP.emulate_iid_lebesgue(self.lam_domain, - self.num_l_emulate) - self.samples = calcP.emulate_iid_lebesgue(self.lam_domain, 100) - self.data = np.dot(self.samples, rnd.rand(10, 4)) - Q_ref = np.mean(self.data, axis=0) - (self.d_distr_prob, self.d_distr_samples, self.d_Tree) =\ - simpleFunP.uniform_hyperrectangle(data=self.data, Q_ref=Q_ref, - bin_ratio=0.2, center_pts_per_edge=1) + self.inputs.set_domain(self.lam_domain) + self.inputs = calcP.emulate_iid_lebesgue(self.inputs.get_domain(), num_l_emulate=101, globalize=True) + self.outputs.set_values(np.dot(self.inputs._values, rnd.rand(10, 4))) + Q_ref = np.mean(self.outputs._values, axis=0) + self.inputs_emulated = calcP.emulate_iid_lebesgue(self.inputs.get_domain(), num_l_emulate=1001, globalize=True) + self.output_prob = simpleFunP.regular_partition_uniform_distribution_rectangle_scaled( + self.outputs, Q_ref = Q_ref, rect_scale=0.2, center_pts_per_edge=1) + self.disc = samp.discretization(input_sample_set=self.inputs, + output_sample_set=self.outputs, + output_probability_set=self.output_prob, + emulated_input_sample_set=self.inputs_emulated) @unittest.skip("No reference data") def test_P_matches_true(self): @@ -357,9 +304,8 @@ def setUp(self): Set up problem. """ super(Test_prob_10to4, self).setUp() - (self.P, self.lam_vol, _) = calcP.prob(samples=self.samples, - data=self.data, rho_D_M=self.d_distr_prob, - d_distr_samples=self.d_distr_samples, d_Tree=self.d_Tree) + self.disc._input_sample_set.estimate_volume_mc() + calcP.prob(self.disc) class Test_prob_emulated_10to4(TestProbMethod_10to4, prob_emulated): @@ -372,13 +318,8 @@ def setUp(self): """ super(Test_prob_emulated_10to4, self).setUp() - (self.P_emulate, self.lambda_emulate, _, _) = calcP.prob_emulated(\ - samples=self.samples, data=self.data, - rho_D_M=self.d_distr_prob, - d_distr_samples=self.d_distr_samples, - lambda_emulate=self.lambda_emulate, d_Tree=self.d_Tree) - self.P_emulate = util.get_global_values(self.P_emulate) - + calcP.prob_emulated(self.disc) + class Test_prob_mc_10to4(TestProbMethod_10to4, prob_mc): """ Test :meth:`bet.calculateP.calculateP.prob_mc` on a 10 to 4 map. @@ -388,10 +329,7 @@ def setUp(self): Set up problem. """ super(Test_prob_mc_10to4, self).setUp() - (self.P, self.lam_vol, _, _, _) = calcP.prob_mc(samples=self.samples, - data=self.data, rho_D_M=self.d_distr_prob, - d_distr_samples=self.d_distr_samples, - lambda_emulate=self.lambda_emulate, d_Tree=self.d_Tree) + calcP.prob_mc(self.disc) class TestProbMethod_1to1(unittest.TestCase): @@ -402,20 +340,30 @@ def setUp(self): """ Set up problem. 
""" + import numpy.random as rnd rnd.seed(1) + self.inputs = samp.sample_set(1) + self.outputs = samp.sample_set(1) self.lam_domain = np.zeros((1, 2)) - self.lam_domain[0, 0] = 0.0 - self.lam_domain[0, 1] = 1.0 + self.lam_domain[:, 0] = 0.0 + self.lam_domain[:, 1] = 1.0 + self.inputs.set_domain(self.lam_domain) + self.inputs.set_values(rnd.rand(100,)) self.num_l_emulate = 1001 - self.lambda_emulate = calcP.emulate_iid_lebesgue(self.lam_domain, - self.num_l_emulate) - self.samples = rnd.rand(100,) - self.data = 2.0*self.samples - Q_ref = np.mean(self.data, axis=0) - (self.d_distr_prob, self.d_distr_samples, self.d_Tree) = simpleFunP.\ - uniform_hyperrectangle(data=self.data, Q_ref=Q_ref, - bin_ratio=0.2, center_pts_per_edge=1) + self.inputs = calcP.emulate_iid_lebesgue(self.inputs.get_domain(), num_l_emulate=1001, globalize=True) + self.outputs.set_values(2.0*self.inputs._values) + Q_ref = np.mean(self.outputs._values, axis=0) + self.inputs_emulated = calcP.emulate_iid_lebesgue(self.lam_domain, + self.num_l_emulate, + globalize = True) + self.output_prob = simpleFunP.regular_partition_uniform_distribution_rectangle_scaled( + self.outputs, Q_ref = Q_ref, rect_scale=0.2, center_pts_per_edge=1) + self.disc = samp.discretization(input_sample_set=self.inputs, + output_sample_set=self.outputs, + output_probability_set=self.output_prob, + emulated_input_sample_set=self.inputs_emulated) + @unittest.skip("No reference data") def test_P_matches_true(self): pass @@ -429,10 +377,8 @@ def setUp(self): Set up problem. """ super(Test_prob_1to1, self).setUp() - (self.P, self.lam_vol, _) = calcP.prob(samples=self.samples, - data=self.data, rho_D_M=self.d_distr_prob, - d_distr_samples=self.d_distr_samples, - d_Tree=self.d_Tree) + self.disc._input_sample_set.estimate_volume_mc() + calcP.prob(self.disc) class Test_prob_emulated_1to1(TestProbMethod_1to1, prob_emulated): @@ -444,12 +390,7 @@ def setUp(self): Set up problem. """ super(Test_prob_emulated_1to1, self).setUp() - (self.P_emulate, self.lambda_emulate, _, _) =\ - calcP.prob_emulated(samples=self.samples, data=self.data, - rho_D_M=self.d_distr_prob, - d_distr_samples=self.d_distr_samples, - lambda_emulate=self.lambda_emulate, d_Tree=self.d_Tree) - self.P_emulate = util.get_global_values(self.P_emulate) + calcP.prob_emulated(self.disc) class Test_prob_mc_1to1(TestProbMethod_1to1, prob_mc): @@ -461,9 +402,5 @@ def setUp(self): Set up problem. """ super(Test_prob_mc_1to1, self).setUp() - (self.P, self.lam_vol, _, _, _) = calcP.prob_mc(samples=self.samples, - data=self.data, rho_D_M=self.d_distr_prob, - d_distr_samples=self.d_distr_samples, - lambda_emulate=self.lambda_emulate, d_Tree=self.d_Tree) - + calcP.prob_mc(self.disc) diff --git a/test/test_calculateP/test_simpleFunP.py b/test/test_calculateP/test_simpleFunP.py index 8d9af33e..86e1fa7c 100644 --- a/test/test_calculateP/test_simpleFunP.py +++ b/test/test_calculateP/test_simpleFunP.py @@ -19,6 +19,7 @@ import bet.calculateP.simpleFunP as sFun import numpy as np import numpy.testing as nptest +import bet.sample as samp local_path = os.path.join(os.path.dirname(bet.__file__), '../test/test_calulateP') @@ -44,7 +45,7 @@ def test_dimensions(self): """ assert self.rho_D_M.shape[0] == self.d_distr_samples.shape[0] assert self.mdim == self.d_distr_samples.shape[1] - assert (self.d_Tree.n, self.d_Tree.m) == self.d_distr_samples.shape + #assert (self.d_Tree.n, self.d_Tree.m) == self.d_distr_samples.shape class prob_uniform(prob): @@ -83,7 +84,8 @@ def createData(self): """ Set up data. 
""" - self.data = np.random.random((100,))*10.0 + self.data = samp.sample_set(1) + self.data.set_values(np.random.random((100,))*10.0) self.Q_ref = 5.0 self.data_domain = np.array([0.0, 10.0]) self.mdim = 1 @@ -97,7 +99,8 @@ def createData(self): """ Set up data. """ - self.data = np.random.random((100, 1))*10.0 + self.data = samp.sample_set(1) + self.data.set_values(np.random.random((100,1))*10.0) self.Q_ref = np.array([5.0]) self.data_domain = np.expand_dims(np.array([0.0, 10.0]), axis=0) self.mdim = 1 @@ -111,7 +114,8 @@ def createData(self): """ Set up data. """ - self.data = np.random.random((100, 2))*10.0 + self.data = samp.sample_set(2) + self.data.set_values(np.random.random((100,2))*10.0) self.Q_ref = np.array([5.0, 5.0]) self.data_domain = np.array([[0.0, 10.0], [0.0, 10.0]]) self.mdim = 2 @@ -125,21 +129,24 @@ def createData(self): """ Set up data. """ - self.data = np.random.random((100, 3))*10.0 + self.data = samp.sample_set(3) + self.data.set_values(np.random.random((100,3))*10.0) self.Q_ref = np.array([5.0, 5.0, 5.0]) self.data_domain = np.array([[0.0, 10.0], [0.0, 10.0], [0.0, 10.0]]) self.mdim = 3 -class unif_unif(prob_uniform): +class uniform_partition_uniform_distribution_rectangle_scaled(prob_uniform): """ - Set up :meth:`bet.calculateP.simpleFunP.unif_unif` on data domain. + Set up :meth:`bet.calculateP.simpleFunP.uniform_partition_uniform_distribution_rectangle_scaled` on data domain. """ def setUp(self): """ Set up problem. """ - self.rho_D_M, self.d_distr_samples, self.d_Tree = sFun.unif_unif(self.data, - self.Q_ref, M=67, bin_ratio=0.1, num_d_emulate=1E3) + self.data_prob = sFun.uniform_partition_uniform_distribution_rectangle_scaled( + self.data, self.Q_ref, rect_scale=0.1, M=67, num_d_emulate=1E3) + self.d_distr_samples = self.data_prob.get_values() + self.rho_D_M = self.data_prob.get_probabilities() if type(self.Q_ref) != np.array: self.Q_ref = np.array([self.Q_ref]) @@ -177,55 +184,59 @@ def test_domain(self): print np.sum(self.rho_D_M[np.logical_not(inside)] == 0.0) assert np.sum(self.rho_D_M[np.logical_not(inside)] == 0.0)<100 -class test_unif_unif_01D(data_01D, unif_unif): +class test_uniform_partition_uniform_distribution_rectangle_scaled_01D(data_01D, + uniform_partition_uniform_distribution_rectangle_scaled): """ - Tests :meth:`bet.calculateP.simpleFunP.unif_unif` on 01D data domain. + Tests :meth:`bet.calculateP.simpleFunP.uniform_partition_uniform_distribution_rectangle_scaled` on 01D data domain. """ def setUp(self): """ Set up problem. """ - super(test_unif_unif_01D, self).createData() - super(test_unif_unif_01D, self).setUp() + super(test_uniform_partition_uniform_distribution_rectangle_scaled_01D, self).createData() + super(test_uniform_partition_uniform_distribution_rectangle_scaled_01D, self).setUp() -class test_unif_unif_1D(data_1D, unif_unif): +class test_uniform_partition_uniform_distribution_rectangle_scaled_1D(data_1D, + uniform_partition_uniform_distribution_rectangle_scaled): """ - Tests :meth:`bet.calculateP.simpleFunP.unif_unif` on 1D data domain. + Tests :meth:`bet.calculateP.simpleFunP.uniform_partition_uniform_distribution_rectangle_scaled` on 1D data domain. """ def setUp(self): """ Set up problem. 
""" - super(test_unif_unif_1D, self).createData() - super(test_unif_unif_1D, self).setUp() + super(test_uniform_partition_uniform_distribution_rectangle_scaled_1D, self).createData() + super(test_uniform_partition_uniform_distribution_rectangle_scaled_1D, self).setUp() -class test_unif_unif_2D(data_2D, unif_unif): +class test_uniform_partition_uniform_distribution_rectangle_scaled_2D(data_2D, + uniform_partition_uniform_distribution_rectangle_scaled): """ - Tests :meth:`bet.calculateP.simpleFunP.unif_unif` on 2D data domain. + Tests :meth:`bet.calculateP.simpleFunP.uniform_partition_uniform_distribution_rectangle_scaled` on 2D data domain. """ def setUp(self): """ Set up problem. """ - super(test_unif_unif_2D, self).createData() - super(test_unif_unif_2D, self).setUp() + super(test_uniform_partition_uniform_distribution_rectangle_scaled_2D, self).createData() + super(test_uniform_partition_uniform_distribution_rectangle_scaled_2D, self).setUp() -class test_unif_unif_3D(data_3D, unif_unif): +class test_uniform_partition_uniform_distribution_rectangle_scaled_3D(data_3D, + uniform_partition_uniform_distribution_rectangle_scaled): """ - Tests :meth:`bet.calculateP.simpleFunP.unif_unif` on 3D data domain. + Tests :meth:`bet.calculateP.simpleFunP.uniform_partition_uniform_distribution_rectangle_scaled` on 3D data domain. """ def setUp(self): """ Set up problem. """ - super(test_unif_unif_3D, self).createData() - super(test_unif_unif_3D, self).setUp() + super(test_uniform_partition_uniform_distribution_rectangle_scaled_3D, self).createData() + super(test_uniform_partition_uniform_distribution_rectangle_scaled_3D, self).setUp() -class normal_normal(prob): +class normal_partition_normal_distribution(prob): """ - Set up :meth:`bet.calculateP.simpleFunP.normal_normal` on data domain. + Set up :meth:`bet.calculateP.simpleFunP.normal_partition_normal_distribution` on data domain. """ def setUp(self): """ @@ -235,8 +246,9 @@ def setUp(self): std = 1.0 else: std = np.ones(self.Q_ref.shape) - self.rho_D_M, self.d_distr_samples, self.d_Tree = sFun.normal_normal(self.Q_ref, - M=67, std=std, num_d_emulate=1E3) + self.data_prob = sFun.normal_partition_normal_distribution(None, self.Q_ref, std=std, M=67, num_d_emulate=1E3) + self.d_distr_samples = self.data_prob.get_values() + self.rho_D_M = self.data_prob.get_probabilities() def test_M(self): """ @@ -245,51 +257,128 @@ def test_M(self): """ assert len(self.rho_D_M) == 67 -class test_normal_normal_01D(data_01D, normal_normal): +class test_normal_partition_normal_distribution_01D(data_01D, normal_partition_normal_distribution): """ - Tests :meth:`bet.calculateP.simpleFunP.normal_normal` on 01D data domain. + Tests :meth:`bet.calculateP.simpleFunP.normal_partition_normal_distribution` on 01D data domain. """ def setUp(self): """ Set up problem. """ - super(test_normal_normal_01D, self).createData() - super(test_normal_normal_01D, self).setUp() + super(test_normal_partition_normal_distribution_01D, self).createData() + super(test_normal_partition_normal_distribution_01D, self).setUp() -class test_normal_normal_1D(data_1D, normal_normal): +class test_normal_partition_normal_distribution_1D(data_1D, normal_partition_normal_distribution): """ - Tests :meth:`bet.calculateP.simpleFunP.normal_normal` on 1D data domain. + Tests :meth:`bet.calculateP.simpleFunP.normal_partition_normal_distribution` on 1D data domain. """ def setUp(self): """ Set up problem. 
""" - super(test_normal_normal_1D, self).createData() - super(test_normal_normal_1D, self).setUp() + super(test_normal_partition_normal_distribution_1D, self).createData() + super(test_normal_partition_normal_distribution_1D, self).setUp() -class test_normal_normal_2D(data_2D, normal_normal): +class test_normal_partition_normal_distribution_2D(data_2D, normal_partition_normal_distribution): """ - Tests :meth:`bet.calculateP.simpleFunP.normal_normal` on 2D data domain. + Tests :meth:`bet.calculateP.simpleFunP.normal_partition_normal_distribution` on 2D data domain. """ def setUp(self): """ Set up problem. """ - super(test_normal_normal_2D, self).createData() - super(test_normal_normal_2D, self).setUp() + super(test_normal_partition_normal_distribution_2D, self).createData() + super(test_normal_partition_normal_distribution_2D, self).setUp() -class test_normal_normal_3D(data_3D, normal_normal): +class test_normal_partition_normal_distribution_3D(data_3D, normal_partition_normal_distribution): """ - Tests :meth:`bet.calculateP.simpleFunP.normal_normal` on 3D data domain. + Tests :meth:`bet.calculateP.simpleFunP.normal_partition_normal_distribution` on 3D data domain. """ def setUp(self): """ Set up problem. """ - super(test_normal_normal_3D, self).createData() - super(test_normal_normal_3D, self).setUp() + super(test_normal_partition_normal_distribution_3D, self).createData() + super(test_normal_partition_normal_distribution_3D, self).setUp() + + +class uniform_partition_normal_distribution(prob): + """ + Set up :meth:`bet.calculateP.simpleFunP.uniform_partition_normal_distribution` on data domain. + """ + + def setUp(self): + """ + Set up problem. + """ + if type(self.Q_ref) != np.array and type(self.Q_ref) != np.ndarray: + std = 1.0 + else: + std = np.ones(self.Q_ref.shape) + self.data_prob = sFun.uniform_partition_normal_distribution(None, self.Q_ref, std=std, M=67, num_d_emulate=1E3) + self.d_distr_samples = self.data_prob.get_values() + self.rho_D_M = self.data_prob.get_probabilities() + + def test_M(self): + """ + Test that the right number of d_distr_samples are used to create + rho_D_M. + """ + assert len(self.rho_D_M) == 67 + + +class test_uniform_partition_normal_distribution_01D(data_01D, uniform_partition_normal_distribution): + """ + Tests :meth:`bet.calculateP.simpleFunP.uniform_partition_normal_distribution` on 01D data domain. + """ + + def setUp(self): + """ + Set up problem. + """ + super(test_uniform_partition_normal_distribution_01D, self).createData() + super(test_uniform_partition_normal_distribution_01D, self).setUp() + + +class test_uniform_partition_normal_distribution_1D(data_1D, uniform_partition_normal_distribution): + """ + Tests :meth:`bet.calculateP.simpleFunP.uniform_partition_normal_distribution` on 1D data domain. + """ + + def setUp(self): + """ + Set up problem. + """ + super(test_uniform_partition_normal_distribution_1D, self).createData() + super(test_uniform_partition_normal_distribution_1D, self).setUp() + + +class test_uniform_partition_normal_distribution_2D(data_2D, uniform_partition_normal_distribution): + """ + Tests :meth:`bet.calculateP.simpleFunP.uniform_partition_normal_distribution` on 2D data domain. + """ + + def setUp(self): + """ + Set up problem. 
+ """ + super(test_uniform_partition_normal_distribution_2D, self).createData() + super(test_uniform_partition_normal_distribution_2D, self).setUp() + + +class test_uniform_partition_normal_distribution_3D(data_3D, uniform_partition_normal_distribution): + """ + Tests :meth:`bet.calculateP.simpleFunP.uniform_partition_normal_distribution` on 3D data domain. + """ + + def setUp(self): + """ + Set up problem. + """ + super(test_uniform_partition_normal_distribution_3D, self).createData() + super(test_uniform_partition_normal_distribution_3D, self).setUp() class uniform_hyperrectangle_base(prob_uniform): @@ -331,9 +420,9 @@ def setUp(self): """ self.center_pts_per_edge = 2*np.ones((self.mdim,), dtype=np.int) -class uniform_hyperrectangle_user_int(uniform_hyperrectangle_int): +class regular_partition_uniform_distribution_rectangle_domain_int(uniform_hyperrectangle_int): """ - Set up :met:`bet.calculateP.simpleFunP.uniform_hyperrectangle_user` with an + Set up :met:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_domain` with an int type of value fo r``center_pts_per_edge`` """ @@ -341,7 +430,7 @@ def setUp(self): """ Set up problem. """ - super(uniform_hyperrectangle_user_int, self).setUp() + super(regular_partition_uniform_distribution_rectangle_domain_int, self).setUp() if type(self.Q_ref) != np.array: Q_ref = np.array([self.Q_ref]) else: @@ -357,12 +446,14 @@ def setUp(self): self.rect_domain[:, 0] = Q_ref - .5*r_width self.rect_domain[:, 1] = Q_ref + .5*r_width - self.rho_D_M, self.d_distr_samples, self.d_Tree = sFun.uniform_hyperrectangle_user(self.data, - self.rect_domain.transpose(), self.center_pts_per_edge) - -class uniform_hyperrectangle_user_list(uniform_hyperrectangle_list): + self.data_prob = sFun.regular_partition_uniform_distribution_rectangle_domain( + self.data, self.rect_domain.transpose(), self.center_pts_per_edge) + self.rho_D_M = self.data_prob._probabilities + self.d_distr_samples = self.data_prob._values + +class regular_partition_uniform_distribution_rectangle_domain_list(uniform_hyperrectangle_list): """ - Set up :met:`bet.calculateP.simpleFunP.uniform_hyperrectangle_user` with an + Set up :met:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_domain` with an int type of value fo r``center_pts_per_edge`` """ @@ -370,7 +461,7 @@ def setUp(self): """ Set up problem. """ - super(uniform_hyperrectangle_user_list, self).setUp() + super(regular_partition_uniform_distribution_rectangle_domain_list, self).setUp() if type(self.Q_ref) != np.array: Q_ref = np.array([self.Q_ref]) else: @@ -386,107 +477,117 @@ def setUp(self): self.rect_domain[:, 0] = Q_ref - .5*r_width self.rect_domain[:, 1] = Q_ref + .5*r_width - self.rho_D_M, self.d_distr_samples, self.d_Tree = sFun.uniform_hyperrectangle_user(self.data, - self.rect_domain.transpose(), self.center_pts_per_edge) + self.data_prob = sFun.regular_partition_uniform_distribution_rectangle_domain( + self.data, self.rect_domain.transpose(), self.center_pts_per_edge) + self.rho_D_M = self.data_prob._probabilities + self.d_distr_samples = self.data_prob._values -class test_uniform_hyperrectangle_user_int_01D(data_01D, uniform_hyperrectangle_user_int): +class test_regular_partition_uniform_distribution_rectangle_domain_int_01D(data_01D, + regular_partition_uniform_distribution_rectangle_domain_int): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_user_int` on 01D data domain. 
+ Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_domain` on 01D data domain. """ def setUp(self): """ Set up problem. """ - super(test_uniform_hyperrectangle_user_int_01D, self).createData() - super(test_uniform_hyperrectangle_user_int_01D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_domain_int_01D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_domain_int_01D, self).setUp() -class test_uniform_hyperrectangle_user_int_1D(data_1D, uniform_hyperrectangle_user_int): +class test_regular_partition_uniform_distribution_rectangle_domain_int_1D(data_1D, + regular_partition_uniform_distribution_rectangle_domain_int): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_user_int` on 1D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_domain` on 1D data domain. """ def setUp(self): """ Set up problem. """ - super(test_uniform_hyperrectangle_user_int_1D, self).createData() - super(test_uniform_hyperrectangle_user_int_1D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_domain_int_1D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_domain_int_1D, self).setUp() -class test_uniform_hyperrectangle_user_int_2D(data_2D, uniform_hyperrectangle_user_int): +class test_regular_partition_uniform_distribution_rectangle_domain_int_2D(data_2D, + regular_partition_uniform_distribution_rectangle_domain_int): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_user_int` on 2D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_domain` on 2D data domain. """ def setUp(self): """ Set up problem. """ - super(test_uniform_hyperrectangle_user_int_2D, self).createData() - super(test_uniform_hyperrectangle_user_int_2D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_domain_int_2D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_domain_int_2D, self).setUp() -class test_uniform_hyperrectangle_user_int_3D(data_3D, uniform_hyperrectangle_user_int): +class test_regular_partition_uniform_distribution_rectangle_domain_int_3D(data_3D, + regular_partition_uniform_distribution_rectangle_domain_int): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_user_int` on 3D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_domain` on 3D data domain. """ def setUp(self): """ Set up problem. """ - super(test_uniform_hyperrectangle_user_int_3D, self).createData() - super(test_uniform_hyperrectangle_user_int_3D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_domain_int_3D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_domain_int_3D, self).setUp() -class test_uniform_hyperrectangle_user_list_01D(data_01D, uniform_hyperrectangle_user_list): +class test_regular_partition_uniform_distribution_rectangle_domain_list_01D(data_01D, + regular_partition_uniform_distribution_rectangle_domain_list): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_user_list` on 01D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_domain` on 01D data domain. """ def setUp(self): """ Set up problem. 
""" - super(test_uniform_hyperrectangle_user_list_01D, self).createData() - super(test_uniform_hyperrectangle_user_list_01D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_domain_list_01D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_domain_list_01D, self).setUp() -class test_uniform_hyperrectangle_user_list_1D(data_1D, uniform_hyperrectangle_user_list): +class test_regular_partition_uniform_distribution_rectangle_domain_list_1D(data_1D, + regular_partition_uniform_distribution_rectangle_domain_list): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_user_list` on 1D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_domain` on 1D data domain. """ def setUp(self): """ Set up problem. """ - super(test_uniform_hyperrectangle_user_list_1D, self).createData() - super(test_uniform_hyperrectangle_user_list_1D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_domain_list_1D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_domain_list_1D, self).setUp() -class test_uniform_hyperrectangle_user_list_2D(data_2D, uniform_hyperrectangle_user_list): +class test_regular_partition_uniform_distribution_rectangle_domain_list_2D(data_2D, + regular_partition_uniform_distribution_rectangle_domain_list): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_user_list` on 2D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_domain` on 2D data domain. """ def setUp(self): """ Set up problem. """ - super(test_uniform_hyperrectangle_user_list_2D, self).createData() - super(test_uniform_hyperrectangle_user_list_2D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_domain_list_2D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_domain_list_2D, self).setUp() -class test_uniform_hyperrectangle_user_list_3D(data_3D, uniform_hyperrectangle_user_list): +class test_regular_partition_uniform_distribution_rectangle_domain_list_3D(data_3D, + regular_partition_uniform_distribution_rectangle_domain_list): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_user_list` on 3D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_domain` on 3D data domain. """ def setUp(self): """ Set up problem. """ - super(test_uniform_hyperrectangle_user_list_3D, self).createData() - super(test_uniform_hyperrectangle_user_list_3D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_domain_list_3D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_domain_list_3D, self).setUp() -class uniform_hyperrectangle_size_int(uniform_hyperrectangle_int): +class regular_partition_uniform_distribution_rectangle_size_int(uniform_hyperrectangle_int): """ - Set up :met:`bet.calculateP.simpleFunP.uniform_hyperrectangle_size` with an + Set up :met:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_size` with an int type of value fo r``center_pts_per_edge`` """ @@ -494,7 +595,7 @@ def setUp(self): """ Set up problem. 
""" - super(uniform_hyperrectangle_size_int, self).setUp() + super(regular_partition_uniform_distribution_rectangle_size_int, self).setUp() if type(self.Q_ref) != np.array: Q_ref = np.array([self.Q_ref]) else: @@ -511,12 +612,14 @@ def setUp(self): self.rect_domain[:, 0] = Q_ref - .5*r_width self.rect_domain[:, 1] = Q_ref + .5*r_width - self.rho_D_M, self.d_distr_samples, self.d_Tree = sFun.uniform_hyperrectangle_binsize(self.data, - self.Q_ref, binsize, self.center_pts_per_edge) + self.data_prob = sFun.regular_partition_uniform_distribution_rectangle_size( + self.data, self.Q_ref, binsize, self.center_pts_per_edge) + self.rho_D_M = self.data_prob._probabilities + self.d_distr_samples = self.data_prob._values -class uniform_hyperrectangle_size_list(uniform_hyperrectangle_list): +class regular_partition_uniform_distribution_rectangle_size_list(uniform_hyperrectangle_list): """ - Set up :met:`bet.calculateP.simpleFunP.uniform_hyperrectangle_size` with an + Set up :met:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_size` with an int type of value fo r``center_pts_per_edge`` """ @@ -524,7 +627,7 @@ def setUp(self): """ Set up problem. """ - super(uniform_hyperrectangle_size_list, self).setUp() + super(regular_partition_uniform_distribution_rectangle_size_list, self).setUp() if type(self.Q_ref) != np.array: Q_ref = np.array([self.Q_ref]) else: @@ -541,106 +644,116 @@ def setUp(self): self.rect_domain[:, 0] = Q_ref - .5*r_width self.rect_domain[:, 1] = Q_ref + .5*r_width - self.rho_D_M, self.d_distr_samples, self.d_Tree = sFun.uniform_hyperrectangle_binsize(self.data, - self.Q_ref, binsize, self.center_pts_per_edge) + self.data_prob = sFun.regular_partition_uniform_distribution_rectangle_size( + self.data, self.Q_ref, binsize, self.center_pts_per_edge) + self.rho_D_M = self.data_prob._probabilities + self.d_distr_samples = self.data_prob._values -class test_uniform_hyperrectangle_size_int_01D(data_01D, uniform_hyperrectangle_size_int): +class test_regular_partition_uniform_distribution_rectangle_size_int_01D(data_01D, + regular_partition_uniform_distribution_rectangle_size_int): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_size_int` on 01D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_size` on 01D data domain. """ def setUp(self): """ Set up problem. """ - super(test_uniform_hyperrectangle_size_int_01D, self).createData() - super(test_uniform_hyperrectangle_size_int_01D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_size_int_01D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_size_int_01D, self).setUp() -class test_uniform_hyperrectangle_size_int_1D(data_1D, uniform_hyperrectangle_size_int): +class test_regular_partition_uniform_distribution_rectangle_size_int_1D(data_1D, + regular_partition_uniform_distribution_rectangle_size_int): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_size_int` on 1D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_size` on 1D data domain. """ def setUp(self): """ Set up problem. 
""" - super(test_uniform_hyperrectangle_size_int_1D, self).createData() - super(test_uniform_hyperrectangle_size_int_1D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_size_int_1D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_size_int_1D, self).setUp() -class test_uniform_hyperrectangle_size_int_2D(data_2D, uniform_hyperrectangle_size_int): +class test_regular_partition_uniform_distribution_rectangle_size_int_2D(data_2D, + regular_partition_uniform_distribution_rectangle_size_int): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_size_int` on 2D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_size` on 2D data domain. """ def setUp(self): """ Set up problem. """ - super(test_uniform_hyperrectangle_size_int_2D, self).createData() - super(test_uniform_hyperrectangle_size_int_2D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_size_int_2D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_size_int_2D, self).setUp() -class test_uniform_hyperrectangle_size_int_3D(data_3D, uniform_hyperrectangle_size_int): +class test_regular_partition_uniform_distribution_rectangle_size_int_3D(data_3D, + regular_partition_uniform_distribution_rectangle_size_int): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_size_int` on 3D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_size` on 3D data domain. """ def setUp(self): """ Set up problem. """ - super(test_uniform_hyperrectangle_size_int_3D, self).createData() - super(test_uniform_hyperrectangle_size_int_3D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_size_int_3D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_size_int_3D, self).setUp() -class test_uniform_hyperrectangle_size_list_01D(data_01D, uniform_hyperrectangle_size_list): +class test_regular_partition_uniform_distribution_rectangle_size_list_01D(data_01D, + regular_partition_uniform_distribution_rectangle_size_list): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_size_list` on 01D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_size` on 01D data domain. """ def setUp(self): """ Set up problem. """ - super(test_uniform_hyperrectangle_size_list_01D, self).createData() - super(test_uniform_hyperrectangle_size_list_01D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_size_list_01D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_size_list_01D, self).setUp() -class test_uniform_hyperrectangle_size_list_1D(data_1D, uniform_hyperrectangle_size_list): +class test_regular_partition_uniform_distribution_rectangle_size_list_1D(data_1D, + regular_partition_uniform_distribution_rectangle_size_list): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_size_list` on 1D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_size` on 1D data domain. """ def setUp(self): """ Set up problem. 
""" - super(test_uniform_hyperrectangle_size_list_1D, self).createData() - super(test_uniform_hyperrectangle_size_list_1D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_size_list_1D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_size_list_1D, self).setUp() -class test_uniform_hyperrectangle_size_list_2D(data_2D, uniform_hyperrectangle_size_list): +class test_regular_partition_uniform_distribution_rectangle_size_list_2D(data_2D, + regular_partition_uniform_distribution_rectangle_size_list): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_size_list` on 2D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_size` on 2D data domain. """ def setUp(self): """ Set up problem. """ - super(test_uniform_hyperrectangle_size_list_2D, self).createData() - super(test_uniform_hyperrectangle_size_list_2D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_size_list_2D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_size_list_2D, self).setUp() -class test_uniform_hyperrectangle_size_list_3D(data_3D, uniform_hyperrectangle_size_list): +class test_regular_partition_uniform_distribution_rectangle_size_list_3D(data_3D, + regular_partition_uniform_distribution_rectangle_size_list): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_size_list` on 3D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_size` on 3D data domain. """ def setUp(self): """ Set up problem. """ - super(test_uniform_hyperrectangle_size_list_3D, self).createData() - super(test_uniform_hyperrectangle_size_list_3D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_size_list_3D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_size_list_3D, self).setUp() -class uniform_hyperrectangle_ratio_int(uniform_hyperrectangle_int): +class regular_partition_uniform_distribution_rectangle_scaled_int(uniform_hyperrectangle_int): """ - Set up :met:`bet.calculateP.simpleFunP.uniform_hyperrectangle_ratio` with an + Set up :met:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_scaled` with an int type of value fo r``center_pts_per_edge`` """ @@ -648,7 +761,7 @@ def setUp(self): """ Set up problem. 
""" - super(uniform_hyperrectangle_ratio_int, self).setUp() + super(regular_partition_uniform_distribution_rectangle_scaled_int, self).setUp() if type(self.Q_ref) != np.array: Q_ref = np.array([self.Q_ref]) else: @@ -665,12 +778,14 @@ def setUp(self): self.rect_domain[:, 0] = Q_ref - .5*r_width self.rect_domain[:, 1] = Q_ref + .5*r_width - self.rho_D_M, self.d_distr_samples, self.d_Tree = sFun.uniform_hyperrectangle(self.data, - self.Q_ref, binratio, self.center_pts_per_edge) + self.data_prob = sFun.regular_partition_uniform_distribution_rectangle_scaled( + self.data, self.Q_ref, binratio, self.center_pts_per_edge) + self.rho_D_M = self.data_prob._probabilities + self.d_distr_samples = self.data_prob._values -class uniform_hyperrectangle_ratio_list(uniform_hyperrectangle_list): +class regular_partition_uniform_distribution_rectangle_scaled_list(uniform_hyperrectangle_list): """ - Set up :met:`bet.calculateP.simpleFunP.uniform_hyperrectangle_ratio` with an + Set up :met:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_scaled` with an int type of value fo r``center_pts_per_edge`` """ @@ -678,7 +793,7 @@ def setUp(self): """ Set up problem. """ - super(uniform_hyperrectangle_ratio_list, self).setUp() + super(regular_partition_uniform_distribution_rectangle_scaled_list, self).setUp() if type(self.Q_ref) != np.array: Q_ref = np.array([self.Q_ref]) else: @@ -695,112 +810,125 @@ def setUp(self): self.rect_domain[:, 0] = Q_ref - .5*r_width self.rect_domain[:, 1] = Q_ref + .5*r_width - self.rho_D_M, self.d_distr_samples, self.d_Tree = sFun.uniform_hyperrectangle(self.data, - self.Q_ref, binratio, self.center_pts_per_edge) + self.data_prob = sFun.regular_partition_uniform_distribution_rectangle_scaled( + self.data, self.Q_ref, binratio, self.center_pts_per_edge) + self.rho_D_M = self.data_prob._probabilities + self.d_distr_samples = self.data_prob._values -class test_uniform_hyperrectangle_ratio_int_01D(data_01D, uniform_hyperrectangle_ratio_int): +class test_regular_partition_uniform_distribution_rectangle_scaled_int_01D(data_01D, + regular_partition_uniform_distribution_rectangle_scaled_int): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_ratio_int` on 01D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_scaled` on 01D data domain. """ def setUp(self): """ Set up problem. """ - super(test_uniform_hyperrectangle_ratio_int_01D, self).createData() - super(test_uniform_hyperrectangle_ratio_int_01D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_scaled_int_01D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_scaled_int_01D, self).setUp() -class test_uniform_hyperrectangle_ratio_int_1D(data_1D, uniform_hyperrectangle_ratio_int): +class test_regular_partition_uniform_distribution_rectangle_scaled_int_1D(data_1D, + regular_partition_uniform_distribution_rectangle_scaled_int): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_ratio_int` on 1D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_scaled` on 1D data domain. """ def setUp(self): """ Set up problem. 
""" - super(test_uniform_hyperrectangle_ratio_int_1D, self).createData() - super(test_uniform_hyperrectangle_ratio_int_1D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_scaled_int_1D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_scaled_int_1D, self).setUp() -class test_uniform_hyperrectangle_ratio_int_2D(data_2D, uniform_hyperrectangle_ratio_int): +class test_regular_partition_uniform_distribution_rectangle_scaled_int_2D(data_2D, + regular_partition_uniform_distribution_rectangle_scaled_int): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_ratio_int` on 2D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_scaled` on 2D data domain. """ def setUp(self): """ Set up problem. """ - super(test_uniform_hyperrectangle_ratio_int_2D, self).createData() - super(test_uniform_hyperrectangle_ratio_int_2D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_scaled_int_2D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_scaled_int_2D, self).setUp() -class test_uniform_hyperrectangle_ratio_int_3D(data_3D, uniform_hyperrectangle_ratio_int): +class test_regular_partition_uniform_distribution_rectangle_scaled_int_3D(data_3D, + regular_partition_uniform_distribution_rectangle_scaled_int): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_ratio_int` on 3D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_scaled` on 3D data domain. """ def setUp(self): """ Set up problem. """ - super(test_uniform_hyperrectangle_ratio_int_3D, self).createData() - super(test_uniform_hyperrectangle_ratio_int_3D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_scaled_int_3D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_scaled_int_3D, self).setUp() -class test_uniform_hyperrectangle_ratio_list_01D(data_01D, uniform_hyperrectangle_ratio_list): +class test_regular_partition_uniform_distribution_rectangle_scaled_list_01D(data_01D, + regular_partition_uniform_distribution_rectangle_scaled_list): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_ratio_list` on 01D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_scaled_list` on 01D data domain. """ def setUp(self): """ Set up problem. """ - super(test_uniform_hyperrectangle_ratio_list_01D, self).createData() - super(test_uniform_hyperrectangle_ratio_list_01D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_scaled_list_01D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_scaled_list_01D, self).setUp() -class test_uniform_hyperrectangle_ratio_list_1D(data_1D, uniform_hyperrectangle_ratio_list): +class test_regular_partition_uniform_distribution_rectangle_scaled_list_1D(data_1D, + regular_partition_uniform_distribution_rectangle_scaled_list): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_ratio_list` on 1D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_scaled` on 1D data domain. """ def setUp(self): """ Set up problem. 
""" - super(test_uniform_hyperrectangle_ratio_list_1D, self).createData() - super(test_uniform_hyperrectangle_ratio_list_1D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_scaled_list_1D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_scaled_list_1D, self).setUp() -class test_uniform_hyperrectangle_ratio_list_2D(data_2D, uniform_hyperrectangle_ratio_list): +class test_regular_partition_uniform_distribution_rectangle_scaled_list_2D(data_2D, + regular_partition_uniform_distribution_rectangle_scaled_list): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_ratio_list` on 2D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_scaled` on 2D data domain. """ def setUp(self): """ Set up problem. """ - super(test_uniform_hyperrectangle_ratio_list_2D, self).createData() - super(test_uniform_hyperrectangle_ratio_list_2D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_scaled_list_2D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_scaled_list_2D, self).setUp() -class test_uniform_hyperrectangle_ratio_list_3D(data_3D, uniform_hyperrectangle_ratio_list): +class test_regular_partition_uniform_distribution_rectangle_scaled_list_3D(data_3D, + regular_partition_uniform_distribution_rectangle_scaled_list): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_hyperrectangle_ratio_list` on 3D data domain. + Tests :meth:`bet.calculateP.simpleFunP.regular_partition_uniform_distribution_rectangle_scaled` on 3D data domain. """ def setUp(self): """ Set up problem. """ - super(test_uniform_hyperrectangle_ratio_list_3D, self).createData() - super(test_uniform_hyperrectangle_ratio_list_3D, self).setUp() + super(test_regular_partition_uniform_distribution_rectangle_scaled_list_3D, self).createData() + super(test_regular_partition_uniform_distribution_rectangle_scaled_list_3D, self).setUp() -class uniform_data(prob_uniform): +class uniform_partition_uniform_distribution_data_samples(prob_uniform): """ - Set up :meth:`bet.calculateP.simpleFunP.uniform_data` on data domain. + Set up :meth:`bet.calculateP.simpleFunP.uniform_partition_uniform_distribution_data_samples` on data domain. """ def setUp(self): """ Set up problem. """ - self.rho_D_M, self.d_distr_samples, self.d_Tree = sFun.uniform_data(self.data) + self.data_prob = sFun.uniform_partition_uniform_distribution_data_samples(self.data) + self.d_distr_samples = self.data_prob.get_values() + self.rho_D_M = self.data_prob.get_probabilities() + self.data = self.data._values if type(self.Q_ref) != np.array: self.Q_ref = np.array([self.Q_ref]) @@ -815,50 +943,343 @@ def test_M(self): """ assert len(self.rho_D_M) == self.data.shape[0] -class test_uniform_data_01D(data_01D, uniform_data): +class test_uniform_partition_uniform_distribution_data_samples_01D(data_01D, + uniform_partition_uniform_distribution_data_samples): + """ + Tests :meth:`bet.calculateP.simpleFunP.uniform_partition_uniform_distribution_data_samples` on 01D data domain. + """ + def setUp(self): + """ + Set up problem. + """ + super(test_uniform_partition_uniform_distribution_data_samples_01D, self).createData() + super(test_uniform_partition_uniform_distribution_data_samples_01D, self).setUp() + +class test_uniform_partition_uniform_distribution_data_samples_1D(data_1D, + uniform_partition_uniform_distribution_data_samples): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_data` on 01D data domain. 
+ Tests :meth:`bet.calculateP.simpleFunP.uniform_partition_uniform_distribution_data_samples` on 1D data domain. """ def setUp(self): """ Set up problem. """ - super(test_uniform_data_01D, self).createData() - super(test_uniform_data_01D, self).setUp() + super(test_uniform_partition_uniform_distribution_data_samples_1D, self).createData() + super(test_uniform_partition_uniform_distribution_data_samples_1D, self).setUp() + -class test_uniform_data_1D(data_1D, uniform_data): +class test_uniform_partition_uniform_distribution_data_samples_2D(data_2D, + uniform_partition_uniform_distribution_data_samples): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_data` on 1D data domain. + Tests :meth:`bet.calculateP.simpleFunP.uniform_partition_uniform_distribution_data_samples` on 2D data domain. """ def setUp(self): """ Set up problem. """ - super(test_uniform_data_1D, self).createData() - super(test_uniform_data_1D, self).setUp() + super(test_uniform_partition_uniform_distribution_data_samples_2D, self).createData() + super(test_uniform_partition_uniform_distribution_data_samples_2D, self).setUp() -class test_uniform_data_2D(data_2D, uniform_data): +class test_uniform_partition_uniform_distribution_data_samples_3D(data_3D, + uniform_partition_uniform_distribution_data_samples): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_data` on 2D data domain. + Tests :meth:`bet.calculateP.simpleFunP.uniform_partition_uniform_distribution_data_samples` on 3D data domain. """ def setUp(self): """ Set up problem. """ - super(test_uniform_data_2D, self).createData() - super(test_uniform_data_2D, self).setUp() + super(test_uniform_partition_uniform_distribution_data_samples_3D, self).createData() + super(test_uniform_partition_uniform_distribution_data_samples_3D, self).setUp() -class test_uniform_data_3D(data_3D, uniform_data): +class uniform_partition_uniform_distribution_rectangle_size(prob_uniform): """ - Tests :meth:`bet.calculateP.simpleFunP.uniform_data` on 3D data domain. + Set up :meth:`bet.calculateP.simpleFunP.uniform_partition_uniform_distribution_rectangle_size` on data domain. """ + def setUp(self): """ Set up problem. """ - super(test_uniform_data_3D, self).createData() - super(test_uniform_data_3D, self).setUp() + self.data_prob = sFun.uniform_partition_uniform_distribution_rectangle_size( + self.data, self.Q_ref, rect_size=1.0, M=67, num_d_emulate=1E3) + self.d_distr_samples = self.data_prob.get_values() + self.rho_D_M = self.data_prob.get_probabilities() + + if type(self.Q_ref) != np.array: + self.Q_ref = np.array([self.Q_ref]) + if len(self.data_domain.shape) == 1: + self.data_domain = np.expand_dims(self.data_domain, axis=0) + + self.rect_domain = np.zeros((self.data_domain.shape[0], 2)) + + binsize = 1.0 + r_width = binsize * np.ones(self.data_domain[:, 1].shape) + + self.rect_domain[:, 0] = self.Q_ref - .5 * r_width + self.rect_domain[:, 1] = self.Q_ref + .5 * r_width + + def test_M(self): + """ + Test that the right number of d_distr_samples are used to create + rho_D_M. + """ + assert len(self.rho_D_M) == 67 + def test_domain(self): + """ + Test that the probabilities within the prescribed domain are non-zero + and that the probabilities outside of the prescribed domain are zero. 
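+ Because the partition samples are generated randomly, the checks below count qualifying samples (and, as the printed note says, may occasionally fail) rather than asserting exact probabilities.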
+ """ + # d_distr_samples are (mdim, M) + # rect_domain is (mdim, 2) + inside = np.logical_and(np.all(np.greater_equal(self.d_distr_samples, + self.rect_domain[:, 0]), axis=1), + np.all(np.less_equal(self.d_distr_samples, + self.rect_domain[:, 1]), axis=1)) + msg = "Due to the inherent randomness of this method, this may fail." + print msg + print np.sum(self.rho_D_M[inside] >= 0.0) + assert np.sum(self.rho_D_M[inside] >= 0.0) < 100 + print np.sum(self.rho_D_M[np.logical_not(inside)] == 0.0) + assert np.sum(self.rho_D_M[np.logical_not(inside)] == 0.0) < 100 + + +class test_uniform_partition_uniform_distribution_rectangle_size_01D(data_01D, + uniform_partition_uniform_distribution_rectangle_size): + """ + Tests :meth:`bet.calculateP.simpleFunP.uniform_partition_uniform_distribution_rectangle_size` on 01D data domain. + """ + + def setUp(self): + """ + Set up problem. + """ + super(test_uniform_partition_uniform_distribution_rectangle_size_01D, self).createData() + super(test_uniform_partition_uniform_distribution_rectangle_size_01D, self).setUp() + + +class test_uniform_partition_uniform_distribution_rectangle_size_1D(data_1D, + uniform_partition_uniform_distribution_rectangle_size): + """ + Tests :meth:`bet.calculateP.simpleFunP.uniform_partition_uniform_distribution_rectangle_size` on 1D data domain. + """ + + def setUp(self): + """ + Set up problem. + """ + super(test_uniform_partition_uniform_distribution_rectangle_size_1D, self).createData() + super(test_uniform_partition_uniform_distribution_rectangle_size_1D, self).setUp() + + +class test_uniform_partition_uniform_distribution_rectangle_size_2D(data_2D, + uniform_partition_uniform_distribution_rectangle_size): + """ + Tests :meth:`bet.calculateP.simpleFunP.uniform_partition_uniform_distribution_rectangle_size` on 2D data domain. + """ + + def setUp(self): + """ + Set up problem. + """ + super(test_uniform_partition_uniform_distribution_rectangle_size_2D, self).createData() + super(test_uniform_partition_uniform_distribution_rectangle_size_2D, self).setUp() + + +class test_uniform_partition_uniform_distribution_rectangle_size_3D(data_3D, + uniform_partition_uniform_distribution_rectangle_size): + """ + Tests :meth:`bet.calculateP.simpleFunP.uniform_partition_uniform_distribution_rectangle_size` on 3D data domain. + """ + def setUp(self): + """ + Set up problem. + """ + super(test_uniform_partition_uniform_distribution_rectangle_size_3D, self).createData() + super(test_uniform_partition_uniform_distribution_rectangle_size_3D, self).setUp() + + +class uniform_partition_uniform_distribution_rectangle_domain(prob_uniform): + """ + Set up :meth:`bet.calculateP.simpleFunP.uniform_partition_uniform_distribution_rectangle_domain` on data domain. + """ + + def setUp(self): + """ + Set up problem. 
+ """ + if type(self.Q_ref) != np.array: + Q_ref = np.array([self.Q_ref]) + else: + Q_ref = self.Q_ref + if len(self.data_domain.shape) == 1: + data_domain = np.expand_dims(self.data_domain, axis=0) + else: + data_domain = self.data_domain + + self.rect_domain = np.zeros((data_domain.shape[0], 2)) + r_width = 0.1 * data_domain[:, 1] + + self.rect_domain[:, 0] = Q_ref - .5 * r_width + self.rect_domain[:, 1] = Q_ref + .5 * r_width + + self.data_prob = sFun.uniform_partition_uniform_distribution_rectangle_domain( + self.data, self.rect_domain.transpose(), M=67, num_d_emulate=1E3) + self.d_distr_samples = self.data_prob.get_values() + self.rho_D_M = self.data_prob.get_probabilities() + + def test_M(self): + """ + Test that the right number of d_distr_samples are used to create + rho_D_M. + """ + assert len(self.rho_D_M) == 67 + + def test_domain(self): + """ + Test that the probabilities within the prescribed domain are non-zero + and that the probabilities outside of the prescribed domain are zero. + """ + # d_distr_samples are (mdim, M) + # rect_domain is (mdim, 2) + inside = np.logical_and(np.all(np.greater_equal(self.d_distr_samples, + self.rect_domain[:, 0]), axis=1), + np.all(np.less_equal(self.d_distr_samples, + self.rect_domain[:, 1]), axis=1)) + msg = "Due to the inherent randomness of this method, this may fail." + print msg + print np.sum(self.rho_D_M[inside] >= 0.0) + assert np.sum(self.rho_D_M[inside] >= 0.0) < 100 + print np.sum(self.rho_D_M[np.logical_not(inside)] == 0.0) + assert np.sum(self.rho_D_M[np.logical_not(inside)] == 0.0) < 100 + + +class test_uniform_partition_uniform_distribution_rectangle_domain_01D(data_01D, + uniform_partition_uniform_distribution_rectangle_domain): + """ + Tests :meth:`bet.calculateP.simpleFunP.uniform_partition_uniform_distribution_rectangle_domain` on 01D data domain. + """ + + def setUp(self): + """ + Set up problem. + """ + super(test_uniform_partition_uniform_distribution_rectangle_domain_01D, self).createData() + super(test_uniform_partition_uniform_distribution_rectangle_domain_01D, self).setUp() + + +class test_uniform_partition_uniform_distribution_rectangle_domain_1D(data_1D, + uniform_partition_uniform_distribution_rectangle_domain): + """ + Tests :meth:`bet.calculateP.simpleFunP.uniform_partition_uniform_distribution_rectangle_domain` on 1D data domain. + """ + + def setUp(self): + """ + Set up problem. + """ + super(test_uniform_partition_uniform_distribution_rectangle_domain_1D, self).createData() + super(test_uniform_partition_uniform_distribution_rectangle_domain_1D, self).setUp() + + +class test_uniform_partition_uniform_distribution_rectangle_domain_2D(data_2D, + uniform_partition_uniform_distribution_rectangle_domain): + """ + Tests :meth:`bet.calculateP.simpleFunP.uniform_partition_uniform_distribution_rectangle_domain` on 2D data domain. + """ + + def setUp(self): + """ + Set up problem. + """ + super(test_uniform_partition_uniform_distribution_rectangle_domain_2D, self).createData() + super(test_uniform_partition_uniform_distribution_rectangle_domain_2D, self).setUp() + + +class test_uniform_partition_uniform_distribution_rectangle_domain_3D(data_3D, + uniform_partition_uniform_distribution_rectangle_domain): + """ + Tests :meth:`bet.calculateP.simpleFunP.uniform_partition_uniform_distribution_rectangle_domain` on 3D data domain. + """ + + def setUp(self): + """ + Set up problem. 
+ """ + super(test_uniform_partition_uniform_distribution_rectangle_domain_3D, self).createData() + super(test_uniform_partition_uniform_distribution_rectangle_domain_3D, self).setUp() + + +class user_partition_user_distribution(prob): + """ + Set up :meth:`bet.calculateP.simpleFunP.user_partition_user_distribution` on data domain. + """ + + def setUp(self): + """ + Set up problem. + """ + self.data_prob = sFun.user_partition_user_distribution(self.data, + self.data, + self.data) + self.rho_D_M = self.data_prob.get_probabilities() + self.d_distr_samples = self.data_prob.get_values() + +class test_user_partition_user_distribution_01D(data_01D, + user_partition_user_distribution): + """ + Tests :meth:`bet.calculateP.simpleFunP.user_partition_user_distribution` on 01D data domain. + """ + + def setUp(self): + """ + Set up problem. + """ + super(test_user_partition_user_distribution_01D, self).createData() + super(test_user_partition_user_distribution_01D, self).setUp() + + +class test_user_partition_user_distribution_1D(data_1D, + user_partition_user_distribution): + """ + Tests :meth:`bet.calculateP.simpleFunP.user_partition_user_distribution` on 1D data domain. + """ + + def setUp(self): + """ + Set up problem. + """ + super(test_user_partition_user_distribution_1D, self).createData() + super(test_user_partition_user_distribution_1D, self).setUp() + + +class test_user_partition_user_distribution_2D(data_2D, + user_partition_user_distribution): + """ + Tests :meth:`bet.calculateP.simpleFunP.user_partition_user_distribution` on 2D data domain. + """ + + def setUp(self): + """ + Set up problem. + """ + super(test_user_partition_user_distribution_2D, self).createData() + super(test_user_partition_user_distribution_2D, self).setUp() + + +class test_user_partition_user_distribution_3D(data_3D, + user_partition_user_distribution): + """ + Tests :meth:`bet.calculateP.simpleFunP.user_partition_user_distribution` on 3D data domain. + """ + + def setUp(self): + """ + Set up problem. + """ + super(test_user_partition_user_distribution_3D, self).createData() + super(test_user_partition_user_distribution_3D, self).setUp() diff --git a/test/test_calculateP/test_voronoiHistogram.py b/test/test_calculateP/test_voronoiHistogram.py index f8702618..39023249 100644 --- a/test/test_calculateP/test_voronoiHistogram.py +++ b/test/test_calculateP/test_voronoiHistogram.py @@ -573,7 +573,8 @@ def setUp(self): volume = 1.0/(H*(2.0**self.mdim)) volumes = volume.ravel() output = vHist.simple_fun_uniform(points, volumes, self.rect_domain) - self.rho_D_M, self.d_distr_samples, self.d_Tree = output + self.rho_D_M = output._probabilities + self.d_distr_samples = output._values class test_sfu_1D(domain_1D, simple_fun_uniform): """ diff --git a/test/test_postProcess/test_plotDomains.py b/test/test_postProcess/test_plotDomains.py index 1024398c..a9afd82f 100644 --- a/test/test_postProcess/test_plotDomains.py +++ b/test/test_postProcess/test_plotDomains.py @@ -1,6 +1,5 @@ -# Copyright (C) 2014-2015 The BET Development Team +# Copyright (C) 2014-2016 The BET Development Team -# Lindley Graham 04/07/2015 """ This module contains tests for :module:`bet.postProcess.plotDomains`. @@ -16,6 +15,7 @@ import numpy as np import numpy.testing as nptest from bet.Comm import comm +import bet.sample as sample local_path = os.path.join(os.path.dirname(bet.__file__), "../test/test_sampling") @@ -31,20 +31,41 @@ def setUp(self): """ Set up problem. 
""" - self.lam_domain = np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]]) - self.samples = util.meshgrid_ndim((np.linspace(self.lam_domain[0][0], - self.lam_domain[0][1], 10), np.linspace(self.lam_domain[1][0], - self.lam_domain[1][1], 10), np.linspace(self.lam_domain[1][0], - self.lam_domain[1][1], 10), np.linspace(self.lam_domain[1][0], - self.lam_domain[1][1], 10))) - self.data = self.samples*3.0 - self.P_samples = (1.0/float(self.samples.shape[0]))*np.ones((self.samples.shape[0],)) + # Create sample_set object for input_samples + input_samples = sample.sample_set(4) + + input_samples.set_domain(np.array([[0.0, 1.0], [0.0, 1.0], + [0.0, 1.0], [0.0, 1.0]])) + input_samples.set_values(util.meshgrid_ndim( + (np.linspace(input_samples.get_domain()[0,0], + input_samples.get_domain()[0,1], 3), + np.linspace(input_samples.get_domain()[1,0], + input_samples.get_domain()[1,1], 3), + np.linspace(input_samples.get_domain()[2,0], + input_samples.get_domain()[2,1], 3), + np.linspace(input_samples.get_domain()[3,0], + input_samples.get_domain()[3,1], 3)))) + input_samples.set_probabilities( + (1.0/float(input_samples.get_values().shape[0])) + *np.ones((input_samples.get_values().shape[0],))) + + input_samples.check_num() # Check that probabilities and values arrays have same number of entries + + # Create sample_set object for output_samples + output_samples = sample.sample_set(4) + output_samples.set_values(input_samples.get_values()*3.0) + output_samples.set_domain(3.0*input_samples.get_domain()) + + self.disc = sample.discretization(input_samples, output_samples) + self.filename = "testfigure" - QoI_range = np.array([3.0, 3.0, 3.0, 3.0]) - Q_ref = QoI_range*0.5 - bin_size = 0.15*QoI_range + output_ref_datum = np.mean(output_samples.get_domain(), axis=1) + + bin_size = 0.15*(np.max(output_samples.get_domain(), axis=1) - + np.min(output_samples.get_domain(), axis=1)) maximum = 1/np.product(bin_size) + def ifun(outputs): """ Indicator function. @@ -53,22 +74,25 @@ def ifun(outputs): :rtype: :class:`numpy.ndarray` of shape (N,) :returns: 0 if outside of set or positive number if inside set """ - left = np.repeat([Q_ref-.5*bin_size], outputs.shape[0], 0) - right = np.repeat([Q_ref+.5*bin_size], outputs.shape[0], 0) + left = np.repeat([output_ref_datum-.5*bin_size], outputs.shape[0], 0) + right = np.repeat([output_ref_datum+.5*bin_size], outputs.shape[0], 0) left = np.all(np.greater_equal(outputs, left), axis=1) right = np.all(np.less_equal(outputs, right), axis=1) inside = np.logical_and(left, right) max_values = np.repeat(maximum, outputs.shape[0], 0) return inside.astype('float64')*max_values + self.rho_D = ifun self.lnums = [1, 2, 3] self.markers = [] + for m in Line2D.markers: try: if len(m) == 1 and m != ' ': self.markers.append(m) except TypeError: pass + self.colors = ('b', 'g', 'r', 'c', 'm', 'y', 'k') def tearDown(self): @@ -107,23 +131,28 @@ def test_scatter_2D(self): Test :meth:`bet.postProcess.plotDomains.scatter_2D` """ sample_nos = [None, 25] - p_ref = [None, self.samples[4, [0, 1]]] + p_ref = [None, self.disc._input_sample_set.get_values()[4, [0, 1]]] + #p_ref = [None, self.samples[4, [0, 1]]] for sn, pr in zip(sample_nos, p_ref): self.check_scatter_2D(sn, pr, True) def check_scatter_2D(self, sample_nos, p_ref, save): """ - Check to see that the :meth:`bet.postTools.plotDomains.scatter_2D` ran without generating an error. 
""" try: - plotDomains.scatter_2D(self.samples[:, [0, 1]], sample_nos, - self.P_samples, p_ref, save, False, 'XLABEL', 'YLABEL', - self.filename) + input_sample_set_temp = sample.sample_set(2) + input_sample_set_temp.set_values(self.disc._input_sample_set.get_values()[:, [0, 1]]) + plotDomains.scatter_2D( + input_sample_set_temp, + sample_nos, + self.disc._input_sample_set.get_probabilities(), + p_ref, save, False, 'XLABEL', 'YLABEL', self.filename) go = True except (RuntimeError, TypeError, NameError): go = False + nptest.assert_equal(go, True) def test_scatter_3D(self): @@ -131,7 +160,7 @@ def test_scatter_3D(self): Test :meth:`bet.postProcess.plotDomains.scatter_3D` """ sample_nos = [None, 25] - p_ref = [None, self.samples[4, :]] + p_ref = [None, self.disc._input_sample_set.get_values()[4, :]] for sn, pr in zip(sample_nos, p_ref): self.check_scatter_3D(sn, pr, True) @@ -141,21 +170,27 @@ def check_scatter_3D(self, sample_nos, p_ref, save): without generating an error. """ try: - plotDomains.scatter_3D(self.samples[:, [0, 1, 2]], sample_nos, - self.P_samples, p_ref, save, False, 'XLABEL', 'YLABEL', - 'ZLABEL', self.filename) + input_sample_set_temp = sample.sample_set(3) + input_sample_set_temp.set_values(self.disc._input_sample_set.get_values()[:, [0, 1, 2]]) + plotDomains.scatter_3D( + input_sample_set_temp, + sample_nos, + self.disc._input_sample_set.get_probabilities(), + p_ref, save, False, 'XLABEL', 'YLABEL', 'ZLABEL', self.filename) go = True except (RuntimeError, TypeError, NameError): go = False - nptest.assert_equal(go, True) + + nptest.assert_equal(go, True) def test_show_param(self): """ Test :meth:`bet.postProcess.plotDomains.show_param` """ sample_nos = [None, 25] - samples = [self.samples, self.samples[:, [0, 1]], - self.samples[:, [0, 1, 2]]] + samples = [self.disc._input_sample_set.get_values(), + self.disc._input_sample_set.get_values()[:, [0, 1]], + self.disc._input_sample_set.get_values()[:, [0, 1, 2]]] lnums = [None, self.lnums] for sample in samples: @@ -167,7 +202,7 @@ def test_show_param(self): for sd in showdim: p_ref = [None, sample[4, :]] for ln, sn, pr in zip(lnums, sample_nos, p_ref): - self.check_show_param(sample, sn, pr, True, ln, sd) + self.check_show_param(sample, sn, pr, True, ln, sd) def check_show_param(self, samples, sample_nos, p_ref, save, lnums, showdim): @@ -176,19 +211,26 @@ def check_show_param(self, samples, sample_nos, p_ref, save, lnums, without generating an error. 
""" try: - plotDomains.show_param(samples, self.data, self.rho_D, p_ref, - sample_nos, save, False, lnums, showdim) + input_sample_set_temp = sample.sample_set(samples.shape[1]) + input_sample_set_temp.set_values(samples) + disc_obj_temp = sample.discretization(input_sample_set_temp, + self.disc._output_sample_set) + plotDomains.show_param(disc_obj_temp, + self.rho_D, p_ref, sample_nos, save, + False, lnums, showdim) go = True except (RuntimeError, TypeError, NameError): go = False - nptest.assert_equal(go, True) + + nptest.assert_equal(go, True) def test_show_data(self): """ Test :meth:`bet.postProcess.plotDomains.show_data` """ sample_nos = [None, 25] - data_sets = [self.data, self.data[:, [0, 1]]] + data_sets = [self.disc._output_sample_set.get_values(), + self.disc._output_sample_set.get_values()[:, [0, 1]]] qnums = [None, [0, 1, 2]]#self.lnums] for data, qn, sn in zip(data_sets, qnums, sample_nos): @@ -208,10 +250,14 @@ def check_show_data(self, data, sample_nos, q_ref, save, qnums, showdim): """ try: if data.shape[1] == 4: - plotDomains.show_data(data, self.rho_D, q_ref, + data_obj_temp = sample.sample_set(4) + data_obj_temp.set_values(data) + plotDomains.show_data(data_obj_temp, self.rho_D, q_ref, sample_nos, save, False, qnums, showdim) else: - plotDomains.show_data(data, None, q_ref, + data_obj_temp = sample.sample_set(data.shape[1]) + data_obj_temp.set_values(data) + plotDomains.show_data(data_obj_temp, None, q_ref, sample_nos, save, False, qnums, showdim) go = True except (RuntimeError, TypeError, NameError): @@ -243,18 +289,23 @@ def check_show_data_domain_2D(self, ref_markers, ref_colors, triangles, :meth:`bet.postTools.plotDomains.show_data_domain_2D` ran without generating an error. """ - Q_ref = self.data[:, [0, 1]] + Q_ref = self.disc._output_sample_set.get_values()[:, [0, 1]] Q_ref = Q_ref[[1,4],:] - print Q_ref.shape - data = self.data[:, [0, 1]] + + data_obj_temp = sample.sample_set(2) + data_obj_temp.set_values(self.disc._output_sample_set.get_values()[:, [0, 1]]) + disc_obj_temp = sample.discretization(self.disc._input_sample_set,data_obj_temp) + try: - plotDomains.show_data_domain_2D(self.samples, data, Q_ref, - ref_markers, ref_colors, triangles=triangles, save=save, - filenames=filenames) + plotDomains.show_data_domain_2D( + disc_obj_temp, Q_ref, + ref_markers, ref_colors, triangles=triangles, save=save, + filenames=filenames) go = True except (RuntimeError, TypeError, NameError): go = False - nptest.assert_equal(go, True) + + nptest.assert_equal(go, True) def test_show_data_domain_multi(self): """ @@ -262,9 +313,11 @@ def test_show_data_domain_multi(self): """ if not os.path.exists('figs/'): os.mkdir('figs/') + Q_nums = [None, [1, 2], [1, 2, 3]] ref_markers = [None, self.markers] ref_colors = [None, self.colors] + for rm, rc in zip(ref_markers, ref_colors): for qn in Q_nums: showdim = [None, 1] @@ -280,11 +333,12 @@ def check_show_data_domain_multi(self, ref_markers, ref_colors, Q_nums, :meth:`bet.postTools.plotDomains.show_data_domain_multi` ran without generating an error. 
""" - Q_ref = self.data[[4, 2], :] + Q_ref = self.disc._output_sample_set.get_values()[[4, 2], :] try: - plotDomains.show_data_domain_multi(self.samples, self.data, - Q_ref, Q_nums, ref_markers=ref_markers, - ref_colors=ref_colors, showdim=showdim) + plotDomains.show_data_domain_multi( + self.disc, + Q_ref, Q_nums, ref_markers=ref_markers, + ref_colors=ref_colors, showdim=showdim) go = True except (RuntimeError, TypeError, NameError): go = False @@ -296,11 +350,16 @@ def test_scatter_param_multi(self): """ if not os.path.exists('figs/'): os.mkdir('figs/') + try: - plotDomains.scatter_param_multi(self.samples[:, [0,1,2]]) + input_sample_set_temp = sample.sample_set(3) + input_sample_set_temp.set_values(self.disc._input_sample_set.get_values()[:, [0,1,2]]) + + plotDomains.scatter_param_multi(input_sample_set_temp) go = True except (RuntimeError, TypeError, NameError): go = False + nptest.assert_equal(go, True) def test_scatter2D_multi(self): @@ -310,9 +369,13 @@ def test_scatter2D_multi(self): if not os.path.exists('figs/'): os.mkdir('figs/') try: - plotDomains.scatter2D_multi(self.samples[:, [0,1,2]]) + input_sample_set_temp = sample.sample_set(3) + input_sample_set_temp.set_values(self.disc._input_sample_set.get_values()[:, [0,1,2]]) + + plotDomains.scatter2D_multi(input_sample_set_temp) go = True except (RuntimeError, TypeError, NameError): go = False + nptest.assert_equal(go, True) diff --git a/test/test_postProcess/test_plotP.py b/test/test_postProcess/test_plotP.py index 98f856c0..d29c31df 100644 --- a/test/test_postProcess/test_plotP.py +++ b/test/test_postProcess/test_plotP.py @@ -1,6 +1,5 @@ # Copyright (C) 2014-2015 The BET Development Team -# Steven Mattis 04/07/2015 """ This module contains tests for :module:`bet.postProcess.plotP`. @@ -18,6 +17,7 @@ import bet.util as util from bet.Comm import comm import os +import bet.sample as sample class Test_calc_marg_1D(unittest.TestCase): @@ -29,19 +29,30 @@ def setUp(self): """ Set up problem. """ - self.lam_domain=np.array([[0.0,1.0]]) + emulated_input_samples = sample.sample_set(1) + emulated_input_samples.set_domain(np.array([[0.0, 1.0]])) + num_samples=1000 - self.samples = np.linspace(self.lam_domain[0][0], self.lam_domain[0][1], num_samples+1) - self.P_samples = 1.0/float(comm.size)*(1.0/float(self.samples.shape[0]))*np.ones((self.samples.shape[0],)) - + + emulated_input_samples.set_values_local(np.linspace(emulated_input_samples.get_domain()[0][0], + emulated_input_samples.get_domain()[0][1], + num_samples+1)) + + emulated_input_samples.set_probabilities_local(1.0/float(comm.size)*(1.0/float(\ + emulated_input_samples.get_values_local().shape[0]))\ + *np.ones((emulated_input_samples.get_values_local().shape[0],))) + + emulated_input_samples.check_num() + + self.samples = emulated_input_samples + def test_1_bin(self): """ Test that marginals sum to 1 and have correct shape. """ - (bins, marginals) = plotP.calculate_1D_marginal_probs(self.P_samples, - self.samples, - self.lam_domain, + (bins, marginals) = plotP.calculate_1D_marginal_probs(self.samples, nbins = 1) + nptest.assert_almost_equal(marginals[0][0], 1.0) nptest.assert_equal(marginals[0].shape, (1,)) @@ -49,35 +60,44 @@ def test_10_bins(self): """ Test that marginals sum to 1 and have correct shape. 
""" - (bins, marginals) = plotP.calculate_1D_marginal_probs(self.P_samples, - self.samples, - self.lam_domain, + (bins, marginals) = plotP.calculate_1D_marginal_probs(self.samples, nbins = 10) + nptest.assert_almost_equal(np.sum(marginals[0]), 1.0) nptest.assert_equal(marginals[0].shape, (10,)) class Test_calc_marg_2D(unittest.TestCase): """ - Test :meth:`bet.postProcess.plotP.calculate_1D_marginal_probs` and :meth:`bet.postProcess.plotP.calculate_2D_marginal_probs` for a 2D + Test :meth:`bet.postProcess.plotP.calculate_1D_marginal_probs` and + :meth:`bet.postProcess.plotP.calculate_2D_marginal_probs` for a 2D parameter space. """ def setUp(self): """ Set up problem. """ - self.lam_domain=np.array([[0.0,1.0],[0.0,1.0]]) - self.samples=util.meshgrid_ndim((np.linspace(self.lam_domain[0][0], self.lam_domain[0][1], 10),np.linspace(self.lam_domain[1][0], self.lam_domain[1][1], 10))) - self.P_samples = 1.0/float(comm.size)*(1.0/float(self.samples.shape[0]))*np.ones((self.samples.shape[0],)) - + emulated_input_samples = sample.sample_set(2) + emulated_input_samples.set_domain(np.array([[0.0,1.0],[0.0,1.0]])) + + emulated_input_samples.set_values_local(util.meshgrid_ndim((np.linspace(emulated_input_samples.get_domain()[0][0], + emulated_input_samples.get_domain()[0][1], 10), + np.linspace(emulated_input_samples.get_domain()[1][0], + emulated_input_samples.get_domain()[1][1], 10)))) + + emulated_input_samples.set_probabilities_local(1.0/float(comm.size)*\ + (1.0/float(emulated_input_samples.get_values_local().shape[0]))*\ + np.ones((emulated_input_samples.get_values_local().shape[0],))) + emulated_input_samples.check_num() + + self.samples = emulated_input_samples + def test_1_bin_1D(self): """ Test that 1D marginals sum to 1 and have right shape. """ - (bins, marginals) = plotP.calculate_1D_marginal_probs(self.P_samples, - self.samples, - self.lam_domain, + (bins, marginals) = plotP.calculate_1D_marginal_probs(self.samples, nbins = 1) - + nptest.assert_almost_equal(marginals[0][0], 1.0) nptest.assert_almost_equal(marginals[1][0], 1.0) nptest.assert_equal(marginals[0].shape, (1,)) @@ -87,10 +107,9 @@ def test_10_bins_1D(self): """ Test that 1D marginals sum to 1 and have right shape. """ - (bins, marginals) = plotP.calculate_1D_marginal_probs(self.P_samples, - self.samples, - self.lam_domain, + (bins, marginals) = plotP.calculate_1D_marginal_probs(self.samples, nbins = 10) + nptest.assert_almost_equal(np.sum(marginals[0]), 1.0) nptest.assert_almost_equal(np.sum(marginals[1]), 1.0) nptest.assert_equal(marginals[0].shape, (10,)) @@ -99,11 +118,9 @@ def test_1_bin_2D(self): """ Test that 2D marginals sum to 1 and have right shape. """ - (bins, marginals) = plotP.calculate_2D_marginal_probs(self.P_samples, - self.samples, - self.lam_domain, + (bins, marginals) = plotP.calculate_2D_marginal_probs(self.samples, nbins = 1) - + nptest.assert_almost_equal(marginals[(0,1)][0], 1.0) nptest.assert_equal(marginals[(0,1)].shape, (1,1)) @@ -111,10 +128,9 @@ def test_10_bins_2D(self): """ Test that 2D marginals sum to 1 and have right shape. """ - (bins, marginals) = plotP.calculate_2D_marginal_probs(self.P_samples, - self.samples, - self.lam_domain, + (bins, marginals) = plotP.calculate_2D_marginal_probs(self.samples, nbins = 10) + nptest.assert_almost_equal(np.sum(marginals[(0,1)]), 1.0) nptest.assert_equal(marginals[(0,1)].shape, (10,10)) @@ -122,10 +138,9 @@ def test_5_10_bins_2D(self): """ Test that 1D marginals sum to 1 and have right shape. 
""" - (bins, marginals) = plotP.calculate_2D_marginal_probs(self.P_samples, - self.samples, - self.lam_domain, + (bins, marginals) = plotP.calculate_2D_marginal_probs(self.samples, nbins = [5,10]) + nptest.assert_almost_equal(np.sum(marginals[(0,1)]), 1.0) nptest.assert_equal(marginals[(0,1)].shape, (5,10)) @@ -134,11 +149,11 @@ def test_1D_smoothing(self): """ Test :meth:`bet.postProcess.plotP.smooth_marginals_1D`. """ - (bins, marginals) = plotP.calculate_1D_marginal_probs(self.P_samples, - self.samples, - self.lam_domain, + (bins, marginals) = plotP.calculate_1D_marginal_probs(self.samples, nbins = 10) + marginals_smooth = plotP.smooth_marginals_1D(marginals, bins, sigma = 10.0) + nptest.assert_equal(marginals_smooth[0].shape, marginals[0].shape) nptest.assert_almost_equal(np.sum(marginals_smooth[0]), 1.0) @@ -146,11 +161,11 @@ def test_2D_smoothing(self): """ Test :meth:`bet.postProcess.plotP.smooth_marginals_2D`. """ - (bins, marginals) = plotP.calculate_2D_marginal_probs(self.P_samples, - self.samples, - self.lam_domain, + (bins, marginals) = plotP.calculate_2D_marginal_probs(self.samples, nbins = 10) + marginals_smooth = plotP.smooth_marginals_2D(marginals, bins, sigma = 10.0) + nptest.assert_equal(marginals_smooth[(0,1)].shape, marginals[(0,1)].shape) nptest.assert_almost_equal(np.sum(marginals_smooth[(0,1)]), 1.0) @@ -158,12 +173,12 @@ def test_plot_marginals_1D(self): """ Test :meth:`bet.postProcess.plotP.plot_1D_marginal_probs`. """ - (bins, marginals) = plotP.calculate_1D_marginal_probs(self.P_samples, - self.samples, - self.lam_domain, + (bins, marginals) = plotP.calculate_1D_marginal_probs(self.samples, nbins = 10) + try: - plotP.plot_1D_marginal_probs(marginals, bins,self.lam_domain, filename = "file", interactive=False) + plotP.plot_1D_marginal_probs(marginals, bins, self.samples, + filename = "file", interactive=False) go = True if os.path.exists("file_1D_0.eps"): os.remove("file_1D_0.eps") @@ -177,14 +192,13 @@ def test_plot_marginals_2D(self): """ Test :meth:`bet.postProcess.plotP.plot_2D_marginal_probs`. """ - (bins, marginals) = plotP.calculate_2D_marginal_probs(self.P_samples, - self.samples, - self.lam_domain, + (bins, marginals) = plotP.calculate_2D_marginal_probs(self.samples, nbins = 10) marginals[(0,1)][0][0]=0.0 marginals[(0,1)][0][1]*=2.0 try: - plotP.plot_2D_marginal_probs(marginals, bins,self.lam_domain, filename = "file", interactive=False) + plotP.plot_2D_marginal_probs(marginals, bins, self.samples, + filename = "file", interactive=False) go = True if os.path.exists("file_2D_0_1.eps"): os.remove("file_2D_0_1.eps") diff --git a/test/test_postProcess/test_postTools.py b/test/test_postProcess/test_postTools.py index d7bbe99b..4551f4b7 100644 --- a/test/test_postProcess/test_postTools.py +++ b/test/test_postProcess/test_postTools.py @@ -1,6 +1,5 @@ -# Copyright (C) 2014-2015 The BET Development Team +# Copyright (C) 2014-2016 The BET Development Team -# Steven Mattis 04/07/2015 """ This module contains tests for :module:`bet.postProcess.postTools`. 
@@ -14,8 +13,10 @@ import scipy.spatial as spatial import numpy.testing as nptest import bet.util as util -from bet.Comm import comm +from bet.Comm import comm +import bet.sample as sample +''' def test_in_high_prob(): """ @@ -23,14 +24,15 @@ def test_in_high_prob(): """ def rho_D(my_data): return my_data/4.0 - data = np.array([0, 1, 0, 1, 1, 1]) - maximum = np.max(rho_D(data)) + output_samples = sample.sample_set(1)\ + output_samples.set_values(np.array([0, 1, 0, 1, 1, 1])) + maximum = np.max(rho_D(output_samples.get_values())) print "maximum", maximum - assert 4 == postTools.in_high_prob(data, rho_D, maximum) - assert 3 == postTools.in_high_prob(data, rho_D, maximum, [3, 4, 5]) - assert 2 == postTools.in_high_prob(data, rho_D, maximum, [0, 1, 2, 3]) - assert 1 == postTools.in_high_prob(data, rho_D, maximum, [0, 2, 4]) - assert 0 == postTools.in_high_prob(data, rho_D, maximum, [0, 2]) + assert 4 == postTools.in_high_prob(output_samples.get_values(), rho_D, maximum) + assert 3 == postTools.in_high_prob(output_samples.get_values(), rho_D, maximum, [3, 4, 5]) + assert 2 == postTools.in_high_prob(output_samples.get_values(), rho_D, maximum, [0, 1, 2, 3]) + assert 1 == postTools.in_high_prob(output_samples.get_values(), rho_D, maximum, [0, 2, 4]) + assert 0 == postTools.in_high_prob(output_samples.get_values(), rho_D, maximum, [0, 2]) def test_in_high_prob_multi(): """ @@ -91,7 +93,7 @@ def test_compare_yield(): except (RuntimeError, TypeError, NameError): go = False nptest.assert_equal(go, True) - +''' class Test_PostTools(unittest.TestCase): """ @@ -101,96 +103,96 @@ def setUp(self): """ Set up problem. """ - self.lam_domain=np.array([[0.0,1.0]]) + input_samples = sample.sample_set(1) + input_samples.set_domain(np.array([[0.0,1.0]])) + #self.lam_domain=np.array([[0.0,1.0]]) num_samples=1000 - self.samples = np.linspace(self.lam_domain[0][0], self.lam_domain[0][1], num_samples+1) - self.P_samples = (1.0/float(self.samples.shape[0]))*np.ones((self.samples.shape[0],)) - self.P_samples[0] = 0.0 - self.P_samples[-1] *= 2.0 - - self.data = self.samples[:] + input_samples.set_values(np.linspace(input_samples.get_domain()[0,0], + input_samples.get_domain()[0,1], + num_samples+1)) + #self.samples = np.linspace(self.lam_domain[0][0], self.lam_domain[0][1], num_samples+1) + input_samples.set_probabilities((1.0/float(input_samples.get_values().shape[0]))* + np.ones((input_samples.get_values().shape[0],))) + #self.P_samples = (1.0/float(self.samples.shape[0]))*np.ones((self.samples.shape[0],)) + input_samples._probabilities[0] = 0.0 + input_samples._probabilities[-1] *= 2.0 + #self.P_samples[0] = 0.0 + #self.P_samples[-1] *= 2.0 + + self.data = input_samples + #self.data = self.samples[:] def test_sort_by_rho(self): """ Test :meth:`bet.postProcess.postTools.sort_by_rho`. """ - (P_samples, samples, _ , data, _) = postTools.sort_by_rho(self.P_samples, self.samples, - lam_vol=None, data=self.data) - self.assertGreater(np.min(P_samples),0.0) - nptest.assert_almost_equal(np.sum(P_samples),1.0) + (self.data, _) = postTools.sort_by_rho(self.data) + self.assertGreater(np.min(self.data.get_probabilities()),0.0) + nptest.assert_almost_equal(np.sum(self.data.get_probabilities()),1.0) def test_sample_prob(self): """ Test :meth:`bet.postProcess.postTools.sample_prob`. 
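+ Requesting a fraction of 1.0 should return 1000 samples with probabilities summing to one; a fraction of 0.8 should return probabilities summing to approximately 0.8.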
""" - (num_samples,P_samples, samples, _ , data, _) = postTools.sample_prob(1.0, - self.P_samples, - self.samples, - lam_vol=None, - data=self.data, - sort=True, - descending=True) - nptest.assert_almost_equal(np.sum(P_samples),1.0) + (num_samples, sample_set_out, _) = postTools.sample_prob(1.0, self.data, + sort=True, + descending=True) + + nptest.assert_almost_equal(np.sum(sample_set_out.get_probabilities()),1.0) nptest.assert_equal(num_samples,1000) - (num_samples,P_samples, samples, _ , data, _) = postTools.sample_prob(0.8, - self.P_samples, - self.samples, - lam_vol=None, - data=self.data, - sort=True, - descending=True) - nptest.assert_allclose(np.sum(P_samples),0.8,0.001) - - (num_samples,P_samples, samples, _ , data, _) = postTools.sample_prob(1.0, - self.P_samples, - self.samples, - lam_vol=None, - data=self.data, - sort=True, - descending=False) - nptest.assert_almost_equal(np.sum(P_samples),1.0) + (num_samples, sample_set_out, _) = postTools.sample_prob(0.8, + self.data, + sort=True, + descending=True) + + nptest.assert_allclose(np.sum(sample_set_out.get_probabilities()),0.8,0.001) + + (num_samples, sample_set_out, _) = postTools.sample_prob(1.0, + self.data, + sort=True, + descending=False) + + nptest.assert_almost_equal(np.sum(sample_set_out.get_probabilities()),1.0) nptest.assert_equal(num_samples,1000) - (num_samples,P_samples, samples, _ , data, _) = postTools.sample_prob(0.8, - self.P_samples, - self.samples, - lam_vol=None, - data=self.data, - sort=True, - descending=False) - nptest.assert_allclose(np.sum(P_samples),0.8,0.001) + (num_samples, sample_set_out, _) = postTools.sample_prob(0.8, + self.data, + sort=True, + descending=False) + + nptest.assert_allclose(np.sum(sample_set_out.get_probabilities()),0.8,0.001) def test_sample_highest_prob(self): """ Test :meth:`bet.postProcess.postTools.sample_highest_prob`. """ - (num_samples,P_samples, samples, _ , data, _) = postTools.sample_highest_prob(1.0, - self.P_samples, - self.samples, - lam_vol=None, data=self.data, sort=True) - nptest.assert_almost_equal(np.sum(P_samples),1.0) + (num_samples, sample_set_out, _) = postTools.sample_highest_prob(1.0, + self.data, + sort=True) + + nptest.assert_almost_equal(np.sum(sample_set_out.get_probabilities()),1.0) nptest.assert_equal(num_samples,1000) - (num_samples,P_samples, samples, _ , data, _) = postTools.sample_highest_prob(0.8, - self.P_samples, - self.samples, - lam_vol=None, data=self.data, sort=True) - nptest.assert_allclose(np.sum(P_samples),0.8,0.001) + (num_samples, sample_set_out, _) = postTools.sample_highest_prob(0.8, + self.data, + sort=True) + + nptest.assert_allclose(np.sum(sample_set_out.get_probabilities()),0.8,0.001) def test_sample_lowest_prob(self): """ Test :meth:`bet.postProcess.postTools.sample_lowest_prob`. 
""" - (num_samples,P_samples, samples, _ , data, _) = postTools.sample_lowest_prob(1.0, - self.P_samples, - self.samples, - lam_vol=None, data=self.data, sort=True) - nptest.assert_almost_equal(np.sum(P_samples),1.0) + (num_samples, sample_set_out, _) = postTools.sample_lowest_prob(1.0, + self.data, + sort=True) + + nptest.assert_almost_equal(np.sum(sample_set_out.get_probabilities()),1.0) nptest.assert_equal(num_samples,1000) - (num_samples,P_samples, samples, _ , data, _) = postTools.sample_lowest_prob(0.8, - self.P_samples, - self.samples, - lam_vol=None, data=self.data, sort=True) - nptest.assert_allclose(np.sum(P_samples),0.8,0.001) + (num_samples, sample_set_out, _) = postTools.sample_lowest_prob(0.8, + self.data, + sort=True) + + nptest.assert_allclose(np.sum(sample_set_out.get_probabilities()),0.8,0.001) diff --git a/test/test_sample.py b/test/test_sample.py new file mode 100644 index 00000000..bc84dad7 --- /dev/null +++ b/test/test_sample.py @@ -0,0 +1,584 @@ +# Copyright (C) 2016 The BET Development TEam + +# Steve Mattis 03/23/2016 + +import unittest, os +import numpy as np +import numpy.testing as nptest +import bet +import bet.sample as sample +import bet.util as util +from bet.Comm import comm, MPI + +#local_path = os.path.join(os.path.dirname(bet.__file__), "/test") +local_path = '' + +class Test_sample_set(unittest.TestCase): + def setUp(self): + self.dim = 2 + self.num = 100 + self.values = np.ones((self.num, self.dim)) + self.sam_set = sample.sample_set(dim=self.dim) + self.sam_set.set_values(self.values) + self.domain = np.array([[0, 1],[0, 1]], dtype=np.float) + def test_set_domain(self): + """ + Test set domain. + """ + self.sam_set.set_domain(self.domain) + nptest.assert_array_equal(self.sam_set._domain, self.domain) + def test_get_domain(self): + """ + Test get domain. + """ + self.sam_set.set_domain(self.domain) + nptest.assert_array_equal(self.sam_set.get_domain(), self.domain) + def test_save_load(self): + """ + Check save_sample_set and load_sample_set. + """ + prob = 1.0/float(self.num)*np.ones((self.num,)) + self.sam_set.set_probabilities(prob) + vol = 1.0/float(self.num)*np.ones((self.num,)) + self.sam_set.set_volumes(vol) + ee = np.ones((self.num, self.dim)) + self.sam_set.set_error_estimates(ee) + jac = np.ones((self.num, 3, self.dim)) + self.sam_set.set_jacobians(jac) + self.sam_set.global_to_local() + self.sam_set.set_domain(self.domain) + self.sam_set.update_bounds() + self.sam_set.update_bounds_local() + + if comm.rank == 0: + sample.save_sample_set(self.sam_set, os.path.join(local_path, + 'testfile.mat'), "TEST") + comm.barrier() + + loaded_set = sample.load_sample_set(os.path.join(local_path, + 'testfile.mat'), "TEST") + loaded_set_none = sample.load_sample_set(os.path.join(local_path, + 'testfile.mat')) + + assert loaded_set_none is None + + for attrname in sample.sample_set.vector_names+sample.sample_set.\ + all_ndarray_names: + curr_attr = getattr(loaded_set, attrname) + print attrname + if curr_attr is not None: + nptest.assert_array_equal(getattr(self.sam_set, attrname), + curr_attr) + + if comm.rank == 0 and os.path.exists(os.path.join(local_path, 'testfile.mat')): + os.remove(os.path.join(local_path, 'testfile.mat')) + + def test_copy(self): + """ + Check save_sample_set and load_sample_set. 
+ """ + prob = 1.0/float(self.num)*np.ones((self.num,)) + self.sam_set.set_probabilities(prob) + vol = 1.0/float(self.num)*np.ones((self.num,)) + self.sam_set.set_volumes(vol) + ee = np.ones((self.num, self.dim)) + self.sam_set.set_error_estimates(ee) + jac = np.ones((self.num, 3, self.dim)) + self.sam_set.set_jacobians(jac) + self.sam_set.global_to_local() + self.sam_set.set_domain(self.domain) + self.sam_set.update_bounds() + self.sam_set.update_bounds_local() + self.sam_set.set_kdtree() + + copied_set = self.sam_set.copy() + for attrname in sample.sample_set.vector_names+sample.sample_set.\ + all_ndarray_names: + curr_attr = getattr(copied_set, attrname) + if curr_attr is not None: + nptest.assert_array_equal(getattr(self.sam_set, attrname), + curr_attr) + + assert copied_set._kdtree is not None + def test_update_bounds(self): + """ + Check update_bounds + """ + self.sam_set.set_domain(self.domain) + self.sam_set.update_bounds() + nptest.assert_array_equal(self.sam_set._left, + np.repeat([self.domain[:, 0]], self.num, 0)) + nptest.assert_array_equal(self.sam_set._right, + np.repeat([self.domain[:, 1]], self.num, 0)) + nptest.assert_array_equal(self.sam_set._width, + np.repeat([self.domain[:, 1] - self.domain[:, 0]], self.num, 0)) + o_num = 35 + self.sam_set.update_bounds(o_num) + nptest.assert_array_equal(self.sam_set._left, + np.repeat([self.domain[:, 0]], o_num, 0)) + nptest.assert_array_equal(self.sam_set._right, + np.repeat([self.domain[:, 1]], o_num, 0)) + nptest.assert_array_equal(self.sam_set._width, + np.repeat([self.domain[:, 1] - self.domain[:, 0]], o_num, 0)) + def test_update_bounds_local(self): + """ + Check update_bounds_local + """ + self.sam_set.global_to_local() + self.sam_set.set_domain(self.domain) + self.sam_set.update_bounds_local() + local_size = self.sam_set.get_values_local().shape[0] + nptest.assert_array_equal(self.sam_set._left_local, + np.repeat([self.domain[:, 0]], local_size, 0)) + nptest.assert_array_equal(self.sam_set._right_local, + np.repeat([self.domain[:, 1]], local_size, 0)) + nptest.assert_array_equal(self.sam_set._width_local, + np.repeat([self.domain[:, 1] - self.domain[:, 0]], local_size, + 0)) + o_num = 35 + self.sam_set.update_bounds_local(o_num) + nptest.assert_array_equal(self.sam_set._left_local, + np.repeat([self.domain[:, 0]], o_num, 0)) + nptest.assert_array_equal(self.sam_set._right_local, + np.repeat([self.domain[:, 1]], o_num, 0)) + nptest.assert_array_equal(self.sam_set._width_local, + np.repeat([self.domain[:, 1] - self.domain[:, 0]], o_num, 0)) + + def test_check_dim(self): + """ + Check set_dim + """ + self.assertEqual(self.dim, self.sam_set.get_dim()) + def test_set_values(self): + """ + Check set_values. + """ + values = np.ones((150, self.dim)) + self.sam_set.set_values(values) + nptest.assert_array_equal(util.fix_dimensions_data(values), + self.sam_set.get_values()) + def test_set_values_local(self): + """ + Check set_values_local. + """ + values = np.ones((15, self.dim)) + self.sam_set.set_values_local(values) + nptest.assert_array_equal(util.fix_dimensions_data(values), + self.sam_set.get_values_local()) + def test_get_values(self): + """ + Check get_samples. + """ + nptest.assert_array_equal(util.fix_dimensions_data(self.values), + self.sam_set.get_values()) + def test_get_shape(self): + """ + Check get_samples. + """ + nptest.assert_array_equal(util.fix_dimensions_data(self.values).shape, + self.sam_set.shape()) + def test_append_values(self): + """ + Check appending of values. 
+ """ + new_values = np.zeros((10, self.dim)) + self.sam_set.append_values(new_values) + nptest.assert_array_equal(util.fix_dimensions_data(new_values), + self.sam_set.get_values()[self.num::, :]) + def test_append_values_local(self): + """ + Check appending of local values. + """ + new_values = np.zeros((10, self.dim)) + self.sam_set.global_to_local() + + local_size = self.sam_set.get_values_local().shape[0] + self.sam_set.append_values_local(new_values) + nptest.assert_array_equal(util.fix_dimensions_data(new_values), + self.sam_set.get_values_local()[local_size::, :]) + + def test_get_dim(self): + """ + Check to see if dimensions are correct. + """ + self.assertEqual(self.dim, self.sam_set.get_dim()) + def test_probabilities(self): + """ + Check probability methods + """ + prob = 1.0/float(self.num)*np.ones((self.num,)) + self.sam_set.set_probabilities(prob) + self.sam_set.check_num() + nptest.assert_array_equal(prob, self.sam_set.get_probabilities()) + def test_volumes(self): + """ + Check volume methods + """ + vol = 1.0/float(self.num)*np.ones((self.num,)) + self.sam_set.set_volumes(vol) + self.sam_set.check_num() + nptest.assert_array_equal(vol, self.sam_set.get_volumes()) + + def test_error_estimates(self): + """ + Check error estimate methods + """ + ee = np.ones((self.num, self.dim)) + self.sam_set.set_error_estimates(ee) + self.sam_set.check_num() + nptest.assert_array_equal(ee, self.sam_set.get_error_estimates()) + + def test_jacobian_methods(self): + """ + Check jacobian methods. + """ + jac = np.ones((self.num, 3, self.dim)) + self.sam_set.set_jacobians(jac) + self.sam_set.check_num() + nptest.assert_array_equal(jac, self.sam_set.get_jacobians()) + + def test_check_num(self): + """ + Check check_num. + """ + prob = 1.0/float(self.num)*np.ones((self.num,)) + self.sam_set.set_probabilities(prob) + vol = 1.0/float(self.num)*np.ones((self.num,)) + self.sam_set.set_volumes(vol) + ee = np.ones((self.num, self.dim)) + self.sam_set.set_error_estimates(ee) + jac = np.ones((self.num, 3, self.dim)) + self.sam_set.set_jacobians(jac) + num = self.sam_set.check_num() + self.assertEqual(self.num, num) + new_values = np.zeros((10, self.dim)) + self.sam_set.append_values(new_values) + self.assertRaises(sample.length_not_matching, self.sam_set.check_num) + + def test_kd_tree(self): + """ + Check features of the KD Tree + """ + self.sam_set.set_kdtree() + self.sam_set.get_kdtree() + + def test_parallel_features(self): + """ + Check parallel features. 
+ """ + prob = 1.0/float(self.num)*np.ones((self.num,)) + self.sam_set.set_probabilities(prob) + vol = 1.0/float(self.num)*np.ones((self.num,)) + self.sam_set.set_volumes(vol) + ee = np.ones((self.num, self.dim)) + self.sam_set.set_error_estimates(ee) + jac = np.ones((self.num, 3, self.dim)) + self.sam_set.set_jacobians(jac) + self.sam_set.global_to_local() + self.assertNotEqual(self.sam_set._values_local, None) + if comm.size > 1: + for array_name in sample.sample_set.array_names: + current_array = getattr(self.sam_set, array_name+"_local") + if current_array is not None: + self.assertGreater(getattr(self.sam_set, + array_name).shape[0], current_array.shape[0]) + local_size = current_array.shape[0] + num = comm.allreduce(local_size, op=MPI.SUM) + self.assertEqual(num, self.num) + current_array_global = util.get_global_values(current_array) + nptest.assert_array_equal(getattr(self.sam_set, + array_name), current_array_global) + if array_name is "_values": + assert self.sam_set.shape_local() == (local_size, + self.dim) + else: + for array_name in sample.sample_set.array_names: + current_array = getattr(self.sam_set, array_name+"_local") + if current_array is not None: + nptest.assert_array_equal(getattr(self.sam_set, + array_name), current_array) + if array_name is "_values": + assert self.sam_set.shape_local() == (self.num, + self.dim) + + for array_name in sample.sample_set.array_names: + current_array = getattr(self.sam_set, array_name) + if current_array is not None: + setattr(self.sam_set, array_name + "_old", current_array) + current_array = None + self.sam_set.local_to_global() + for array_name in sample.sample_set.array_names: + current_array = getattr(self.sam_set, array_name + "_local") + if current_array is not None: + nptest.assert_array_equal(getattr(self.sam_set, array_name), + getattr(self.sam_set, array_name + + "_old")) + def test_domain(self): + """ + Test domain information. + """ + domain = np.ones((self.dim, 2)) + self.sam_set.set_domain(domain) + nptest.assert_array_equal(domain, self.sam_set.get_domain()) + + +class Test_sample_set_1d(Test_sample_set): + def setUp(self): + self.dim = 1 + self.num = 100 + self.values = np.ones((self.num, self.dim)) + self.sam_set = sample.sample_set(dim=self.dim) + self.sam_set.set_values(self.values) + self.domain = np.array([[0, 1]], dtype=np.float) + +class Test_discretization_simple(unittest.TestCase): + def setUp(self): + self.dim1 = 3 + self.num = 100 + self.dim2 = 1 + values1 = np.ones((self.num, self.dim1)) + values2 = np.ones((self.num, self.dim2)) + values3 = np.ones((self.num, self.dim2)) + self.input_set = sample.sample_set(dim=self.dim1) + self.output_set = sample.sample_set(dim=self.dim2) + self.output_probability_set = sample.sample_set(dim=self.dim2) + self.input_set.set_values(values1) + self.output_set.set_values(values2) + self.output_probability_set.set_values(values3) + self.disc = sample.discretization(input_sample_set=self.input_set, + output_sample_set=self.output_set, + output_probability_set=self.output_probability_set) + + def Test_check_nums(self): + """ + Test number checking. 
+ """ + num = self.disc.check_nums() + self.assertEqual(num, self.num) + + def Test_set_io_ptr(self): + """ + Test setting io ptr + """ + #TODO be careful if we change Kdtree + self.disc.set_io_ptr(globalize=True) + self.disc.get_io_ptr() + self.disc.set_io_ptr(globalize=False) + self.disc.get_io_ptr() + + def Test_set_emulated_ii_ptr(self): + """ + Test setting emulated ii ptr + """ + #TODO be careful if we change Kdtree + values = np.ones((10, self.dim1)) + self.emulated = sample.sample_set(dim=self.dim1) + self.emulated.set_values(values) + self.disc._emulated_input_sample_set = self.emulated + self.disc.set_emulated_ii_ptr(globalize=True) + self.disc.get_emulated_ii_ptr() + self.disc.set_emulated_ii_ptr(globalize=False) + self.disc._emulated_input_sample_set.local_to_global() + self.disc.get_emulated_ii_ptr() + + + def Test_set_emulated_oo_ptr(self): + """ + Test setting emulated oo ptr + """ + #TODO be careful if we change Kdtree + values = np.ones((3, self.dim2)) + self.emulated = sample.sample_set(dim=self.dim2) + self.emulated.set_values(values) + self.disc._emulated_output_sample_set = self.emulated + self.disc.set_emulated_oo_ptr(globalize=True) + self.disc.get_emulated_oo_ptr() + self.disc.set_emulated_oo_ptr(globalize=False) + self.disc.get_emulated_oo_ptr() + + def Test_save_load_discretization(self): + """ + Test saving and loading of discretization + """ + if comm.rank == 0: + sample.save_discretization(self.disc, os.path.join(local_path, + 'testfile.mat'), "TEST") + comm.barrier() + loaded_disc = sample.load_discretization(os.path.join(local_path, + 'testfile.mat'), "TEST") + + for attrname in sample.discretization.vector_names: + curr_attr = getattr(loaded_disc, attrname) + if curr_attr is not None: + nptest.assert_array_equal(curr_attr, getattr(self.disc, + attrname)) + + for attrname in sample.discretization.sample_set_names: + curr_set = getattr(loaded_disc, attrname) + if curr_set is not None: + for set_attrname in sample.sample_set.vector_names+\ + sample.sample_set.all_ndarray_names: + curr_attr = getattr(curr_set, set_attrname) + if curr_attr is not None: + nptest.assert_array_equal(curr_attr, getattr(\ + curr_set, set_attrname)) + comm.barrier() + if comm.rank == 0 and os.path.exists(os.path.join(local_path, 'testfile.mat')): + os.remove(os.path.join(local_path, 'testfile.mat')) + + def Test_copy_discretization(self): + """ + Test copying of discretization + """ + copied_disc = self.disc.copy() + + for attrname in sample.discretization.vector_names: + curr_attr = getattr(copied_disc, attrname) + if curr_attr is not None: + nptest.assert_array_equal(curr_attr, getattr(self.disc, + attrname)) + + for attrname in sample.discretization.sample_set_names: + curr_set = getattr(copied_disc, attrname) + if curr_set is not None: + for set_attrname in sample.sample_set.vector_names+\ + sample.sample_set.all_ndarray_names: + curr_attr = getattr(curr_set, set_attrname) + if curr_attr is not None: + nptest.assert_array_equal(curr_attr, getattr(\ + curr_set, set_attrname)) + +class TestEstimateVolume(unittest.TestCase): + """ + Test :meth:`bet.calculateP.calculateP.estimate_volulme`. + """ + + def setUp(self): + """ + Test dimension, number of samples, and that all the samples are within + lambda_domain. 
+ """ + lam_left = np.array([0.0, .25, .4]) + lam_right = np.array([1.0, 4.0, .5]) + lam_width = lam_right-lam_left + + self.lam_domain = np.zeros((3, 2)) + self.lam_domain[:, 0] = lam_left + self.lam_domain[:, 1] = lam_right + + num_samples_dim = 2 + start = lam_left+lam_width/(2*num_samples_dim) + stop = lam_right-lam_width/(2*num_samples_dim) + d1_arrays = [] + + for l, r in zip(start, stop): + d1_arrays.append(np.linspace(l, r, num_samples_dim)) + + self.num_l_emulate = 1000001 + self.s_set = sample.sample_set(util.meshgrid_ndim(d1_arrays).shape[1]) + self.s_set.set_domain(self.lam_domain) + self.s_set.set_values(util.meshgrid_ndim(d1_arrays)) + print util.meshgrid_ndim(d1_arrays).shape + self.volume_exact = 1.0/self.s_set._values.shape[0] + self.s_set.estimate_volume(n_mc_points= 1001) + self.lam_vol = self.s_set._volumes + def test_dimension(self): + """ + Check the dimension. + """ + print self.lam_vol.shape, self.s_set._values.shape + nptest.assert_array_equal(self.lam_vol.shape, (len(self.s_set._values), )) + + def test_volumes(self): + """ + Check that the volumes are within a tolerance for a regular grid of + samples. + """ + nptest.assert_array_almost_equal(self.lam_vol, self.volume_exact, 1) + nptest.assert_almost_equal(np.sum(self.lam_vol), 1.0) + +class TestEstimateLocalVolume(unittest.TestCase): + """ + Test :meth:`bet.calculateP.calculateP.estimate_local_volulme`. + """ + + def setUp(self): + """ + Test dimension, number of samples, and that all the samples are within + lambda_domain. + + """ + lam_left = np.array([0.0, .25, .4]) + lam_right = np.array([1.0, 4.0, .5]) + lam_width = lam_right-lam_left + + self.lam_domain = np.zeros((3, 2)) + self.lam_domain[:, 0] = lam_left + self.lam_domain[:, 1] = lam_right + + num_samples_dim = 2 + start = lam_left+lam_width/(2*num_samples_dim) + stop = lam_right-lam_width/(2*num_samples_dim) + d1_arrays = [] + + for l, r in zip(start, stop): + d1_arrays.append(np.linspace(l, r, num_samples_dim)) + + self.s_set = sample.sample_set(util.meshgrid_ndim(d1_arrays).shape[1]) + self.s_set.set_domain(self.lam_domain) + self.s_set.set_values(util.meshgrid_ndim(d1_arrays)) + self.volume_exact = 1.0/self.s_set._values.shape[0] + self.s_set.estimate_local_volume() + self.lam_vol = self.s_set._volumes + + def test_dimension(self): + """ + Check the dimension. + """ + nptest.assert_array_equal(self.lam_vol.shape, (len(self.s_set._values), )) + + def test_volumes(self): + """ + Check that the volumes are within a tolerance for a regular grid of + samples. + """ + nptest.assert_array_almost_equal(self.lam_vol, self.volume_exact, 2) + nptest.assert_almost_equal(np.sum(self.lam_vol), 1.0) + + +class TestExactVolume1D(unittest.TestCase): + """ + Test :meth:`bet.calculateP.calculateP.exact_volume_1D`. + """ + + def setUp(self): + """ + Test dimension, number of samples, and that all the samples are within + lambda_domain. + """ + num_samples = 10 + self.lam_domain = np.array([[.0, .1]]) + edges = np.linspace(self.lam_domain[:, 0], self.lam_domain[:, 1], + num_samples+1) + self.samples = (edges[1:]+edges[:-1])*.5 + np.random.shuffle(self.samples) + self.volume_exact = 1./self.samples.shape[0] + self.volume_exact = self.volume_exact * np.ones((num_samples,)) + s_set = sample.voronoi_sample_set(dim = 1) + s_set.set_domain(self.lam_domain) + s_set.set_values(self.samples) + s_set.exact_volume_1D() + self.lam_vol = s_set.get_volumes() + def test_dimension(self): + """ + Check the dimension. 
+ """ + nptest.assert_array_equal(self.lam_vol.shape, (len(self.samples), )) + + def test_volumes(self): + """ + Check that the volumes are within a tolerance for a regular grid of + samples. + """ + nptest.assert_array_almost_equal(self.lam_vol, self.volume_exact) + nptest.assert_almost_equal(np.sum(self.lam_vol), 1.0) diff --git a/test/test_sampling/__init__.py b/test/test_sampling/__init__.py index d601faf9..c061fcf8 100644 --- a/test/test_sampling/__init__.py +++ b/test/test_sampling/__init__.py @@ -3,4 +3,5 @@ """ This subpackage contains the test modules for the sampling subpackage. """ -__all__ = ['test_adaptiveSampling','test_basicSampling'] +__all__ = ['test_adaptiveSampling','test_basicSampling', + 'test_LpGeneralizedSamples'] diff --git a/test/test_sampling/test_Lp_generalized_samples.py b/test/test_sampling/test_Lp_generalized_samples.py new file mode 100644 index 00000000..caa7b4e0 --- /dev/null +++ b/test/test_sampling/test_Lp_generalized_samples.py @@ -0,0 +1,150 @@ +# Copyright (C) 2014-2015 The BET Development Team + +# Lindley Graham 04/07/2015 +""" +This module contains unittests for :mod:`~bet.sampling.basicSampling:` +""" + +import unittest, os, bet +import numpy.testing as nptest +import numpy as np +import scipy.io as sio +import bet.sampling.LpGeneralizedSamples as lp + + +def test_Lp_generalized_normal(): + """ + Tests :meth:`bet.Lp_generalized_samples.Lp_generalized_normal` + + This test only verifies the mean, but not the variance. + + """ + # 1D + nptest.assert_allclose(np.mean(lp.Lp_generalized_normal(1, 1000), 0), + np.zeros((1,)), atol=1e-1) + # 2D + nptest.assert_allclose(np.mean(lp.Lp_generalized_normal(2, 1000), 0), + np.zeros((2,)), atol=1e-1) + # 3D + nptest.assert_allclose(np.mean(lp.Lp_generalized_normal(3, 1000), 0), + np.zeros((3,)), atol=1e-1) + +def verify_norm_and_mean(x, r, p): + """ + + Verify that all of the samples in `x` are within the Lp ball centered at 0. + Verify the mean of `x` is zero. + + :param x: Array containing a set of samples + :type x: :class:`numpy.ndarry` of shape (num, dim) + :param float r: radius of the Lp ball + :param float p: 0 < p <= infinity, p of the Lp ball + + """ + if np.isinf(p): + xpnorm = np.max(np.abs(x), 1) + else: + xpnorm = np.sum(np.abs(x)**p, 1)**(1./p) + assert np.all(xpnorm <= r) + nptest.assert_allclose(np.mean(x, 0), np.zeros((x.shape[1],)), atol=1e-1) + +def verify_norm(x, r, p): + """ + + Verify that all of the samples in `x` are within the Lp ball centered at 0. + + :param x: Array containing a set of samples + :type x: :class:`numpy.ndarry` of shape (num, dim) + :param float r: radius of the Lp ball + :param float p: 0 < p <= infinity, p of the Lp ball + + """ + if np.isinf(p): + xpnorm = np.max(np.abs(x), 1) + else: + xpnorm = np.sum(np.abs(x)**p, 1)**(1./p) + assert np.all(xpnorm <= r) + +def test_Lp_generalized_uniform(): + """ + Tests :meth:`bet.Lp_generalized_samples.Lp_generalized_uniform` + + This test only verifies the mean, but not the variance. 
+ + """ + # 1D + x = lp.Lp_generalized_uniform(1, 1000) + nptest.assert_allclose(np.mean(x, 0), np.zeros((1,)), atol=1e-1) + assert np.all(np.logical_and(x <= 1., x >= -1)) + + # 2D + p = 1 + x = lp.Lp_generalized_uniform(2, 1000, p) + verify_norm_and_mean(x, 1.0, p) + + p = 2 + x = lp.Lp_generalized_uniform(2, 1000, p) + verify_norm_and_mean(x, 1.0, p) + + p = 3 + x = lp.Lp_generalized_uniform(2, 1000, p) + verify_norm_and_mean(x, 1.0, p) + + p = np.inf + x = lp.Lp_generalized_uniform(2, 1000, p) + verify_norm_and_mean(x, 1.0, p) + + # 3D + p = 1 + x = lp.Lp_generalized_uniform(3, 1000, p) + verify_norm_and_mean(x, 1.0, p) + + p = 2 + x = lp.Lp_generalized_uniform(3, 1000, p) + verify_norm_and_mean(x, 1.0, p) + + p = 3 + x = lp.Lp_generalized_uniform(3, 1000, p) + verify_norm_and_mean(x, 1.0, p) + + p = np.inf + x = lp.Lp_generalized_uniform(3, 1000, p) + verify_norm_and_mean(x, 1.0, p) + +def test_Lp_generalized_beta(): + """ + Tests :meth:`bet.Lp_generalized_samples.Lp_generalized_beta` + + This test only verifies the mean, but not the variance. + + """ + # 1D + x = lp.Lp_generalized_beta(1, 1000) + nptest.assert_allclose(np.mean(x, 0), np.zeros((1,)), atol=1e-1) + assert np.all(np.logical_and(x <= 1., x >= -1)) + + # 2D + p = 1 + x = lp.Lp_generalized_beta(2, 1000, p) + verify_norm(x, 1.0, p) + + p = 2 + x = lp.Lp_generalized_beta(2, 1000, p) + verify_norm_and_mean(x, 1.0, p) + + p = 3 + x = lp.Lp_generalized_beta(2, 1000, p) + verify_norm(x, 1.0, p) + + # 3D + p = 1 + x = lp.Lp_generalized_beta(3, 1000, p) + verify_norm(x, 1.0, p) + + p = 2 + x = lp.Lp_generalized_beta(3, 1000, p) + verify_norm_and_mean(x, 1.0, p) + + p = 3 + x = lp.Lp_generalized_beta(3, 1000, p) + verify_norm(x, 1.0, p) diff --git a/test/test_sampling/test_adaptiveSampling.py b/test/test_sampling/test_adaptiveSampling.py index f9dfeca2..66679b18 100644 --- a/test/test_sampling/test_adaptiveSampling.py +++ b/test/test_sampling/test_adaptiveSampling.py @@ -14,6 +14,9 @@ import scipy.io as sio from bet.Comm import comm import bet +import bet.sample +from bet.sample import sample_set +from bet.sample import discretization as disc local_path = os.path.join(os.path.dirname(bet.__file__), "../test/test_sampling") @@ -26,13 +29,23 @@ def test_loadmat_init(): """ np.random.seed(1) chain_length = 10 - mdat1 = {'samples':np.random.random((50, 1)), - 'data':np.random.random((50, 1)), 'num_samples':50, - 'chain_length':chain_length} - mdat2 = {'samples':np.random.random((60, 1)), - 'num_samples':60, 'chain_length':chain_length} + + + mdat1 = {'num_samples':50, 'chain_length':chain_length} + mdat2 = {'num_samples':60, 'chain_length':chain_length} model = "this is not a model" - + + my_input1 = sample_set(1) + my_input1.set_values(np.random.random((50, 1))) + my_output = sample_set(1) + my_output.set_values(np.random.random((50, 1))) + my_input2 = sample_set(1) + my_input2.set_values(np.random.random((60, 1))) + + + sio.savemat(os.path.join(local_path, 'testfile1'), mdat1) + sio.savemat(os.path.join(local_path, 'testfile2'), mdat2) + num_samples = np.array([50, 60]) num_chains_pproc1, num_chains_pproc2 = np.ceil(num_samples/float(\ chain_length*comm.size)).astype('int') @@ -40,14 +53,18 @@ def test_loadmat_init(): num_chains_pproc2]) num_samples1, num_samples2 = chain_length * np.array([num_chains1, num_chains2]) - - sio.savemat(os.path.join(local_path, 'testfile1'), mdat1) - sio.savemat(os.path.join(local_path, 'testfile2'), mdat2) - (loaded_sampler1, samples1, data1) = asam.loadmat(os.path.join(local_path, + 
bet.sample.save_discretization(disc(my_input1, my_output), + os.path.join(local_path, 'testfile1')) + bet.sample.save_discretization(disc(my_input2, None), + os.path.join(local_path, 'testfile2')) + + (loaded_sampler1, discretization1) = asam.loadmat(os.path.join(local_path, 'testfile1')) - nptest.assert_array_equal(samples1, mdat1['samples']) - nptest.assert_array_equal(data1, mdat1['data']) + nptest.assert_array_equal(discretization1._input_sample_set.get_values(), + my_input1.get_values()) + nptest.assert_array_equal(discretization1._output_sample_set.get_values(), + my_output.get_values()) assert loaded_sampler1.num_samples == num_samples1 assert loaded_sampler1.chain_length == chain_length assert loaded_sampler1.num_chains_pproc == num_chains_pproc1 @@ -56,10 +73,11 @@ def test_loadmat_init(): loaded_sampler1.sample_batch_no) assert loaded_sampler1.lb_model == None - (loaded_sampler2, samples2, data2) = asam.loadmat(os.path.join(local_path, - 'testfile2'), model) - nptest.assert_array_equal(samples2, mdat2['samples']) - nptest.assert_array_equal(data2, None) + (loaded_sampler2, discretization2) = asam.loadmat(os.path.join(local_path, + 'testfile2'), lb_model=model) + nptest.assert_array_equal(discretization2._input_sample_set.get_values(), + my_input2.get_values()) + assert discretization2._output_sample_set is None assert loaded_sampler2.num_samples == num_samples2 assert loaded_sampler2.chain_length == chain_length assert loaded_sampler2.num_chains_pproc == num_chains_pproc2 @@ -71,7 +89,7 @@ def test_loadmat_init(): if os.path.exists(os.path.join(local_path, 'testfile2.mat')): os.remove(os.path.join(local_path, 'testfile2.mat')) -def verify_samples(QoI_range, sampler, param_min, param_max, +def verify_samples(QoI_range, sampler, input_domain, t_set, savefile, initial_sample_type, hot_start=0): """ Run :meth:`bet.sampling.adaptiveSampling.sampler.generalized_chains` and @@ -100,31 +118,33 @@ def ifun(outputs): if not hot_start: # run generalized chains - (samples, data, all_step_ratios) = sampler.generalized_chains(param_min, - param_max, t_set, kernel_rD, savefile, initial_sample_type) + (my_discretization, all_step_ratios) = sampler.generalized_chains(\ + input_domain, t_set, kernel_rD, savefile, initial_sample_type) else: # cold start sampler1 = asam.sampler(sampler.num_samples/2, sampler.chain_length/2, sampler.lb_model) - (samples, data, all_step_ratios) = sampler1.generalized_chains(\ - param_min, param_max, t_set, kernel_rD, savefile, - initial_sample_type) + (my_discretization, all_step_ratios) = sampler1.generalized_chains(\ + input_domain, t_set, kernel_rD, savefile, initial_sample_type) + comm.barrier() # hot start - (samples, data, all_step_ratios) = sampler.generalized_chains(\ - param_min, param_max, t_set, kernel_rD, savefile, - initial_sample_type, hot_start=hot_start) + (my_discretization, all_step_ratios) = sampler.generalized_chains(\ + input_domain, t_set, kernel_rD, savefile, initial_sample_type, + hot_start=hot_start) - # check dimensions of samples - assert samples.shape == (sampler.num_samples, len(param_min)) + # check dimensions of input and output + assert my_discretization.check_nums() - # are the samples in bounds? - param_left = np.repeat([param_min], sampler.num_samples, 0) - param_right = np.repeat([param_max], sampler.num_samples, 0) - assert np.all(samples <= param_right) - assert np.all(samples >= param_left) + # are the input in bounds? 
+ input_left = np.repeat([input_domain[:, 0]], sampler.num_samples, 0) + input_right = np.repeat([input_domain[:, 1]], sampler.num_samples, 0) + assert np.all(my_discretization._input_sample_set.get_values() <= \ + input_right) + assert np.all(my_discretization._input_sample_set.get_values() >= \ + input_left) - # check dimensions of data - assert data.shape == (sampler.num_samples, len(QoI_range)) + # check dimensions of output + assert my_discretization._output_sample_set.get_dim() == len(QoI_range) # check dimensions of all_step_ratios assert all_step_ratios.shape == (sampler.num_chains, sampler.chain_length) @@ -134,11 +154,18 @@ def ifun(outputs): assert np.all(all_step_ratios <= t_set.max_ratio) # did the savefiles get created? (proper number, contain proper keys) - mdat = {} + comm.barrier() + mdat = dict() if comm.rank == 0: mdat = sio.loadmat(savefile) - nptest.assert_array_equal(samples, mdat['samples']) - nptest.assert_array_equal(data, mdat['data']) + saved_disc = bet.sample.load_discretization(savefile) + # compare the input + nptest.assert_array_equal(my_discretization._input_sample_set.\ + get_values(), saved_disc._input_sample_set.get_values()) + # compare the output + nptest.assert_array_equal(my_discretization._output_sample_set.\ + get_values(), saved_disc._output_sample_set.get_values()) + nptest.assert_array_equal(all_step_ratios, mdat['step_ratios']) assert sampler.chain_length == mdat['chain_length'] assert sampler.num_samples == mdat['num_samples'] @@ -157,39 +184,25 @@ def setUp(self): """ # create 1-1 map - self.param_min1 = np.zeros((1, )) - self.param_max1 = np.zeros((1, )) + self.input_domain1 = np.column_stack((np.zeros((1,)), np.ones((1,)))) def map_1t1(x): - """ - 1 to 1 map - """ - return x*2.0 + return np.sin(x) # create 3-1 map - self.param_min3 = np.zeros((3, )) - self.param_max3 = np.ones((3, )) + self.input_domain3 = np.column_stack((np.zeros((3,)), np.ones((3,)))) def map_3t1(x): - """ - 3 to 1 map - """ - return np.expand_dims(np.sum(x, 1), axis=1) + return np.sum(x, 1) # create 3-2 map def map_3t2(x): - """ - 3 to 2 map - """ return np.vstack(([x[:, 0]+x[:, 1], x[:, 2]])).transpose() # create 10-4 map - self.param_min10 = np.zeros((10, )) - self.param_max10 = np.ones((10, )) + self.input_domain10 = np.column_stack((np.zeros((10,)), np.ones((10,)))) def map_10t4(x): - """ - 10 to 4 map - """ x1 = x[:, 0] + x[:, 1] x2 = x[:, 2] + x[:, 3] x3 = x[:, 4] + x[:, 5] x4 = np.sum(x[:, [6, 7, 8, 9]], 1) return np.vstack([x1, x2, x3, x4]).transpose() + self.savefiles = ["11t11", "1t1", "3to1", "3to2", "10to4"] self.models = [map_1t1, map_1t1, map_3t1, map_3t2, map_10t4] self.QoI_range = [np.array([2.0]), np.array([2.0]), np.array([3.0]), @@ -209,16 +222,15 @@ def map_10t4(x): self.samplers.append(asam.sampler(num_samples, chain_length, model)) - self.param_min_list = [self.param_min1, self.param_min1, - self.param_min3, self.param_min3, self.param_min10] - self.param_max_list = [self.param_max1, self.param_max1, - self.param_max3, self.param_max3, self.param_max10] + self.input_domain_list = [self.input_domain1, self.input_domain1, + self.input_domain3, self.input_domain3, self.input_domain10] self.test_list = zip(self.models, self.QoI_range, self.samplers, - self.param_min_list, self.param_max_list, self.savefiles) + self.input_domain_list, self.savefiles) def tearDown(self): + comm.barrier() for f in self.savefiles: if comm.rank == 0 and os.path.exists(f+".mat"): os.remove(f+".mat") @@ -240,20 +252,19 @@ def test_update(self): 
nptest.assert_array_equal(self.samplers[0].sample_batch_no, np.repeat(range(self.samplers[0].num_chains), self.samplers[0].chain_length, 0)) - def test_run_gen(self): """ Run :meth:`bet.sampling.adaptiveSampling.sampler.run_gen` and verify that the output has the correct dimensions. """ - # sampler.run_gen(kern_list, rho_D, maximum, param_min, param_max, + # sampler.run_gen(kern_list, rho_D, maximum, input_domain, # t_set, savefile, initial_sample_type) - # returns list where each member is a tuple ((samples, data), + # returns list where each member is a tuple (discretization, # all_step_ratios, num_high_prob_samples, - # sorted_indices_of_num_high_prob_samples, average_step_ratio) - # create indicator function + # sorted_indices_of_num_high_prob_samples, average_step_ratio) create + # indicator function inputs = self.test_list[3] - _, QoI_range, sampler, param_min, param_max, savefile = inputs + _, QoI_range, sampler, input_domain, savefile = inputs Q_ref = QoI_range*0.5 bin_size = 0.15*QoI_range @@ -276,17 +287,18 @@ def ifun(outputs): t_set = asam.transition_set(.5, .5**5, 1.0) # run run_gen - output = sampler.run_gen(kern_list, ifun, maximum, param_min, - param_max, t_set, savefile) + output = sampler.run_gen(kern_list, ifun, maximum, input_domain, t_set, + savefile) results, r_step_size, results_rD, sort_ind, mean_ss = output for out in output: assert len(out) == 2 - for samples, data in results: - assert samples.shape == (sampler.num_samples, len(param_min)) - assert data.shape == (sampler.num_samples, len(QoI_range)) + for my_disc in results: + assert my_disc.check_nums + assert my_disc._input_sample_set.get_dim() == input_domain.shape[0] + assert my_disc._output_sample_set.get_dim() == len(QoI_range) for step_sizes in r_step_size: assert step_sizes.shape == (sampler.num_chains, sampler.chain_length) @@ -304,12 +316,12 @@ def test_run_tk(self): that the output has the correct dimensions. 
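The ``ifun`` indicator density defined throughout these tests puts a constant maximum value on a small box centred at ``Q_ref`` and zero elsewhere. A standalone sketch of that construction (the particular ``Q_ref`` and ``bin_size`` values here are illustrative)::

    import numpy as np

    Q_ref = np.array([1.5, 0.5])
    bin_size = 0.15 * np.array([3.0, 1.0])
    maximum = 1.0 / np.product(bin_size)

    def ifun(outputs):
        """Indicator density: ``maximum`` inside the box around Q_ref, 0 outside."""
        left = np.repeat([Q_ref - 0.5 * bin_size], outputs.shape[0], 0)
        right = np.repeat([Q_ref + 0.5 * bin_size], outputs.shape[0], 0)
        inside = np.logical_and(np.all(outputs >= left, axis=1),
                                np.all(outputs <= right, axis=1))
        return inside.astype('float64') * maximum

    assert ifun(np.array([[1.5, 0.5], [10.0, 10.0]])).tolist() == [maximum, 0.0]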
""" # sampler.run_tk(init_ratio, min_raio, max_ratio, rho_D, maximum, - # param_min, param_max, kernel, savefile, intial_sample_type) - # returns list where each member is a tuple ((samples, data), - # all_step_ratios, num_high_prob_samples, + # input_domain, kernel, savefile, intial_sample_type) + # returns list where each member is a tuple (discretization, + # all_step_ra)tios, num_high_prob_samples, # sorted_indices_of_num_high_prob_samples, average_step_ratio) inputs = self.test_list[3] - _, QoI_range, sampler, param_min, param_max, savefile = inputs + _, QoI_range, sampler, input_domain, savefile = inputs Q_ref = QoI_range*0.5 bin_size = 0.15*QoI_range @@ -334,19 +346,20 @@ def ifun(outputs): # run run_gen output = sampler.run_tk(init_ratio, min_ratio, max_ratio, ifun, - maximum, param_min, param_max, kernel_rD, savefile) + maximum, input_domain, kernel_rD, savefile) results, r_step_size, results_rD, sort_ind, mean_ss = output for out in output: assert len(out) == 3 - for samples, data in results: - assert samples.shape == (sampler.num_samples, len(param_min)) - assert data.shape == (sampler.num_samples, len(QoI_range)) + for my_disc in results: + assert my_disc.check_nums + assert my_disc._input_sample_set.get_dim() == input_domain.shape[0] + assert my_disc._output_sample_set.get_dim() == len(QoI_range) for step_sizes in r_step_size: assert step_sizes.shape == (sampler.num_chains, - sampler.chain_length) + sampler.chain_length) for num_hps in results_rD: assert isinstance(num_hps, int) for inds in sort_ind: @@ -361,13 +374,13 @@ def test_run_inc_dec(self): that the output has the correct dimensions. """ # sampler.run_inc_dec(increase, decrease, tolerance, rho_D, maximum, - # param_min, param_max, t_set, savefile, initial_sample_type) - # returns list where each member is a tuple ((samples, data), + # input_domain, t_set, savefile, initial_sample_type) + # returns list where each member is a tuple (discretization, # all_step_ratios, num_high_prob_samples, # sorted_indices_of_num_high_prob_samples, average_step_ratio) inputs = self.test_list[3] - _, QoI_range, sampler, param_min, param_max, savefile = inputs - + _, QoI_range, sampler, input_domain, savefile = inputs + Q_ref = QoI_range*0.5 bin_size = 0.15*QoI_range maximum = 1/np.product(bin_size) @@ -391,16 +404,17 @@ def ifun(outputs): # run run_gen output = sampler.run_inc_dec(increase, decrease, tolerance, ifun, - maximum, param_min, param_max, t_set, savefile) + maximum, input_domain, t_set, savefile) results, r_step_size, results_rD, sort_ind, mean_ss = output for out in output: assert len(out) == 3 - for samples, data in results: - assert samples.shape == (sampler.num_samples, len(param_min)) - assert data.shape == (sampler.num_samples, len(QoI_range)) + for my_disc in results: + assert my_disc.check_nums + assert my_disc._input_sample_set.get_dim() == input_domain.shape[0] + assert my_disc._output_sample_set.get_dim() == len(QoI_range) for step_sizes in r_step_size: assert step_sizes.shape == (sampler.num_chains, sampler.chain_length) @@ -420,16 +434,15 @@ def test_generalized_chains(self): # create a transition set t_set = asam.transition_set(.5, .5**5, 1.0) - for _, QoI_range, sampler, param_min, param_max, savefile in self.test_list: + for _, QoI_range, sampler, input_domain, savefile in self.test_list: for initial_sample_type in ["random", "r", "lhs"]: for hot_start in range(3): - print len(param_min) - verify_samples(QoI_range, sampler, param_min, param_max, + verify_samples(QoI_range, sampler, input_domain, t_set, 
savefile, initial_sample_type, hot_start) class test_kernels(unittest.TestCase): """ - Tests kernels for a 1d, 2d, 4d data space. + Tests kernels for a 1d, 2d, 4d output space. """ def setUp(self): """ @@ -440,7 +453,7 @@ def setUp(self): def test_list(self): """ - Run test for a 1d, 2d, and 4d data space. + Run test for a 1d, 2d, and 4d output space. """ for QoI_range in self.QoI_range: Q_ref = QoI_range*0.5 @@ -467,19 +480,19 @@ def verify_indiv(self, Q_ref, rhoD, maximum): assert isinstance(kern_list[1], asam.rhoD_kernel) assert isinstance(kern_list[2], asam.maxima_kernel) -class data_1D(object): +class output_1D(object): """ - Sets up 1D data domain problem. + Sets up 1D output domain problem. """ def createData(self): """ - Set up data. + Set up output. """ - self.data = np.random.random((100, 1))*10.0 + self.output = np.random.random((100, 1))*10.0 self.Q_ref = np.array([5.0]) - self.data_domain = np.expand_dims(np.array([0.0, 10.0]), axis=0) + self.output_domain = np.expand_dims(np.array([0.0, 10.0]), axis=0) self.mdim = 1 - bin_size = 0.15*self.data_domain[:, 1] + bin_size = 0.15*self.output_domain[:, 1] self.maximum = 1/np.product(bin_size) def ifun(outputs): """ @@ -492,19 +505,19 @@ def ifun(outputs): return inside.astype('float64')*max_values self.rho_D = ifun -class data_2D(object): +class output_2D(object): """ - Sets up 2D data domain problem. + Sets up 2D output domain problem. """ def createData(self): """ - Set up data. + Set up output. """ - self.data = np.random.random((100, 2))*10.0 + self.output = np.random.random((100, 2))*10.0 self.Q_ref = np.array([5.0, 5.0]) - self.data_domain = np.array([[0.0, 10.0], [0.0, 10.0]]) + self.output_domain = np.array([[0.0, 10.0], [0.0, 10.0]]) self.mdim = 2 - bin_size = 0.15*self.data_domain[:, 1] + bin_size = 0.15*self.output_domain[:, 1] self.maximum = 1/np.product(bin_size) def ifun(outputs): """ @@ -518,19 +531,19 @@ def ifun(outputs): self.rho_D = ifun -class data_3D(object): +class output_3D(object): """ - Sets up 3D data domain problem. + Sets up 3D output domain problem. """ def createData(self): """ - Set up data. + Set up output. """ - self.data = np.random.random((100, 3))*10.0 + self.output = np.random.random((100, 3))*10.0 self.Q_ref = np.array([5.0, 5.0, 5.0]) - self.data_domain = np.array([[0.0, 10.0], [0.0, 10.0], [0.0, 10.0]]) + self.output_domain = np.array([[0.0, 10.0], [0.0, 10.0], [0.0, 10.0]]) self.mdim = 3 - bin_size = 0.15*self.data_domain[:, 1] + bin_size = 0.15*self.output_domain[:, 1] self.maximum = 1/np.product(bin_size) def ifun(outputs): """ @@ -567,14 +580,14 @@ def test_delta_step(self): Test the delta_step method of :class:`bet.sampling.adaptiveSampling.kernel` """ - kern_new, proposal = self.kernel.delta_step(self.data) + kern_new, proposal = self.kernel.delta_step(self.output) assert kern_new == None - assert proposal.shape == (self.data.shape[0],) + assert proposal.shape == (self.output.shape[0],) -class test_kernel_1D(kernel, data_1D): +class test_kernel_1D(kernel, output_1D): """ - Test :class:`bet.sampling.adaptiveSampling.kernel` on a 1D data space. + Test :class:`bet.sampling.adaptiveSampling.kernel` on a 1D output space. """ def setUp(self): """ @@ -583,9 +596,9 @@ def setUp(self): super(test_kernel_1D, self).createData() super(test_kernel_1D, self).setUp() -class test_kernel_2D(kernel, data_2D): +class test_kernel_2D(kernel, output_2D): """ - Test :class:`bet.sampling.adaptiveSampling.kernel` on a 2D data space. + Test :class:`bet.sampling.adaptiveSampling.kernel` on a 2D output space. 
""" def setUp(self): """ @@ -594,9 +607,9 @@ def setUp(self): super(test_kernel_2D, self).createData() super(test_kernel_2D, self).setUp() -class test_kernel_3D(kernel, data_3D): +class test_kernel_3D(kernel, output_3D): """ - Test :class:`bet.sampling.adaptiveSampling.kernel` on a 3D data space. + Test :class:`bet.sampling.adaptiveSampling.kernel` on a 3D output space. """ def setUp(self): """ @@ -633,19 +646,20 @@ def test_delta_step(self): Test the delta_step method of :class:`bet.sampling.adaptiveSampling.rhoD_kernel` """ - kern_new, proposal = self.kernel.delta_step(self.data) - nptest.assert_array_equal(kern_new, self.rho_D(self.data)) + kern_new, proposal = self.kernel.delta_step(self.output) + nptest.assert_array_equal(kern_new, self.rho_D(self.output)) assert proposal == None - data = np.vstack([self.Q_ref+3.0, self.Q_ref, self.Q_ref-3.0]) - data_new = np.vstack([self.Q_ref, self.Q_ref+3.0, self.Q_ref-3.0]) - kern_old = self.rho_D(data) - kern_new, proposal = self.kernel.delta_step(data_new, kern_old) + output = np.vstack([self.Q_ref+3.0, self.Q_ref, self.Q_ref-3.0]) + output_new = np.vstack([self.Q_ref, self.Q_ref+3.0, self.Q_ref-3.0]) + kern_old = self.rho_D(output) + kern_new, proposal = self.kernel.delta_step(output_new, kern_old) nptest.assert_array_equal(proposal, [0.5, 2.0, 1.0]) -class test_rhoD_kernel_1D(rhoD_kernel, data_1D): +class test_rhoD_kernel_1D(rhoD_kernel, output_1D): """ - Test :class:`bet.sampling.adaptiveSampling.rhoD_kernel` on a 1D data space. + Test :class:`bet.sampling.adaptiveSampling.rhoD_kernel` on a 1D output + space. """ def setUp(self): """ @@ -654,9 +668,10 @@ def setUp(self): super(test_rhoD_kernel_1D, self).createData() super(test_rhoD_kernel_1D, self).setUp() -class test_rhoD_kernel_2D(rhoD_kernel, data_2D): +class test_rhoD_kernel_2D(rhoD_kernel, output_2D): """ - Test :class:`bet.sampling.adaptiveSampling.rhoD_kernel` on a 2D data space. + Test :class:`bet.sampling.adaptiveSampling.rhoD_kernel` on a 2D output + space. """ def setUp(self): """ @@ -665,9 +680,10 @@ def setUp(self): super(test_rhoD_kernel_2D, self).createData() super(test_rhoD_kernel_2D, self).setUp() -class test_rhoD_kernel_3D(rhoD_kernel, data_3D): +class test_rhoD_kernel_3D(rhoD_kernel, output_3D): """ - Test :class:`bet.sampling.adaptiveSampling.rhoD_kernel` on a 3D data space. + Test :class:`bet.sampling.adaptiveSampling.rhoD_kernel` on a 3D output + space. 
""" def setUp(self): """ @@ -707,23 +723,23 @@ def test_delta_step(self): Test the delta_step method of :class:`bet.sampling.adaptiveSampling.maxima_kernel` """ - data_old = np.vstack([self.Q_ref+3.0, self.Q_ref, self.Q_ref-3.0]) - kern_old, proposal = self.kernel.delta_step(data_old) + output_old = np.vstack([self.Q_ref+3.0, self.Q_ref, self.Q_ref-3.0]) + kern_old, proposal = self.kernel.delta_step(output_old) # TODO: check kern_old - #nptest.assert_array_equal(kern_old, np.zeros((self.data.shape[0],)) + #nptest.assert_array_equal(kern_old, np.zeros((self.output.shape[0],)) assert proposal == None - data_new = np.vstack([self.Q_ref, self.Q_ref+3.0, self.Q_ref-3.0]) - kern_new, proposal = self.kernel.delta_step(data_new, kern_old) + output_new = np.vstack([self.Q_ref, self.Q_ref+3.0, self.Q_ref-3.0]) + kern_new, proposal = self.kernel.delta_step(output_new, kern_old) #TODO: check kern_new #nptest.assert_array_eqyal(kern_new, something) nptest.assert_array_equal(proposal, [0.5, 2.0, 1.0]) -class test_maxima_kernel_1D(maxima_kernel, data_1D): +class test_maxima_kernel_1D(maxima_kernel, output_1D): """ - Test :class:`bet.sampling.adaptiveSampling.maxima_kernel` on a 1D data + Test :class:`bet.sampling.adaptiveSampling.maxima_kernel` on a 1D output space. """ def setUp(self): @@ -733,9 +749,9 @@ def setUp(self): super(test_maxima_kernel_1D, self).createData() super(test_maxima_kernel_1D, self).setUp() -class test_maxima_kernel_2D(maxima_kernel, data_2D): +class test_maxima_kernel_2D(maxima_kernel, output_2D): """ - Test :class:`bet.sampling.adaptiveSampling.maxima_kernel` on a 2D data + Test :class:`bet.sampling.adaptiveSampling.maxima_kernel` on a 2D output space. """ def setUp(self): @@ -745,9 +761,9 @@ def setUp(self): super(test_maxima_kernel_2D, self).createData() super(test_maxima_kernel_2D, self).setUp() -class test_maxima_kernel_3D(maxima_kernel, data_3D): +class test_maxima_kernel_3D(maxima_kernel, output_3D): """ - Test :class:`bet.sampling.adaptiveSampling.maxima_kernel` on a 3D data + Test :class:`bet.sampling.adaptiveSampling.maxima_kernel` on a 3D output space. """ def setUp(self): @@ -800,10 +816,10 @@ def test_delta_step(self): # check self.radius # check self.mean -class test_maxima_mean_kernel_1D(maxima_mean_kernel, data_1D): +class test_maxima_mean_kernel_1D(maxima_mean_kernel, output_1D): """ - Test :class:`bet.sampling.adaptiveSampling.maxima_mean_kernel` on a 1D data - space. + Test :class:`bet.sampling.adaptiveSampling.maxima_mean_kernel` on a 1D + output space. """ def setUp(self): """ @@ -812,10 +828,10 @@ def setUp(self): super(test_maxima_mean_kernel_1D, self).createData() super(test_maxima_mean_kernel_1D, self).setUp() -class test_maxima_mean_kernel_2D(maxima_mean_kernel, data_2D): +class test_maxima_mean_kernel_2D(maxima_mean_kernel, output_2D): """ - Test :class:`bet.sampling.adaptiveSampling.maxima_mean_kernel` on a 2D data - space. + Test :class:`bet.sampling.adaptiveSampling.maxima_mean_kernel` on a 2D + output space. """ def setUp(self): """ @@ -824,10 +840,10 @@ def setUp(self): super(test_maxima_mean_kernel_2D, self).createData() super(test_maxima_mean_kernel_2D, self).setUp() -class test_maxima_mean_kernel_3D(maxima_mean_kernel, data_3D): +class test_maxima_mean_kernel_3D(maxima_mean_kernel, output_3D): """ - Test :class:`bet.sampling.adaptiveSampling.maxima_mean_kernel` on a 3D data - space. + Test :class:`bet.sampling.adaptiveSampling.maxima_mean_kernel` on a 3D + output space. 
""" def setUp(self): """ @@ -846,52 +862,60 @@ def setUp(self): Set Up """ self.t_set = asam.transition_set(.5, .5**5, 1.0) + self.output_set = sample_set(self.mdim) + self.output_set.set_values(self.output) + self.output_set.global_to_local() + # Update _right_local, _left_local, _width_local + self.output_set.set_domain(self.output_domain) + self.output_set.update_bounds() + self.output_set.update_bounds_local() def test_init(self): """ Tests the initialization of - :class:`bet.sampling.adaptiveSamplinng.transition_set` + :class:`bet.sampling.adaptiveSampling.transition_set` """ assert self.t_set.init_ratio == .5 assert self.t_set.min_ratio == .5**5 assert self.t_set.max_ratio == 1.0 - + def test_step(self): """ Tests the method :meth:`bet.sampling.adaptiveSampling.transition_set.step` """ - # define step_ratio, param_width, param_left, param_right, samples_old - # from data - param_left = np.repeat([self.data_domain[:, 0]], self.data.shape[0], 0) - param_right = np.repeat([self.data_domain[:, 1]], self.data.shape[0], 0) - param_width = param_right - param_left - - step_ratio = 0.5*np.ones(self.data.shape[0],) - step_ratio[self.data.shape[0]/2:] = .1 - step_size = np.repeat([step_ratio], param_width.shape[1], - 0).transpose()*param_width + # define step_ratio from output_set + local_num = self.output_set._values_local.shape[0] + step_ratio = 0.5*np.ones(local_num,) + step_ratio[local_num/2:] = .1 + step_size = np.repeat([step_ratio], self.output_set.get_dim(), + 0).transpose()*self.output_set._width_local # take a step - samples_new = self.t_set.step(step_ratio, param_width, param_left, - param_right, self.data) + samples_new = self.t_set.step(step_ratio, self.output_set) # make sure the proposed steps are inside the domain # check dimensions of samples - assert samples_new.shape == self.data.shape + assert samples_new.shape() == self.output_set.shape() # are the samples in bounds? - assert np.all(samples_new <= param_right) - assert np.all(samples_new >= param_left) + assert np.all(samples_new.get_values_local() <=\ + self.output_set._right_local) + assert np.all(samples_new.get_values_local() >=\ + self.output_set._left_local) # make sure the proposed steps are inside the box defined around their # generating old samples - assert np.all(samples_new <= self.data+0.5*step_size) - assert np.all(samples_new >= self.data-0.5*step_size) + assert np.all(samples_new.get_values_local() <= + self.output_set.get_values_local()\ + +0.5*step_size) + assert np.all(samples_new.get_values_local() >= + self.output_set.get_values_local()\ + -0.5*step_size) -class test_transition_set_1D(transition_set, data_1D): +class test_transition_set_1D(transition_set, output_1D): """ - Test :class:`bet.sampling.adaptiveSampling.transition_set` on a 1D data + Test :class:`bet.sampling.adaptiveSampling.transition_set` on a 1D output space. """ def setUp(self): @@ -901,9 +925,9 @@ def setUp(self): super(test_transition_set_1D, self).createData() super(test_transition_set_1D, self).setUp() -class test_transition_set_2D(transition_set, data_2D): +class test_transition_set_2D(transition_set, output_2D): """ - Test :class:`bet.sampling.adaptiveSampling.transition_set` on a 2D data + Test :class:`bet.sampling.adaptiveSampling.transition_set` on a 2D output space. 
""" def setUp(self): @@ -913,9 +937,9 @@ def setUp(self): super(test_transition_set_2D, self).createData() super(test_transition_set_2D, self).setUp() -class test_transition_set_3D(transition_set, data_3D): +class test_transition_set_3D(transition_set, output_3D): """ - Test :class:`bet.sampling.adaptiveSampling.transition_set` on a 3D data + Test :class:`bet.sampling.adaptiveSampling.transition_set` on a 3D output space. """ def setUp(self): diff --git a/test/test_sampling/test_basicSampling.py b/test/test_sampling/test_basicSampling.py index 0fd2c140..abb90623 100644 --- a/test/test_sampling/test_basicSampling.py +++ b/test/test_sampling/test_basicSampling.py @@ -5,14 +5,19 @@ This module contains unittests for :mod:`~bet.sampling.basicSampling:` """ -import unittest, os, bet, pyDOE +import unittest, os, pyDOE import numpy.testing as nptest import numpy as np -import bet.sampling.basicSampling as bsam import scipy.io as sio +import bet +import bet.sampling.basicSampling as bsam from bet.Comm import comm +import bet.sample +from bet.sample import sample_set +from bet.sample import discretization as disc +import collections -local_path = os.path.join(os.path.dirname(bet.__file__), "../test/test_sampling") +local_path = os.path.join(".") @unittest.skipIf(comm.size > 1, 'Only run in serial') @@ -21,25 +26,41 @@ def test_loadmat(): Tests :meth:`bet.sampling.basicSampling.loadmat` """ np.random.seed(1) - mdat1 = {'samples':np.random.random((5,1)), - 'data':np.random.random((5,1)), 'num_samples':5} - mdat2 = {'samples':np.random.random((6,1)), 'num_samples':6} + mdat1 = {'num_samples':5} + mdat2 = {'num_samples':6} model = "this is not a model" + my_input1 = sample_set(1) + my_input1.set_values(np.random.random((5,1))) + my_output = sample_set(1) + my_output.set_values(np.random.random((5,1))) + my_input2 = sample_set(1) + my_input2.set_values(np.random.random((6,1))) + + sio.savemat(os.path.join(local_path, 'testfile1'), mdat1) sio.savemat(os.path.join(local_path, 'testfile2'), mdat2) - (loaded_sampler1, samples1, data1) = bsam.loadmat(os.path.join(local_path, + + bet.sample.save_discretization(disc(my_input1, my_output), + (os.path.join(local_path, 'testfile1'))) + bet.sample.save_discretization(disc(my_input2, None), + os.path.join(local_path, 'testfile2'), "NAME") + + (loaded_sampler1, discretization1) = bsam.loadmat(os.path.join(local_path, 'testfile1')) - nptest.assert_array_equal(samples1, mdat1['samples']) - nptest.assert_array_equal(data1, mdat1['data']) + nptest.assert_array_equal(discretization1._input_sample_set.get_values(), + my_input1.get_values()) + nptest.assert_array_equal(discretization1._output_sample_set.get_values(), + my_output.get_values()) assert loaded_sampler1.num_samples == 5 - assert loaded_sampler1.lb_model == None + assert loaded_sampler1.lb_model is None - (loaded_sampler2, samples2, data2) = bsam.loadmat(os.path.join(local_path, - 'testfile2'), model) - nptest.assert_array_equal(samples2, mdat2['samples']) - nptest.assert_array_equal(data2, None) + (loaded_sampler2, discretization2) = bsam.loadmat(os.path.join(local_path, + 'testfile2'), disc_name="NAME", model=model) + nptest.assert_array_equal(discretization2._input_sample_set.get_values(), + my_input2.get_values()) + assert discretization2._output_sample_set is None assert loaded_sampler2.num_samples == 6 assert loaded_sampler2.lb_model == model if os.path.exists(os.path.join(local_path, 'testfile1.mat')): @@ -47,80 +68,447 @@ def test_loadmat(): if os.path.exists(os.path.join(local_path, 'testfile2.mat')): 
os.remove(os.path.join(local_path, 'testfile2.mat')) -def verify_user_samples(model, sampler, samples, savefile, parallel): +def verify_compute_QoI_and_create_discretization(model, sampler, + input_sample_set, + savefile, parallel): + """ + Verify that the user samples are correct. + """ # evalulate the model at the samples directly - data = model(samples) + output_values = (model(input_sample_set._values)) + if len(output_values.shape) == 1: + output_sample_set = sample_set(1) + else: + output_sample_set = sample_set(output_values.shape[1]) + output_sample_set.set_values(output_values) + discretization = disc(input_sample_set, output_sample_set) # evaluate the model at the samples - (my_samples, my_data) = sampler.user_samples(samples, savefile, - parallel) + my_discretization = sampler.compute_QoI_and_create_discretization( + input_sample_set, savefile, + parallel) + my_num = my_discretization.check_nums() - if len(data.shape) == 1: - data = np.expand_dims(data, axis=1) - if len(samples.shape) == 1: - samples = np.expand_dims(samples, axis=1) - # compare the samples - nptest.assert_array_equal(samples, my_samples) + nptest.assert_array_equal(my_discretization._input_sample_set.get_values(), + discretization._input_sample_set.get_values()) # compare the data - nptest.assert_array_equal(data, my_data) + nptest.assert_array_equal(my_discretization._output_sample_set.get_values(), + discretization._output_sample_set.get_values()) + # did num_samples get updated? - assert samples.shape[0] == sampler.num_samples + assert my_num == sampler.num_samples + # did the file get correctly saved? - + comm.barrier() if comm.rank == 0: - mdat = sio.loadmat(savefile) - nptest.assert_array_equal(samples, mdat['samples']) - nptest.assert_array_equal(data, mdat['data']) + print "ONE" + saved_disc = bet.sample.load_discretization(savefile) + # compare the samples + nptest.assert_array_equal(my_discretization._input_sample_set.get_values(), + saved_disc._input_sample_set.get_values()) + # compare the data + nptest.assert_array_equal(my_discretization._output_sample_set.get_values(), + saved_disc._output_sample_set.get_values()) comm.Barrier() -def verify_random_samples(model, sampler, sample_type, param_min, param_max, +def verify_create_random_discretization(model, sampler, sample_type, input_domain, num_samples, savefile, parallel): + + np.random.seed(1) # recreate the samples - if num_samples == None: + if num_samples is None: num_samples = sampler.num_samples - param_left = np.repeat([param_min], num_samples, 0) - param_right = np.repeat([param_max], num_samples, 0) - samples = (param_right-param_left) + + input_sample_set = sample_set(input_domain.shape[0]) + input_sample_set.set_domain(input_domain) + + input_left = np.repeat([input_domain[:, 0]], num_samples, 0) + input_right = np.repeat([input_domain[:, 1]], num_samples, 0) + + input_values = (input_right-input_left) if sample_type == "lhs": - samples = samples * pyDOE.lhs(param_min.shape[-1], num_samples) + input_values = input_values * pyDOE.lhs(input_sample_set.get_dim(), + num_samples, 'center') elif sample_type == "random" or "r": - np.random.seed(1) - samples = samples * np.random.random(param_left.shape) - samples = samples + param_left + input_values = input_values * np.random.random(input_left.shape) + input_values = input_values + input_left + input_sample_set.set_values(input_values) + # evalulate the model at the samples directly - data = model(samples) + output_values = (model(input_sample_set._values)) + if len(output_values.shape) == 1: 
+ output_sample_set = sample_set(1) + else: + output_sample_set = sample_set(output_values.shape[1]) + output_sample_set.set_values(output_values) - # evaluate the model at the samples # reset the random seed - if sample_type == "random" or "r": - np.random.seed(1) - (my_samples, my_data) = sampler.user_samples(samples, savefile, - parallel) + np.random.seed(1) + # create the random discretization using a specified input domain + my_discretization = sampler.create_random_discretization(sample_type, + input_domain, savefile, num_samples=num_samples, + parallel=parallel) + my_num = my_discretization.check_nums() + # make sure that the samples are within the boundaries - assert np.all(my_samples <= param_right) - assert np.all(my_samples >= param_left) + assert np.all(my_discretization._input_sample_set._values <= input_right) + assert np.all(my_discretization._input_sample_set._values >= input_left) - if len(data.shape) == 1: - data = np.expand_dims(data, axis=1) - if len(samples.shape) == 1: - samples = np.expan_dims(samples, axis=1) - # compare the samples - nptest.assert_array_equal(samples, my_samples) + nptest.assert_array_equal(input_sample_set._values, + my_discretization._input_sample_set._values) # compare the data - nptest.assert_array_equal(data, my_data) + nptest.assert_array_equal(output_sample_set._values, + my_discretization._output_sample_set._values) + # did num_samples get updated? - assert samples.shape[0] == sampler.num_samples - assert num_samples == sampler.num_samples - # did the file get correctly saved? + assert my_num == sampler.num_samples + # did the file get correctly saved? if comm.rank == 0: - mdat = sio.loadmat(savefile) - nptest.assert_array_equal(samples, mdat['samples']) - nptest.assert_array_equal(data, mdat['data']) - comm.Barrier() + saved_disc = bet.sample.load_discretization(savefile) + + # compare the samples + nptest.assert_array_equal(my_discretization._input_sample_set.get_values(), + saved_disc._input_sample_set.get_values()) + # compare the data + nptest.assert_array_equal(my_discretization._output_sample_set.get_values(), + saved_disc._output_sample_set.get_values()) + #comm.Barrier() + + # reset the random seed + np.random.seed(1) + + my_sample_set = sample_set(input_domain.shape[0]) + my_sample_set.set_domain(input_domain) + # create the random discretization using an initialized sample_set + my_discretization = sampler.create_random_discretization(sample_type, + my_sample_set, savefile, num_samples=num_samples, + parallel=parallel) + my_num = my_discretization.check_nums() + + # make sure that the samples are within the boundaries + assert np.all(my_discretization._input_sample_set._values <= input_right) + assert np.all(my_discretization._input_sample_set._values >= input_left) + + # compare the samples + nptest.assert_array_equal(input_sample_set._values, + my_discretization._input_sample_set._values) + # compare the data + nptest.assert_array_equal(output_sample_set._values, + my_discretization._output_sample_set._values) + + # reset the random seed + np.random.seed(1) + # recreate the samples to test default choices with unit hypercube domain + if num_samples is None: + num_samples = sampler.num_samples + + my_dim = input_domain.shape[0] + input_sample_set = sample_set(my_dim) + input_sample_set.set_domain(np.repeat([[0.0, 1.0]], my_dim, axis=0)) + + input_left = np.repeat([input_domain[:, 0]], num_samples, 0) + input_right = np.repeat([input_domain[:, 1]], num_samples, 0) + + input_values = (input_right - input_left) + if sample_type == 
"lhs": + input_values = input_values * pyDOE.lhs(input_sample_set.get_dim(), + num_samples, 'center') + elif sample_type == "random" or "r": + input_values = input_values * np.random.random(input_left.shape) + input_values = input_values + input_left + input_sample_set.set_values(input_values) + + # reset random seed + np.random.seed(1) + # create the random discretization using a specified input_dim + my_discretization = sampler.create_random_discretization(sample_type, + my_dim, savefile, num_samples=num_samples, + parallel=parallel) + my_num = my_discretization.check_nums() + + # make sure that the samples are within the boundaries + assert np.all(my_discretization._input_sample_set._values <= input_right) + assert np.all(my_discretization._input_sample_set._values >= input_left) + + # compare the samples + nptest.assert_array_equal(input_sample_set._values, + my_discretization._input_sample_set._values) + # compare the data + nptest.assert_array_equal(output_sample_set._values, + my_discretization._output_sample_set._values) + + +def verify_random_sample_set_domain(sampler, sample_type, input_domain, + num_samples): + np.random.seed(1) + # recreate the samples + if num_samples is None: + num_samples = sampler.num_samples + + input_sample_set = sample_set(input_domain.shape[0]) + input_sample_set.set_domain(input_domain) + + input_left = np.repeat([input_domain[:, 0]], num_samples, 0) + input_right = np.repeat([input_domain[:, 1]], num_samples, 0) + + input_values = (input_right - input_left) + if sample_type == "lhs": + input_values = input_values * pyDOE.lhs(input_sample_set.get_dim(), + num_samples, 'center') + elif sample_type == "random" or "r": + input_values = input_values * np.random.random(input_left.shape) + input_values = input_values + input_left + input_sample_set.set_values(input_values) + + # reset the random seed + np.random.seed(1) + + # create the sample set from the domain + print sample_type + my_sample_set = sampler.random_sample_set_domain(sample_type, input_domain, + num_samples=num_samples) + + # make sure that the samples are within the boundaries + assert np.all(my_sample_set._values <= input_right) + assert np.all(my_sample_set._values >= input_left) + + # compare the samples + nptest.assert_array_equal(input_sample_set._values, + my_sample_set._values) + +def verify_random_sample_set_dimension(sampler, sample_type, input_dim, + num_samples): + + np.random.seed(1) + # recreate the samples + if num_samples is None: + num_samples = sampler.num_samples + + input_domain = np.repeat([[0, 1]], input_dim, axis=0) + input_sample_set = sample_set(input_dim) + input_sample_set.set_domain(input_domain) + + input_left = np.repeat([input_domain[:, 0]], num_samples, 0) + input_right = np.repeat([input_domain[:, 1]], num_samples, 0) + + input_values = (input_right - input_left) + if sample_type == "lhs": + input_values = input_values * pyDOE.lhs(input_sample_set.get_dim(), + num_samples, 'center') + elif sample_type == "random" or "r": + input_values = input_values * np.random.random(input_left.shape) + input_values = input_values + input_left + input_sample_set.set_values(input_values) + + # reset the random seed + np.random.seed(1) + + # create the sample set from the domain + my_sample_set = sampler.random_sample_set_dimension(sample_type, input_dim, + num_samples=num_samples) + + # make sure that the samples are within the boundaries + assert np.all(my_sample_set._values <= input_right) + assert np.all(my_sample_set._values >= input_left) + + # compare the samples + 
nptest.assert_array_equal(input_sample_set._values, + my_sample_set._values) + +def verify_random_sample_set(sampler, sample_type, input_sample_set, + num_samples): + test_sample_set = input_sample_set + np.random.seed(1) + # recreate the samples + if num_samples is None: + num_samples = sampler.num_samples + + input_domain = input_sample_set.get_domain() + if input_domain is None: + input_domain = np.repeat([[0, 1]], input_sample_set.get_dim(), axis=0) + + input_left = np.repeat([input_domain[:, 0]], num_samples, 0) + input_right = np.repeat([input_domain[:, 1]], num_samples, 0) + + input_values = (input_right - input_left) + if sample_type == "lhs": + input_values = input_values * pyDOE.lhs(input_sample_set.get_dim(), + num_samples, 'center') + elif sample_type == "random" or "r": + input_values = input_values * np.random.random(input_left.shape) + input_values = input_values + input_left + test_sample_set.set_values(input_values) + + # reset the random seed + np.random.seed(1) + + # create the sample set from the domain + print sample_type + my_sample_set = sampler.random_sample_set(sample_type, input_sample_set, + num_samples=num_samples) + + # make sure that the samples are within the boundaries + assert np.all(my_sample_set._values <= input_right) + assert np.all(my_sample_set._values >= input_left) + + # compare the samples + nptest.assert_array_equal(test_sample_set._values, + my_sample_set._values) + +def verify_regular_sample_set(sampler, input_sample_set, + num_samples_per_dim): + + test_sample_set = input_sample_set + dim = input_sample_set.get_dim() + # recreate the samples + if num_samples_per_dim is None: + num_samples_per_dim = 5 + + if not isinstance(num_samples_per_dim, collections.Iterable): + num_samples_per_dim = num_samples_per_dim * np.ones((dim,), dtype='int') + + sampler.num_samples = np.product(num_samples_per_dim) + + test_domain = test_sample_set.get_domain() + if test_domain is None: + test_domain = np.repeat([[0, 1]], test_sample_set.get_dim(), axis=0) + + test_values = np.zeros((sampler.num_samples, test_sample_set.get_dim())) + + vec_samples_dimension = np.empty((dim), dtype=object) + for i in np.arange(0, dim): + vec_samples_dimension[i] = list(np.linspace( + test_domain[i, 0], test_domain[i, 1], + num_samples_per_dim[i] + 2))[1:num_samples_per_dim[i] + 1] + + if np.equal(dim, 1): + arrays_samples_dimension = np.array([vec_samples_dimension]) + else: + arrays_samples_dimension = np.meshgrid( + *[vec_samples_dimension[i] for i in np.arange(0, dim)], indexing='ij') + + if np.equal(dim, 1): + test_values = arrays_samples_dimension.transpose() + else: + for i in np.arange(0, dim): + test_values[:, i:i + 1] = np.vstack(arrays_samples_dimension[i].flat[:]) + + test_sample_set.set_values(test_values) + + # create the sample set from sampler + my_sample_set = sampler.regular_sample_set(input_sample_set, + num_samples_per_dim=num_samples_per_dim) + + # compare the samples + nptest.assert_array_equal(test_sample_set._values, + my_sample_set._values) + +def verify_regular_sample_set_domain(sampler, input_domain, + num_samples_per_dim): + + input_sample_set = sample_set(input_domain.shape[0]) + input_sample_set.set_domain(input_domain) + + test_sample_set = input_sample_set + dim = input_sample_set.get_dim() + # recreate the samples + if num_samples_per_dim is None: + num_samples_per_dim = 5 + + if not isinstance(num_samples_per_dim, collections.Iterable): + num_samples_per_dim = num_samples_per_dim * np.ones((dim,), dtype='int') + + sampler.num_samples = 
np.product(num_samples_per_dim) + + test_domain = test_sample_set.get_domain() + if test_domain is None: + test_domain = np.repeat([[0, 1]], test_sample_set.get_dim(), axis=0) + + test_values = np.zeros((sampler.num_samples, test_sample_set.get_dim())) + + vec_samples_dimension = np.empty((dim), dtype=object) + for i in np.arange(0, dim): + vec_samples_dimension[i] = list(np.linspace( + test_domain[i, 0], test_domain[i, 1], + num_samples_per_dim[i] + 2))[1:num_samples_per_dim[i] + 1] + + if np.equal(dim, 1): + arrays_samples_dimension = np.array([vec_samples_dimension]) + else: + arrays_samples_dimension = np.meshgrid( + *[vec_samples_dimension[i] for i in np.arange(0, dim)], indexing='ij') + + if np.equal(dim, 1): + test_values = arrays_samples_dimension.transpose() + else: + for i in np.arange(0, dim): + test_values[:, i:i + 1] = np.vstack(arrays_samples_dimension[i].flat[:]) + + test_sample_set.set_values(test_values) + + # create the sample set from sampler + my_sample_set = sampler.regular_sample_set_domain(input_domain, + num_samples_per_dim=num_samples_per_dim) + + # compare the samples + nptest.assert_array_equal(test_sample_set._values, + my_sample_set._values) + +def verify_regular_sample_set_dimension(sampler, input_dim, + num_samples_per_dim): + + input_domain = np.repeat([[0, 1]], input_dim, axis=0) + input_sample_set = sample_set(input_dim) + input_sample_set.set_domain(input_domain) + + test_sample_set = input_sample_set + dim = input_dim + # recreate the samples + if num_samples_per_dim is None: + num_samples_per_dim = 5 + + if not isinstance(num_samples_per_dim, collections.Iterable): + num_samples_per_dim = num_samples_per_dim * np.ones((dim,), dtype='int') + + sampler.num_samples = np.product(num_samples_per_dim) + + test_domain = test_sample_set.get_domain() + if test_domain is None: + test_domain = np.repeat([[0, 1]], test_sample_set.get_dim(), axis=0) + + test_values = np.zeros((sampler.num_samples, test_sample_set.get_dim())) + + vec_samples_dimension = np.empty((dim), dtype=object) + for i in np.arange(0, dim): + vec_samples_dimension[i] = list(np.linspace( + test_domain[i, 0], test_domain[i, 1], + num_samples_per_dim[i] + 2))[1:num_samples_per_dim[i] + 1] + + if np.equal(dim, 1): + arrays_samples_dimension = np.array([vec_samples_dimension]) + else: + arrays_samples_dimension = np.meshgrid( + *[vec_samples_dimension[i] for i in np.arange(0, dim)], indexing='ij') + + if np.equal(dim, 1): + test_values = arrays_samples_dimension.transpose() + else: + for i in np.arange(0, dim): + test_values[:, i:i + 1] = np.vstack(arrays_samples_dimension[i].flat[:]) + + test_sample_set.set_values(test_values) + + # create the sample set from sampler + my_sample_set = sampler.regular_sample_set_dimension(input_dim, + num_samples_per_dim=num_samples_per_dim) + + # compare the samples + nptest.assert_array_equal(test_sample_set._values, + my_sample_set._values) class Test_basic_sampler(unittest.TestCase): @@ -130,21 +518,18 @@ class Test_basic_sampler(unittest.TestCase): def setUp(self): # create 1-1 map - self.param_min1 = np.zeros((1, )) - self.param_max1 = np.zeros((1, )) + self.input_domain1 = np.column_stack((np.zeros((1,)), np.ones((1,)))) def map_1t1(x): return np.sin(x) # create 3-1 map - self.param_min3 = np.zeros((3, )) - self.param_max3 = np.ones((3, )) + self.input_domain3 = np.column_stack((np.zeros((3,)), np.ones((3,)))) def map_3t1(x): return np.sum(x, 1) # create 3-2 map def map_3t2(x): return np.vstack(([x[:, 0]+x[:, 1], x[:, 2]])).transpose() # create 10-4 map - 
self.param_min10 = np.zeros((10, )) - self.param_max10 = np.ones((10, )) + self.input_domain10 = np.column_stack((np.zeros((10,)), np.ones((10,)))) def map_10t4(x): x1 = x[:, 0] + x[:, 1] x2 = x[:, 2] + x[:, 3] @@ -158,26 +543,32 @@ def map_10t4(x): for model in self.models: self.samplers.append(bsam.sampler(model, num_samples)) + self.input_dim1 = 1 + self.input_dim2 = 2 + self.input_dim3 = 10 + + self.input_sample_set1 = sample_set(self.input_dim1) + self.input_sample_set2 = sample_set(self.input_dim2) + self.input_sample_set3 = sample_set(self.input_dim3) + + self.input_sample_set4 = sample_set(self.input_domain1.shape[0]) + self.input_sample_set4.set_domain(self.input_domain1) + + self.input_sample_set5 = sample_set(self.input_domain3.shape[0]) + self.input_sample_set5.set_domain(self.input_domain3) + + self.input_sample_set6 = sample_set(self.input_domain10.shape[0]) + self.input_sample_set6.set_domain(self.input_domain10) + def tearDown(self): """ Clean up extra files """ + comm.barrier() if comm.rank == 0: for f in self.savefiles: if os.path.exists(f+".mat"): os.remove(f+".mat") - if comm.size > 1: - for f in self.savefiles: - proc_savefile = os.path.join(local_path, os.path.dirname(f), - "proc{}{}.mat".format(comm.rank, os.path.basename(f))) - print proc_savefile - if os.path.exists(proc_savefile): - os.remove(proc_savefile) - proc_savefile = os.path.join(local_path, os.path.dirname(f), - "p{}proc{}{}.mat".format(comm.rank, comm.rank, os.path.basename(f))) - if os.path.exists(proc_savefile): - os.remove(proc_savefile) - print proc_savefile def test_init(self): """ @@ -185,7 +576,7 @@ def test_init(self): """ assert self.samplers[0].num_samples == 100 assert self.samplers[0].lb_model == self.models[0] - assert bsam.sampler(self.models[0], None).num_samples == None + assert bsam.sampler(self.models[0], None).num_samples is None def test_update(self): """ @@ -194,42 +585,141 @@ def test_update(self): mdict = {"frog":3, "moose":2} self.samplers[0].update_mdict(mdict) assert self.samplers[0].num_samples == mdict["num_samples"] - - def test_user_samples(self): + + def test_compute_QoI_and_create_discretization(self): """ - Test :meth:`bet.sampling.basicSampling.sampler.user_samples` for - three different QoI maps (1 to 1, 3 to 1, 3 to 2, 10 to 4). + Test :meth:`bet.sampling.basicSampling.sampler.user_samples` + for three different QoI maps (1 to 1, 3 to 1, 3 to 2, 10 to 4). 
""" # create a list of different sets of samples list_of_samples = [np.ones((4, )), np.ones((4, 1)), np.ones((4, 3)), np.ones((4, 3)), np.ones((4, 10))] + list_of_dims = [1, 1, 3, 3, 10] - test_list = zip(self.models, self.samplers, list_of_samples, + list_of_sample_sets = [None]*len(list_of_samples) + + for i, array in enumerate(list_of_samples): + list_of_sample_sets[i] = sample_set(list_of_dims[i]) + list_of_sample_sets[i].set_values(array) + + test_list = zip(self.models, self.samplers, list_of_sample_sets, self.savefiles) - - for model, sampler, samples, savefile in test_list: + + for model, sampler, input_sample_set, savefile in test_list: for parallel in [False, True]: - verify_user_samples(model, sampler, samples, savefile, - parallel) + verify_compute_QoI_and_create_discretization(model, sampler, + input_sample_set, savefile, parallel) - def test_random_samples(self): + def test_random_sample_set(self): + """ + Test :meth:`bet.sampling.basicSampling.sampler.random_sample_set` + for six different sample sets + """ + input_sample_set_list = [self.input_sample_set1, + self.input_sample_set2, + self.input_sample_set3, + self.input_sample_set4, + self.input_sample_set5, + self.input_sample_set6] + + test_list = zip(self.samplers, input_sample_set_list) + + for sampler, input_sample_set in test_list: + for sample_type in ["random", "r", "lhs"]: + for num_samples in [None, 25]: + verify_random_sample_set(sampler, sample_type, + input_sample_set, num_samples) + + def test_random_sample_set_domain(self): """ - Test :meth:`bet.sampling.basicSampling.sampler.random_samples` for three - different QoI maps (1 to 1, 3 to 1, 3 to 2, 10 to 4). + Test :meth:`bet.sampling.basicSampling.sampler.random_sample_set_domain` + for five different input domains. """ - param_min_list = [self.param_min1, self.param_min1, self.param_min3, - self.param_min3, self.param_min10] - param_max_list = [self.param_max1, self.param_max1, self.param_max3, - self.param_max3, self.param_max10] + input_domain_list = [self.input_domain1, self.input_domain1, + self.input_domain3, self.input_domain3, self.input_domain10] + + test_list = zip(self.samplers, input_domain_list) + + for sampler, input_domain in test_list: + for sample_type in ["random", "r", "lhs"]: + for num_samples in [None, 25]: + verify_random_sample_set_domain(sampler, sample_type, + input_domain, num_samples) + def test_random_sample_set_dim(self): + """ + Test :meth:`bet.sampling.basicSampling.sampler.random_sample_set_dim` + for three different input dimensions. 
+ """ + input_dim_list = [self.input_dim1, self.input_dim2, self.input_dim3] + + test_list = zip(self.samplers, input_dim_list) + + for sampler, input_dim in test_list: + for sample_type in ["random", "r", "lhs"]: + for num_samples in [None, 25]: + verify_random_sample_set_dimension(sampler, sample_type, + input_dim, num_samples) + + def test_regular_sample_set(self): + """ + Test :meth:`bet.sampling.basicSampling.sampler.regular_sample_set` + for six different sample sets + """ + input_sample_set_list = [self.input_sample_set1, + self.input_sample_set2, + self.input_sample_set4, + self.input_sample_set5] + + test_list = zip(self.samplers, input_sample_set_list) + + for sampler, input_sample_set in test_list: + for num_samples_per_dim in [None, 10]: + verify_regular_sample_set(sampler, input_sample_set, num_samples_per_dim) + + def test_regular_sample_set_domain(self): + """ + Test :meth:`bet.sampling.basicSampling.sampler.regular_sample_set_domain` + for six different sample sets + """ + input_domain_list= [self.input_domain1, + self.input_domain3] + + test_list = zip(self.samplers, input_domain_list) + + for sampler, input_domain in test_list: + for num_samples_per_dim in [None, 10]: + verify_regular_sample_set_domain(sampler, input_domain, num_samples_per_dim) + + def test_regular_sample_set_dimension(self): + """ + Test :meth:`bet.sampling.basicSampling.sampler.regular_sample_set_dimension` + for six different sample sets + """ + input_dimension_list = [self.input_dim1, + self.input_dim2] + + test_list = zip(self.samplers, input_dimension_list) + + for sampler, input_dim in test_list: + for num_samples_per_dim in [None, 10]: + verify_regular_sample_set_dimension(sampler, input_dim, num_samples_per_dim) + + def test_create_random_discretization(self): + """ + Test :meth:`bet.sampling.basicSampling.sampler.create_random_discretization` + for three different QoI maps (1 to 1, 3 to 1, 3 to 2, 10 to 4). + """ + input_domain_list = [self.input_domain1, self.input_domain1, + self.input_domain3, self.input_domain3, self.input_domain10] - test_list = zip(self.models, self.samplers, param_min_list, - param_max_list, self.savefiles) + test_list = zip(self.models, self.samplers, input_domain_list, + self.savefiles) - for model, sampler, param_min, param_max, savefile in test_list: + for model, sampler, input_domain, savefile in test_list: for sample_type in ["random", "r", "lhs"]: for num_samples in [None, 25]: for parallel in [False, True]: - verify_random_samples(model, sampler, sample_type, - param_min, param_max, num_samples, savefile, - parallel) + verify_create_random_discretization(model, sampler, + sample_type, input_domain, num_samples, + savefile, parallel) diff --git a/test/test_sensitivity/test_chooseQoIs.py b/test/test_sensitivity/test_chooseQoIs.py index 28ee8671..e9a63c82 100644 --- a/test/test_sensitivity/test_chooseQoIs.py +++ b/test/test_sensitivity/test_chooseQoIs.py @@ -12,67 +12,99 @@ import numpy.testing as nptest from itertools import combinations import sys +import bet.sample as sample class ChooseQoIsMethods: """ Test :module:`bet.sensitivity.chooseQoIs`. """ - def test_calculate_avg_condnum(self): + def test_calculate_avg_measure(self): """ - Test :meth:`bet.sensitivity.chooseQoIs.calculate_avg_condnum`. + Test :meth:`bet.sensitivity.chooseQoIs.calculate_avg_measure`. 
""" - self.qoi_set = range(0, self.Lambda_dim) - (self.condnum, self.singvals) = cQoIs.calculate_avg_condnum(self.G, - self.qoi_set) + self.qoi_set = range(0, self.input_dim) + (self.measure, self.singvals) = cQoIs.calculate_avg_measure(\ + self.input_set, self.qoi_set) - # Check that condnum and singvals are the right size - self.assertEqual(isinstance(self.condnum, float), True) + # Check that measure and singvals are the right size + self.assertEqual(isinstance(self.measure, float), True) self.assertEqual(self.singvals.shape, (self.num_centers, - self.Lambda_dim)) + self.input_dim)) + + # Test the method returns an error when more qois are given than + # parameters + self.input_set._jacobians = np.random.uniform(-1, 1, [10, 4, 3]) + with self.assertRaises(ValueError): + cQoIs.calculate_avg_measure(self.input_set) + + def test_calculate_avg_skewness(self): + """ + Test :meth:`bet.sensitivity.chooseQoIs.calculate_avg_skewness`. + """ + self.qoi_set = range(0, self.input_dim) + (self.skewness, self.skewnessgi) = cQoIs.calculate_avg_skewness(\ + self.input_set, self.qoi_set) + + # Check that skewness and skewnessgi are the right size + self.assertEqual(isinstance(self.skewness, float), True) + self.assertEqual(self.skewnessgi.shape, (self.num_centers, + self.input_dim)) - def test_calculate_avg_volume(self): + # Test the method returns an error when more qois are given than + # parameters + self.input_set._jacobians = np.random.uniform(-1, 1, [10, 4, 3]) + with self.assertRaises(ValueError): + cQoIs.calculate_avg_measure(self.input_set) + + def test_calculate_avg_condnum(self): """ - Test :meth:`bet.sensitivity.chooseQoIs.calculate_avg_volume`. + Test :meth:`bet.sensitivity.chooseQoIs.calculate_avg_condnum`. """ - self.qoi_set = range(0, self.Lambda_dim) - (self.volume, self.singvals) = cQoIs.calculate_avg_volume(self.G, - self.qoi_set) + self.qoi_set = range(0, self.input_dim) + (self.condnum, self.singvals) = cQoIs.calculate_avg_condnum(\ + self.input_set, self.qoi_set) # Check that condnum and singvals are the right size - self.assertEqual(isinstance(self.volume, float), True) + self.assertEqual(isinstance(self.condnum, float), True) self.assertEqual(self.singvals.shape, (self.num_centers, - self.Lambda_dim)) + self.input_dim)) + + # Test the method returns an error when more qois are given than + # parameters + self.input_set._jacobians = np.random.uniform(-1, 1, [10, 4, 3]) + with self.assertRaises(ValueError): + cQoIs.calculate_avg_measure(self.input_set) def test_chooseOptQoIs(self): """ Test :meth:`bet.sensitivity.chooseQoIs.chooseOptQoIs`. 
""" - self.qoiIndices = range(0, self.num_qois) - self.condnum_indices_mat = cQoIs.chooseOptQoIs(self.G, self.qoiIndices, - self.num_qois_return, self.num_optsets_return) - self.condnum_indices_mat_vol = cQoIs.chooseOptQoIs(self.G, - self.qoiIndices, self.num_qois_return, self.num_optsets_return, - volume=True) + self.qoiIndices = range(0, self.output_dim) + self.condnum_indices_mat = cQoIs.chooseOptQoIs(self.input_set, + self.qoiIndices, self.output_dim_return, self.num_optsets_return) + self.condnum_indices_mat_vol = cQoIs.chooseOptQoIs(self.input_set, + self.qoiIndices, self.output_dim_return, self.num_optsets_return, + measure=True) # Test the method returns the correct size array self.assertEqual(self.condnum_indices_mat.shape, - (self.num_optsets_return, self.num_qois_return + 1)) + (self.num_optsets_return, self.output_dim_return + 1)) self.assertEqual(self.condnum_indices_mat_vol.shape, - (self.num_optsets_return, self.num_qois_return + 1)) + (self.num_optsets_return, self.output_dim_return + 1)) # Check that the 'global condition number' is greater than or equal to 1 nptest.assert_array_less(1.0, self.condnum_indices_mat[:, 0]) - # For volume, check that it is greater than or equal to 0 + # For measure, check that it is greater than or equal to 0 nptest.assert_array_less(0.0, self.condnum_indices_mat_vol[:, 0]) # Test the method returns the known best set of QoIs (chosen to be - # last Lambda_dim indices) - nptest.assert_array_less(self.num_qois-self.Lambda_dim-1, + # last input_dim indices) + nptest.assert_array_less(self.output_dim-self.input_dim-1, self.condnum_indices_mat[0, 1:]) - nptest.assert_array_less(self.num_qois-self.Lambda_dim-1, + nptest.assert_array_less(self.output_dim-self.input_dim-1, self.condnum_indices_mat_vol[0, 1:]) # Test that none of the best chosen QoIs are the same @@ -86,20 +118,20 @@ def test_chooseOptQoIs(self): # Test the method for a set of QoIs rather than all possible. Choose # this set so that the optimal choice is not removed. 
self.qoiIndices = np.concatenate([range(1, 3, 2), - range(4, self.num_qois)]) - self.condnum_indices_mat = cQoIs.chooseOptQoIs(self.G, self.qoiIndices, - self.num_qois_return, self.num_optsets_return) + range(4, self.output_dim)]) + self.condnum_indices_mat = cQoIs.chooseOptQoIs(self.input_set, + self.qoiIndices, self.output_dim_return, self.num_optsets_return) - self.condnum_indices_mat_vol = cQoIs.chooseOptQoIs(self.G, - self.qoiIndices, self.num_qois_return, self.num_optsets_return, - volume=True) + self.condnum_indices_mat_vol = cQoIs.chooseOptQoIs(self.input_set, + self.qoiIndices, self.output_dim_return, self.num_optsets_return, + measure=True) # Test the method returns the correct number of qois self.assertEqual(self.condnum_indices_mat.shape, - (self.num_optsets_return, self.num_qois_return + 1)) + (self.num_optsets_return, self.output_dim_return + 1)) self.assertEqual(self.condnum_indices_mat_vol.shape, - (self.num_optsets_return, self.num_qois_return + 1)) + (self.num_optsets_return, self.output_dim_return + 1)) # Check that the 'global condidtion number' is greater than or equal # to 1 @@ -108,11 +140,11 @@ def test_chooseOptQoIs(self): nptest.assert_array_less(0.0, self.condnum_indices_mat_vol[:, 0]) # Test the method returns the known best set of QoIs (chosen to be - # last Lambda_dim indices) - nptest.assert_array_less(self.num_qois-self.Lambda_dim-1, + # last input_dim indices) + nptest.assert_array_less(self.output_dim-self.input_dim-1, self.condnum_indices_mat[0, 1:]) - nptest.assert_array_less(self.num_qois-self.Lambda_dim-1, + nptest.assert_array_less(self.output_dim-self.input_dim-1, self.condnum_indices_mat_vol[0, 1:]) # Test that none of the best chosen QoIs are the same @@ -126,181 +158,200 @@ def test_chooseOptQoIs_verbose(self): """ Test :meth:`bet.sensitivity.chooseQoIs.chooseOptQoIs_verbose`. """ - self.qoiIndices = range(0, self.num_qois) + self.qoiIndices = range(0, self.output_dim) [self.condnum_indices_mat, self.optsingvals] = \ - cQoIs.chooseOptQoIs_verbose(self.G, self.qoiIndices, - self.num_qois_return, self.num_optsets_return) + cQoIs.chooseOptQoIs_verbose(self.input_set, self.qoiIndices, + self.output_dim_return, self.num_optsets_return) # Test that optsingvals is the right shape self.assertEqual(self.optsingvals.shape, ((self.num_centers, - self.num_qois_return, self.num_optsets_return))) + self.output_dim_return, self.num_optsets_return))) def test_find_unique_vecs(self): """ Test :meth:`bet.sensitivity.chooseQoIs.find_unique_vecs`. """ - self.qoiIndices = range(0, self.num_qois) - unique_indices = cQoIs.find_unique_vecs(self.G, self.inner_prod_tol, - self.qoiIndices) + self.qoiIndices = range(0, self.output_dim) + unique_indices = cQoIs.find_unique_vecs(self.input_set, + self.inner_prod_tol, self.qoiIndices) # Test that pairwise inner products are <= inner_prod_tol pairs = np.array(list(combinations(list(unique_indices), 2))) for pair in range(pairs.shape[0]): curr_set = pairs[pair] - curr_inner_prod = np.sum(self.G[:, curr_set[0], :] * self.G[:, - curr_set[1], :]) / self.G.shape[0] + curr_inner_prod = np.sum(self.input_set._jacobians[:, + curr_set[0], :] * self.input_set._jacobians[:, + curr_set[1], :]) / self.input_set._jacobians.shape[0] nptest.assert_array_less(curr_inner_prod, self.inner_prod_tol) def test_chooseOptQoIs_large(self): """ Test :meth:`bet.sensitivity.chooseQoIs.chooseOptQoIs_large`. 
""" - self.qoiIndices = range(0, self.num_qois) - best_sets = cQoIs.chooseOptQoIs_large(self.G, qoiIndices=self.qoiIndices, - inner_prod_tol=self.inner_prod_tol, cond_tol=self.cond_tol) + self.qoiIndices = range(0, self.output_dim) + best_sets = cQoIs.chooseOptQoIs_large(self.input_set, + qoiIndices=self.qoiIndices, inner_prod_tol=self.inner_prod_tol, + measskew_tol=self.measskew_tol) - if self.cond_tol == np.inf: - self.cond_tol = sys.float_info[0] + if self.measskew_tol == np.inf: + self.measskew_tol = sys.float_info[0] # Test that the best_sets have condition number less than the tolerance - for Ldim in range(self.Lambda_dim - 1): + for Ldim in range(self.input_dim - 1): inds = best_sets[Ldim][:, 0] != np.inf - nptest.assert_array_less(best_sets[Ldim][inds, 0], self.cond_tol) + nptest.assert_array_less(best_sets[Ldim][inds, 0], + self.measskew_tol) def test_chooseOptQoIs_large_verbose(self): """ Test :meth:`bet.sensitivity.chooseQoIs.chooseOptQoIs_large_verbose`. """ - self.qoiIndices = range(0, self.num_qois) - [best_sets, optsingvals_list] = cQoIs.chooseOptQoIs_large_verbose(self.G, - qoiIndices=self.qoiIndices, num_optsets_return=self.num_optsets_return, - inner_prod_tol=self.inner_prod_tol, cond_tol=self.cond_tol) + self.qoiIndices = range(0, self.output_dim) + [best_sets, optsingvals_list] = cQoIs.chooseOptQoIs_large_verbose(\ + self.input_set, qoiIndices=self.qoiIndices, + num_optsets_return=self.num_optsets_return, + inner_prod_tol=self.inner_prod_tol, measskew_tol=self.measskew_tol) - # Test that Lambda_dim - 1 optsingval tensors are returned - self.assertEqual(len(optsingvals_list), self.Lambda_dim - 1) + # Test that input_dim - 1 optsingval tensors are returned + self.assertEqual(len(optsingvals_list), self.input_dim - 1) # Test that each tensor is the right shape - for i in range(self.Lambda_dim - 1): + for i in range(self.input_dim - 1): self.assertEqual(optsingvals_list[i].shape, (self.num_centers, i + 2, self.num_optsets_return)) class test_2to20_choose2(ChooseQoIsMethods, unittest.TestCase): def setUp(self): - self.Lambda_dim = 2 - self.num_qois_return = 2 + self.input_dim = 2 + self.input_set = sample.sample_set(self.input_dim) + self.input_set_centers = sample.sample_set(self.input_dim) + self.output_dim_return = 2 self.num_optsets_return = 5 self.radius = 0.01 np.random.seed(0) self.num_centers = 10 - self.centers = np.random.random((self.num_centers, self.Lambda_dim)) - self.samples = grad.sample_l1_ball(self.centers, - self.Lambda_dim + 1, self.radius) + self.centers = np.random.random((self.num_centers, self.input_dim)) + self.input_set_centers._values = self.centers + self.input_set._values = grad.sample_l1_ball(self.input_set_centers, self.input_dim + 1, self.radius) - self.num_qois = 20 - coeffs = np.random.random((self.Lambda_dim, - self.num_qois-self.Lambda_dim)) - self.coeffs = np.append(coeffs, np.eye(self.Lambda_dim), axis=1) + self.output_dim = 20 + self.output_set = sample.sample_set(self.output_dim) + coeffs = np.random.random((self.input_dim, + self.output_dim-self.input_dim)) + self.coeffs = np.append(coeffs, np.eye(self.input_dim), axis=1) - self.data = self.samples.dot(self.coeffs) - self.G = grad.calculate_gradients_rbf(self.samples, self.data, - self.centers) + self.output_set._values = self.input_set._values.dot(self.coeffs) + self.input_set._jacobians = grad.calculate_gradients_rbf(\ + self.input_set, self.output_set, self.input_set_centers) self.inner_prod_tol = 1.0 - self.cond_tol = 100.0 + self.measskew_tol = 100.0 class 
test_4to20_choose4(ChooseQoIsMethods, unittest.TestCase): def setUp(self): - self.Lambda_dim = 4 - self.num_qois_return = 4 + self.input_dim = 4 + self.input_set = sample.sample_set(self.input_dim) + self.input_set_centers = sample.sample_set(self.input_dim) + self.output_dim_return = 4 self.num_optsets_return = 5 self.radius = 0.01 np.random.seed(0) self.num_centers = 100 - self.centers = np.random.random((self.num_centers, self.Lambda_dim)) - self.samples = grad.sample_l1_ball(self.centers, - self.Lambda_dim + 1, self.radius) + self.centers = np.random.random((self.num_centers, self.input_dim)) + self.input_set_centers._values = self.centers + self.input_set._values = grad.sample_l1_ball(self.input_set_centers, self.input_dim + 1, self.radius) - self.num_qois = 20 - coeffs = np.random.random((self.Lambda_dim, - self.num_qois-self.Lambda_dim)) - self.coeffs = np.append(coeffs, np.eye(self.Lambda_dim), axis=1) + self.output_dim = 20 + self.output_set = sample.sample_set(self.output_dim) + coeffs = np.random.random((self.input_dim, + self.output_dim-self.input_dim)) + self.coeffs = np.append(coeffs, np.eye(self.input_dim), axis=1) - self.data = self.samples.dot(self.coeffs) - self.G = grad.calculate_gradients_rbf(self.samples, self.data, - self.centers) + self.output_set._values = self.input_set._values.dot(self.coeffs) + self.input_set._jacobians = grad.calculate_gradients_rbf(\ + self.input_set, self.output_set, self.input_set_centers) self.inner_prod_tol = 0.9 - self.cond_tol = 20.0 + self.measskew_tol = 20.0 class test_9to15_choose9(ChooseQoIsMethods, unittest.TestCase): def setUp(self): - self.Lambda_dim = 9 - self.num_qois_return = 9 + self.input_dim = 9 + self.input_set = sample.sample_set(self.input_dim) + self.input_set_centers = sample.sample_set(self.input_dim) + self.output_dim_return = 9 self.num_optsets_return = 50 self.radius = 0.01 np.random.seed(0) self.num_centers = 15 - self.centers = np.random.random((self.num_centers, self.Lambda_dim)) - self.samples = grad.sample_l1_ball(self.centers, self.Lambda_dim + \ - 1, self.radius) + self.centers = np.random.random((self.num_centers, self.input_dim)) + self.input_set_centers._values = self.centers + self.input_set._values = grad.sample_l1_ball(self.input_set_centers, self.input_dim + 1, self.radius) - self.num_qois = 15 - coeffs = np.random.random((self.Lambda_dim, - self.num_qois - self.Lambda_dim)) - self.coeffs = np.append(coeffs, np.eye(self.Lambda_dim), axis=1) + self.output_dim = 15 + self.output_set = sample.sample_set(self.output_dim) + coeffs = np.random.random((self.input_dim, + self.output_dim - self.input_dim)) + self.coeffs = np.append(coeffs, np.eye(self.input_dim), axis=1) - self.data = self.samples.dot(self.coeffs) - self.G = grad.calculate_gradients_rbf(self.samples, self.data, - self.centers) + self.output_set._values = self.input_set._values.dot(self.coeffs) + self.input_set._jacobians = grad.calculate_gradients_rbf(\ + self.input_set, self.output_set, self.input_set_centers) self.inner_prod_tol = 0.8 - self.cond_tol = 100.0 + self.measskew_tol = 100.0 class test_9to15_choose4(ChooseQoIsMethods, unittest.TestCase): def setUp(self): - self.Lambda_dim = 9 - self.num_qois_return = 4 + self.input_dim = 9 + self.input_set = sample.sample_set(self.input_dim) + self.input_set_centers = sample.sample_set(self.input_dim) + self.output_dim_return = 4 self.num_optsets_return = 1 self.radius = 0.01 np.random.seed(0) self.num_centers = 11 - self.centers = np.random.random((self.num_centers, self.Lambda_dim)) - self.samples 
= grad.sample_l1_ball(self.centers, - self.Lambda_dim + 1, self.radius) + self.centers = np.random.random((self.num_centers, self.input_dim)) + self.input_set_centers._values = self.centers + self.input_set._values = grad.sample_l1_ball(self.input_set_centers, self.input_dim + 1, self.radius) - self.num_qois = 15 - coeffs = np.random.random((self.Lambda_dim, self.num_qois - \ - self.Lambda_dim)) - self.coeffs = np.append(coeffs, np.eye(self.Lambda_dim), axis=1) + self.output_dim = 15 + self.output_set = sample.sample_set(self.output_dim) + coeffs = np.random.random((self.input_dim, self.output_dim - \ + self.input_dim)) + self.coeffs = np.append(coeffs, np.eye(self.input_dim), axis=1) - self.data = self.samples.dot(self.coeffs) - self.G = grad.calculate_gradients_rbf(self.samples, self.data, - self.centers) + self.output_set._values = self.input_set._values.dot(self.coeffs) + self.input_set._jacobians = grad.calculate_gradients_rbf(\ + self.input_set, self.output_set, self.input_set_centers) self.inner_prod_tol = 0.9 - self.cond_tol = 50.0 + self.measskew_tol = 50.0 class test_2to28_choose2_zeros(ChooseQoIsMethods, unittest.TestCase): def setUp(self): - self.Lambda_dim = 2 - self.num_qois_return = 2 + self.input_dim = 2 + self.input_set = sample.sample_set(self.input_dim) + self.input_set_centers = sample.sample_set(self.input_dim) + self.output_dim_return = 2 self.num_optsets_return = 5 self.radius = 0.01 np.random.seed(0) self.num_centers = 10 - self.centers = np.random.random((self.num_centers, self.Lambda_dim)) - self.samples = grad.sample_l1_ball(self.centers, - self.Lambda_dim + 1, self.radius) + self.centers = np.random.random((self.num_centers, self.input_dim)) + self.input_set_centers._values = self.centers + self.input_set._values = grad.sample_l1_ball(self.input_set_centers, self.input_dim + 1, self.radius) - self.num_qois = 28 - coeffs = np.zeros((self.Lambda_dim, 2*self.Lambda_dim)) - coeffs = np.append(coeffs, np.random.random((self.Lambda_dim, - self.num_qois - 3 * self.Lambda_dim)), axis=1) - self.coeffs = np.append(coeffs, np.eye(self.Lambda_dim), axis=1) + self.output_dim = 28 + self.output_set = sample.sample_set(self.output_dim) + coeffs = np.zeros((self.input_dim, 2*self.input_dim)) + coeffs = np.append(coeffs, np.random.random((self.input_dim, + self.output_dim - 3 * self.input_dim)), axis=1) + self.coeffs = np.append(coeffs, np.eye(self.input_dim), axis=1) - self.data = self.samples.dot(self.coeffs) - self.G = grad.calculate_gradients_rbf(self.samples, self.data, - self.centers) + self.output_set._values = self.input_set._values.dot(self.coeffs) + self.input_set._jacobians = grad.calculate_gradients_rbf(\ + self.input_set, self.output_set, self.input_set_centers) self.inner_prod_tol = 0.9 - self.cond_tol = np.inf + self.measskew_tol = np.inf diff --git a/test/test_sensitivity/test_gradients.py b/test/test_sensitivity/test_gradients.py index 2ce1e84b..4fb8514e 100644 --- a/test/test_sensitivity/test_gradients.py +++ b/test/test_sensitivity/test_gradients.py @@ -9,6 +9,7 @@ import bet.sensitivity.gradients as grad import numpy as np import numpy.testing as nptest +import bet.sample as sample class GradientsMethods: """ @@ -19,80 +20,86 @@ def test_sample_linf_ball(self): """ Test :meth:`bet.sensitivity.gradients.sample_linf_ball`. 
""" - self.samples = grad.sample_linf_ball(self.centers, - self.num_close, self.rvec, self.lam_domain) + self.input_set._values = grad.sample_linf_ball(self.input_set_centers, self.num_close, self.rvec) # Test the method returns the correct dimensions - self.assertEqual(self.samples.shape, ((self.num_close+1) * \ - self.num_centers, self.Lambda_dim)) + self.assertEqual(self.input_set._values.shape, ((self.num_close+1) * self.num_centers, self.input_dim)) # Check the method returns centers followed by the clusters around the # first center. self.repeat = np.repeat(self.centers, self.num_close, axis=0) - nptest.assert_array_less(np.linalg.norm(self.samples[self.num_centers:]\ - - self.repeat, np.inf, axis=1), np.max(self.rvec)) + + nptest.assert_array_less(np.linalg.norm(self.input_set._values[\ + self.num_centers:] - self.repeat, np.inf, axis=1), + np.max(self.rvec)) # Check that the samples are in lam_domain - for Ldim in range(self.Lambda_dim): - nptest.assert_array_less(self.lam_domain[Ldim,0], - self.samples[:,Ldim]) - nptest.assert_array_less(self.samples[:,Ldim], - self.lam_domain[Ldim,1]) + for Ldim in range(self.input_set._dim): + nptest.assert_array_less(self.input_set._domain[Ldim, 0], + self.input_set._values[:, Ldim]) + nptest.assert_array_less(self.input_set._values[:, Ldim], + self.input_set._domain[Ldim, 1]) + def test_sample_l1_ball(self): """ Test :meth:`bet.sensitivity.gradients.sample_l1_ball`. """ - self.samples = grad.sample_l1_ball(self.centers, self.num_close, - self.rvec) + self.input_set._values = grad.sample_l1_ball(self.input_set_centers, self.num_close, self.rvec) # Test that the samples are within max(rvec) of center (l1 dist) self.repeat = np.repeat(self.centers, self.num_close, axis=0) - nptest.assert_array_less(np.linalg.norm(self.samples[self.num_centers:]\ - - self.repeat, 1, axis=1), np.max(self.rvec)) + nptest.assert_array_less(np.linalg.norm(self.input_set._values[\ + self.num_centers:] - self.repeat, np.inf, axis=1), + np.max(self.rvec)) # Test the method returns the correct dimensions - self.assertEqual(self.samples.shape, ((self.num_close+1) * \ - self.num_centers, self.Lambda_dim)) + self.assertEqual(self.input_set._values.shape, ((self.num_close+1) * \ + self.num_centers, self.input_dim)) # Test FD methods def test_pick_ffd_points(self): """ Test :meth:`bet.sensitivity.gradients.sample_linf_ball`. 
""" - self.samples = grad.pick_ffd_points(self.centers, self.rvec) + self.input_set._values = grad.pick_ffd_points(self.input_set_centers, + self.rvec) + + #self.samples = grad.pick_ffd_points(self.centers, self.rvec) if not isinstance(self.rvec, np.ndarray): - self.rvec = np.ones(self.Lambda_dim) * self.rvec + self.rvec = np.ones(self.input_dim) * self.rvec # Check the distance to the corresponding center is equal to rvec - self.centersrepeat = np.repeat(self.centers, self.Lambda_dim, axis=0) + self.centersrepeat = np.repeat(self.centers, self.input_set._dim, axis=0) nptest.assert_array_almost_equal(np.linalg.norm(self.centersrepeat - \ - self.samples[self.num_centers:], axis=1), np.tile(self.rvec, - self.num_centers)) + self.input_set._values[self.num_centers:], axis=1), + np.tile(self.rvec, self.num_centers)) # Test the method returns the correct dimensions - self.assertEqual(self.samples.shape, ((self.Lambda_dim+1) * \ - self.num_centers, self.Lambda_dim)) + self.assertEqual(self.input_set._values.shape, ((self.input_set._dim + \ + 1) * self.num_centers, self.input_set._dim)) def test_pick_cfd_points(self): """ Test :meth:`bet.sensitivity.gradients.sample_l1_ball`. """ - self.samples = grad.pick_cfd_points(self.centers, self.rvec) + self.input_set._values = grad.pick_cfd_points(self.input_set_centers, + self.rvec) if not isinstance(self.rvec, np.ndarray): - self.rvec = np.ones(self.Lambda_dim) * self.rvec + self.rvec = np.ones(self.input_dim) * self.rvec # Check the distance to the corresponding center is equal to rvec - self.centersrepeat = np.repeat(self.centers, 2*self.Lambda_dim, axis=0) + self.centersrepeat = np.repeat(self.centers, 2*self.input_set._dim, + axis=0) nptest.assert_array_almost_equal(np.linalg.norm(self.centersrepeat - \ - self.samples[self.num_centers:], axis=1), np.tile(self.rvec, - self.num_centers * 2)) + self.input_set._values[self.num_centers:], axis=1), + np.tile(self.rvec, self.num_centers * 2)) # Test the method returns the correct dimension - self.assertEqual(self.samples.shape, ((2*self.Lambda_dim + 1) * \ - self.num_centers, self.Lambda_dim)) + self.assertEqual(self.input_set._values.shape, ((2*self.input_dim + 1) \ + * self.num_centers, self.input_set._dim)) # Test RBF methods def test_radial_basis_function(self): @@ -127,63 +134,75 @@ def test_calculate_gradients_rbf(self): """ Test :meth:`bet.sensitivity.gradients.calculate_gradients_rbf`. 
""" - self.samples = grad.sample_l1_ball(self.centers, self.num_close, - self.rvec) - self.data = self.samples.dot(self.coeffs) - self.G = grad.calculate_gradients_rbf(self.samples, self.data, - self.centers) + self.output_set = sample.sample_set(self.output_dim) + self.input_set._values = grad.sample_l1_ball(self.input_set_centers, self.num_close, self.rvec) + self.output_set._values = self.input_set._values.dot(self.coeffs) + self.input_set._jacobians = grad.calculate_gradients_rbf(self.input_set, + self.output_set, self.input_set_centers) # Test the method returns the correct size tensor - self.assertEqual(self.G.shape, (self.num_centers, self.num_qois, - self.Lambda_dim)) + self.assertEqual(self.input_set._jacobians.shape, (self.num_centers, + self.output_dim, self.input_dim)) # Test that each vector is normalized or a zero vector - normG = np.linalg.norm(self.G, ord=1, axis=2) + normG = np.linalg.norm(self.input_set._jacobians, ord=1, axis=2) - # If its a zero vectors, make it the unit vector in Lambda_dim - self.G[normG==0] = 1.0/self.Lambda_dim - nptest.assert_array_almost_equal(np.linalg.norm(self.G, ord=1, axis=2), - np.ones((self.G.shape[0], self.G.shape[1]))) + # If its a zero vectors, make it the unit vector in input_dim + self.input_set._jacobians[normG==0] = 1.0/self.input_dim + nptest.assert_array_almost_equal(np.linalg.norm( + self.input_set._jacobians, ord=1, axis=2), + np.ones((self.input_set._jacobians.shape[0], + self.input_set._jacobians.shape[1]))) def test_calculate_gradients_ffd(self): """ Test :meth:`bet.sensitivity.gradients.calculate_gradients_ffd`. """ - self.samples = grad.pick_ffd_points(self.centers, self.rvec) - self.data = self.samples.dot(self.coeffs) - self.G = grad.calculate_gradients_ffd(self.samples, self.data) + self.output_set = sample.sample_set(self.output_dim) + self.input_set._values = grad.pick_ffd_points(self.input_set_centers, + self.rvec) + self.output_set._values = self.input_set._values.dot(self.coeffs) + self.input_set._jacobians = grad.calculate_gradients_ffd(self.input_set, + self.output_set) # Test the method returns the correct size tensor - self.assertEqual(self.G.shape, (self.num_centers, self.num_qois, - self.Lambda_dim)) + self.assertEqual(self.input_set._jacobians.shape, (self.num_centers, + self.output_dim, self.input_dim)) # Test that each vector is normalized - normG = np.linalg.norm(self.G, ord=1, axis=2) + normG = np.linalg.norm(self.input_set._jacobians, ord=1, axis=2) - # If its a zero vectors, make it the unit vector in Lambda_dim - self.G[normG==0] = 1.0/self.Lambda_dim - nptest.assert_array_almost_equal(np.linalg.norm(self.G, ord=1, axis=2), - np.ones((self.G.shape[0], self.G.shape[1]))) + # If its a zero vectors, make it the unit vector in input_dim + self.input_set._jacobians[normG==0] = 1.0/self.input_dim + nptest.assert_array_almost_equal(np.linalg.norm(\ + self.input_set._jacobians, ord=1, axis=2), + np.ones((self.input_set._jacobians.shape[0], + self.input_set._jacobians.shape[1]))) def test_calculate_gradients_cfd(self): """ Test :meth:`bet.sensitivity.gradients.calculate_gradients_cfd`. 
""" - self.samples = grad.pick_cfd_points(self.centers, self.rvec) - self.data = self.samples.dot(self.coeffs) - self.G = grad.calculate_gradients_cfd(self.samples, self.data) + self.output_set = sample.sample_set(self.output_dim) + self.input_set._values = grad.pick_cfd_points(self.input_set_centers, + self.rvec) + self.output_set._values = self.input_set._values.dot(self.coeffs) + self.input_set._jacobians = grad.calculate_gradients_cfd(self.input_set, + self.output_set) # Test the method returns the correct size tensor - self.assertEqual(self.G.shape, (self.num_centers, self.num_qois, - self.Lambda_dim)) + self.assertEqual(self.input_set._jacobians.shape, (self.num_centers, + self.output_dim, self.input_dim)) # Test that each vector is normalized - normG = np.linalg.norm(self.G, ord=1, axis=2) + normG = np.linalg.norm(self.input_set._jacobians, ord=1, axis=2) - # If its a zero vectors, make it the unit vector in Lambda_dim - self.G[normG==0] = 1.0/self.Lambda_dim - nptest.assert_array_almost_equal(np.linalg.norm(self.G, ord=1, axis=2), - np.ones((self.G.shape[0], self.G.shape[1]))) + # If its a zero vectors, make it the unit vector in input_dim + self.input_set._jacobians[normG==0] = 1.0/self.input_set._dim + nptest.assert_array_almost_equal(np.linalg.norm(\ + self.input_set._jacobians, ord=1, axis=2), + np.ones((self.input_set._jacobians.shape[0], + self.input_set._jacobians.shape[1]))) # Test the accuracy of the gradient approximation methods class GradientsAccuracy: @@ -195,244 +214,306 @@ def test_calculate_gradients_rbf_accuracy(self): """ Test :meth:`bet.sensitivity.gradients.calculate_gradients_rbf`. """ - self.G_nonlin = grad.calculate_gradients_rbf(self.samples_rbf, - self.data_nonlin_rbf, normalize=False) + self.input_set_rbf._jacobians = grad.calculate_gradients_rbf(\ + self.input_set_rbf, self.output_set_rbf, normalize=False) - nptest.assert_array_almost_equal(self.G_nonlin - self.G_exact, 0, decimal = 2) + nptest.assert_array_almost_equal(self.input_set_rbf._jacobians - \ + self.G_exact, 0, decimal = 2) def test_calculate_gradients_ffd_accuracy(self): """ Test :meth:`bet.sensitivity.gradients.calculate_gradients_ffd`. """ - self.G_nonlin = grad.calculate_gradients_ffd(self.samples_ffd, - self.data_nonlin_ffd, normalize=False) + self.input_set_ffd._jacobians = grad.calculate_gradients_ffd(\ + self.input_set_ffd, self.output_set_ffd, normalize=False) - nptest.assert_array_almost_equal(self.G_nonlin - self.G_exact, 0, decimal = 2) + nptest.assert_array_almost_equal(self.input_set_ffd._jacobians - \ + self.G_exact, 0, decimal = 2) def test_calculate_gradients_cfd_accuracy(self): """ Test :meth:`bet.sensitivity.gradients.calculate_gradients_cfd`. 
""" - self.G_nonlin = grad.calculate_gradients_cfd(self.samples_cfd, - self.data_nonlin_cfd, normalize=False) + self.input_set_cfd._jacobians = grad.calculate_gradients_cfd(\ + self.input_set_cfd, self.output_set_cfd, normalize=False) - nptest.assert_array_almost_equal(self.G_nonlin - self.G_exact, 0, decimal = 2) + nptest.assert_array_almost_equal(self.input_set_cfd._jacobians - \ + self.G_exact, 0, decimal = 2) # Test cases class test_1to20_1centers_unitsquare(GradientsMethods, unittest.TestCase): def setUp(self): - # Define the parameter space (Lambda) - self.Lambda_dim = 1 - self.lam_domain = np.zeros((self.Lambda_dim, 2)) - self.lam_domain[:,0] = np.zeros(self.Lambda_dim) - self.lam_domain[:,1] = np.ones(self.Lambda_dim) + # Define the input domain (Lambda) + self.input_dim = 1 + self.input_set = sample.sample_set(self.input_dim) + self.input_set_centers = sample.sample_set(self.input_dim) - # Choose random centers to cluster points around + self.lam_domain = np.zeros((self.input_set._dim, 2)) + self.lam_domain[:,0] = np.zeros(self.input_set._dim) + self.lam_domain[:,1] = np.ones(self.input_set._dim) + + self.input_set._domain = self.lam_domain + self.input_set_centers._domain = self.lam_domain + + # Choose random centers in input_domian to cluster points around self.num_centers = 1 - np.random.seed(0) - self.centers = (self.lam_domain[:,1] - self.lam_domain[:,0]) * \ - np.random.random((self.num_centers,self.Lambda_dim)) + \ - self.lam_domain[:,0] - self.num_close = self.Lambda_dim + 1 + self.num_close = self.input_set._dim + 1 self.rvec = 0.1 + np.random.seed(0) + self.centers = np.random.uniform(self.lam_domain[:, 0], + self.lam_domain[:, 1] - self.lam_domain[:, 0], [self.num_centers, + self.input_set._dim]) + self.input_set_centers._values = self.centers # Choose array shapes for RBF methods np.random.seed(0) self.radii_rbf = np.random.random([self.num_close, self.num_close]) - self.radii_rbfdxi = np.random.random([self.Lambda_dim, self.num_close]) - self.dxi = np.random.random([self.Lambda_dim, self.num_close]) + self.radii_rbfdxi = np.random.random([self.input_dim, self.num_close]) + self.dxi = np.random.random([self.input_dim, self.num_close]) # Define example linear functions (QoIs) for gradient approximation # methods - self.num_qois = 20 - coeffs = np.random.random((self.Lambda_dim, - self.num_qois-self.Lambda_dim)) - self.coeffs = np.append(coeffs, np.eye(self.Lambda_dim), axis=1) + self.output_dim = 20 + coeffs = np.random.random((self.input_dim, + self.output_dim-self.input_dim)) + self.coeffs = np.append(coeffs, np.eye(self.input_dim), axis=1) class test_2to20_1centers_unitsquare(GradientsMethods, unittest.TestCase): def setUp(self): # Define the parameter space (Lambda) - self.Lambda_dim = 2 - self.lam_domain = np.zeros((self.Lambda_dim, 2)) - self.lam_domain[:,0] = np.zeros(self.Lambda_dim) - self.lam_domain[:,1] = np.ones(self.Lambda_dim) + self.input_dim = 2 + self.input_set = sample.sample_set(self.input_dim) + self.input_set_centers = sample.sample_set(self.input_dim) + + self.lam_domain = np.zeros((self.input_dim, 2)) + self.lam_domain[:,0] = np.zeros(self.input_dim) + self.lam_domain[:,1] = np.ones(self.input_dim) + + self.input_set._domain = self.lam_domain + self.input_set_centers._domain = self.lam_domain # Choose random centers to cluster points around self.num_centers = 1 np.random.seed(0) - self.centers = (self.lam_domain[:,1] - self.lam_domain[:,0]) * \ - np.random.random((self.num_centers,self.Lambda_dim)) + \ - self.lam_domain[:,0] - self.num_close = 
self.Lambda_dim + 1 - self.rvec = np.random.random(self.Lambda_dim) + self.centers = np.random.uniform(self.lam_domain[:, 0], + self.lam_domain[:, 1] - self.lam_domain[:, 0], [self.num_centers, + self.input_set._dim]) + self.input_set_centers._values = self.centers + self.num_close = self.input_dim + 1 + self.rvec = np.random.random(self.input_dim) # Choose array shapes for RBF methods np.random.seed(0) self.radii_rbf = np.random.random([self.num_close, self.num_close]) - self.radii_rbfdxi = np.random.random([self.Lambda_dim, self.num_close]) - self.dxi = np.random.random([self.Lambda_dim, self.num_close]) + self.radii_rbfdxi = np.random.random([self.input_dim, self.num_close]) + self.dxi = np.random.random([self.input_dim, self.num_close]) # Define example linear functions (QoIs) for gradient approximation # methods - self.num_qois = 20 - coeffs = np.random.random((self.Lambda_dim, - self.num_qois-self.Lambda_dim)) - self.coeffs = np.append(coeffs, np.eye(self.Lambda_dim), axis=1) + self.output_dim = 20 + coeffs = np.random.random((self.input_dim, + self.output_dim-self.input_dim)) + self.coeffs = np.append(coeffs, np.eye(self.input_dim), axis=1) class test_4to20_100centers_randomhyperbox(GradientsMethods, unittest.TestCase): def setUp(self): # Define the parameter space (Lambda) - self.Lambda_dim = 4 - self.lam_domain = np.zeros((self.Lambda_dim, 2)) + self.input_dim = 4 + self.input_set = sample.sample_set(self.input_dim) + self.input_set_centers = sample.sample_set(self.input_dim) + + self.lam_domain = np.zeros((self.input_dim, 2)) np.random.seed(0) - self.lam_domain[:,0] = np.random.random(self.Lambda_dim) - self.lam_domain[:,1] = np.random.random(self.Lambda_dim) + 2 + self.lam_domain[:,0] = np.random.random(self.input_dim) + self.lam_domain[:,1] = np.random.random(self.input_dim) + 2 + + self.input_set._domain = self.lam_domain + self.input_set_centers._domain = self.lam_domain # Choose random centers to cluster points around self.num_centers = 100 - self.centers = (self.lam_domain[:,1] - self.lam_domain[:,0]) * \ - np.random.random((self.num_centers,self.Lambda_dim)) + \ - self.lam_domain[:,0] - self.num_close = self.Lambda_dim + 1 + self.centers = np.random.uniform(self.lam_domain[:, 0], + self.lam_domain[:, 1] - self.lam_domain[:, 0], [self.num_centers, + self.input_set._dim]) + self.input_set_centers._values = self.centers + self.num_close = self.input_set._dim + 1 self.rvec = 0.1 # Choose array shapes for RBF methods np.random.seed(0) self.radii_rbf = np.random.random([self.num_close, self.num_close]) - self.radii_rbfdxi = np.random.random([self.Lambda_dim, self.num_close]) - self.dxi = np.random.random([self.Lambda_dim, self.num_close]) + self.radii_rbfdxi = np.random.random([self.input_dim, self.num_close]) + self.dxi = np.random.random([self.input_dim, self.num_close]) # Define example linear functions (QoIs) for gradient approximation # methods - self.num_qois = 20 - coeffs = np.random.random((self.Lambda_dim, - self.num_qois-self.Lambda_dim)) - self.coeffs = np.append(coeffs, np.eye(self.Lambda_dim), axis=1) + self.output_dim = 20 + coeffs = np.random.random((self.input_dim, + self.output_dim-self.input_dim)) + self.coeffs = np.append(coeffs, np.eye(self.input_dim), axis=1) class test_9to20_100centers_randomhyperbox(GradientsMethods, unittest.TestCase): def setUp(self): # Define the parameter space (Lambda) - self.Lambda_dim = 9 - self.lam_domain = np.zeros((self.Lambda_dim, 2)) + self.input_dim = 9 + self.input_set = sample.sample_set(self.input_dim) + 
self.input_set_centers = sample.sample_set(self.input_dim) + + self.lam_domain = np.zeros((self.input_dim, 2)) np.random.seed(0) - self.lam_domain[:,0] = np.random.random(self.Lambda_dim) - self.lam_domain[:,1] = np.random.random(self.Lambda_dim) + 2 + self.lam_domain[:,0] = np.random.random(self.input_dim) + self.lam_domain[:,1] = np.random.random(self.input_dim) + 2 + + self.input_set._domain = self.lam_domain + self.input_set_centers._domain = self.lam_domain # Choose random centers to cluster points around self.num_centers = 100 - self.centers = (self.lam_domain[:,1] - self.lam_domain[:,0]) * \ - np.random.random((self.num_centers,self.Lambda_dim)) + \ - self.lam_domain[:,0] - self.num_close = self.Lambda_dim + 1 + self.centers = np.random.uniform(self.lam_domain[:, 0], + self.lam_domain[:, 1] - self.lam_domain[:, 0], [self.num_centers, + self.input_set._dim]) + self.input_set_centers._values = self.centers + self.num_close = self.input_dim + 1 self.rvec = 0.1 # Choose array shapes for RBF methods np.random.seed(0) self.radii_rbf = np.random.random([self.num_close, self.num_close]) - self.radii_rbfdxi = np.random.random([self.Lambda_dim, self.num_close]) - self.dxi = np.random.random([self.Lambda_dim, self.num_close]) + self.radii_rbfdxi = np.random.random([self.input_dim, self.num_close]) + self.dxi = np.random.random([self.input_dim, self.num_close]) # Define example linear functions (QoIs) for gradient approximation # methods - self.num_qois = 20 - coeffs = np.random.random((self.Lambda_dim, - self.num_qois-self.Lambda_dim)) - self.coeffs = np.append(coeffs, np.eye(self.Lambda_dim), axis=1) + self.output_dim = 20 + coeffs = np.random.random((self.input_dim, + self.output_dim-self.input_dim)) + self.coeffs = np.append(coeffs, np.eye(self.input_dim), axis=1) class test_15to37_143centers_negrandomhyperbox(GradientsMethods, unittest.TestCase): def setUp(self): # Define the parameter space (Lambda) - self.Lambda_dim = 15 - self.lam_domain = np.zeros((self.Lambda_dim, 2)) + self.input_dim = 15 + self.input_set = sample.sample_set(self.input_dim) + self.input_set_centers = sample.sample_set(self.input_dim) + + self.lam_domain = np.zeros((self.input_dim, 2)) np.random.seed(0) - self.lam_domain[:,0] = -1*np.random.random(self.Lambda_dim) - 2 - self.lam_domain[:,1] = -1*np.random.random(self.Lambda_dim) + self.lam_domain[:,0] = -1*np.random.random(self.input_dim) - 2 + self.lam_domain[:,1] = -1*np.random.random(self.input_dim) + + self.input_set._domain = self.lam_domain + self.input_set_centers._domain = self.lam_domain # Choose random centers to cluster points around self.num_centers = 143 self.centers = (self.lam_domain[:,1] - self.lam_domain[:,0]) * \ - np.random.random((self.num_centers,self.Lambda_dim)) + \ + np.random.random((self.num_centers,self.input_dim)) + \ self.lam_domain[:,0] - self.num_close = self.Lambda_dim + 1 + self.input_set_centers._values = self.centers + self.num_close = self.input_dim + 1 self.rvec = 0.1 # Choose array shapes for RBF methods np.random.seed(0) self.radii_rbf = np.random.random([self.num_close, self.num_close]) - self.radii_rbfdxi = np.random.random([self.Lambda_dim, self.num_close]) - self.dxi = np.random.random([self.Lambda_dim, self.num_close]) + self.radii_rbfdxi = np.random.random([self.input_dim, self.num_close]) + self.dxi = np.random.random([self.input_dim, self.num_close]) # Define example linear functions (QoIs) for gradient approximation # methods - self.num_qois = 37 - coeffs = np.random.random((self.Lambda_dim, - 
self.num_qois-self.Lambda_dim)) - self.coeffs = np.append(coeffs, np.eye(self.Lambda_dim), axis=1) + self.output_dim = 37 + coeffs = np.random.random((self.input_dim, + self.output_dim-self.input_dim)) + self.coeffs = np.append(coeffs, np.eye(self.input_dim), axis=1) class test_9to30_100centers_randomhyperbox_zeroQoIs(GradientsMethods, unittest.TestCase): def setUp(self): # Define the parameter space (Lambda) - self.Lambda_dim = 9 - self.lam_domain = np.zeros((self.Lambda_dim, 2)) + self.input_dim = 9 + self.input_set = sample.sample_set(self.input_dim) + self.input_set_centers = sample.sample_set(self.input_dim) + + self.lam_domain = np.zeros((self.input_dim, 2)) np.random.seed(0) - self.lam_domain[:,0] = np.random.random(self.Lambda_dim) - self.lam_domain[:,1] = np.random.random(self.Lambda_dim) + 2 + self.lam_domain[:,0] = np.random.random(self.input_dim) + self.lam_domain[:,1] = np.random.random(self.input_dim) + 2 + + self.input_set._domain = self.lam_domain + self.input_set_centers._domain = self.lam_domain # Choose random centers to cluster points around self.num_centers = 100 self.centers = (self.lam_domain[:,1] - self.lam_domain[:,0]) * \ - np.random.random((self.num_centers,self.Lambda_dim)) + \ + np.random.random((self.num_centers,self.input_dim)) + \ self.lam_domain[:,0] - self.num_close = self.Lambda_dim + 1 - self.rvec = np.random.random(self.Lambda_dim) + self.input_set_centers._values = self.centers + self.num_close = self.input_dim + 1 + self.rvec = np.random.random(self.input_dim) # Choose array shapes for RBF methods np.random.seed(0) self.radii_rbf = np.random.random([self.num_close, self.num_close]) - self.radii_rbfdxi = np.random.random([self.Lambda_dim, self.num_close]) - self.dxi = np.random.random([self.Lambda_dim, self.num_close]) + self.radii_rbfdxi = np.random.random([self.input_dim, self.num_close]) + self.dxi = np.random.random([self.input_dim, self.num_close]) # Define example linear functions (QoIs) for gradient approximation # methods - self.num_qois = 30 - coeffs = np.zeros((self.Lambda_dim, 2*self.Lambda_dim)) - coeffs = np.append(coeffs, np.random.random((self.Lambda_dim, - self.num_qois-3*self.Lambda_dim)), axis=1) - self.coeffs = np.append(coeffs, np.eye(self.Lambda_dim), axis=1) + self.output_dim = 30 + coeffs = np.zeros((self.input_dim, 2 * self.input_dim)) + coeffs = np.append(coeffs, np.random.random((self.input_dim, + self.output_dim - 3 * self.input_dim)), axis=1) + self.coeffs = np.append(coeffs, np.eye(self.input_dim), axis=1) # Test cases for the gradient approximation accuracy class test_2to2_100centers_unitbox(GradientsAccuracy, unittest.TestCase): def setUp(self): # Define the parameter space (Lambda) - self.Lambda_dim = 2 - self.num_qois = 2 - self.lam_domain = np.zeros((self.Lambda_dim, 2)) - self.lam_domain[:,0] = np.zeros(self.Lambda_dim) - self.lam_domain[:,1] = np.ones(self.Lambda_dim) + self.input_dim = 2 + self.input_set_rbf = sample.sample_set(self.input_dim) + self.input_set_ffd = sample.sample_set(self.input_dim) + self.input_set_cfd = sample.sample_set(self.input_dim) + + self.input_set_centers = sample.sample_set(self.input_dim) + + self.output_dim = 2 + self.output_set_rbf = sample.sample_set(self.output_dim) + self.output_set_ffd = sample.sample_set(self.output_dim) + self.output_set_cfd = sample.sample_set(self.output_dim) + + self.lam_domain = np.zeros((self.input_dim, 2)) + self.lam_domain[:,0] = np.zeros(self.input_dim) + self.lam_domain[:,1] = np.ones(self.input_dim) + + self.input_set_rbf._domain = self.lam_domain + 
self.input_set_ffd._domain = self.lam_domain + self.input_set_cfd._domain = self.lam_domain # Choose random centers to cluster points around self.num_centers = 100 np.random.seed(0) self.centers = (self.lam_domain[:,1] - self.lam_domain[:,0]) * \ - np.random.random((self.num_centers,self.Lambda_dim)) + \ + np.random.random((self.num_centers,self.input_dim)) + \ self.lam_domain[:,0] - self.num_close = self.Lambda_dim + 1 - self.rvec = 0.01 * np.ones(self.Lambda_dim) + self.input_set_centers._values = self.centers + self.num_close = self.input_dim + 1 + self.rvec = 0.01 * np.ones(self.input_dim) - self.samples_rbf = grad.sample_l1_ball(self.centers, self.num_close, + self.input_set_rbf._values = grad.sample_l1_ball(self.input_set_centers, + self.num_close, self.rvec) + self.input_set_ffd._values = grad.pick_ffd_points(self.input_set_centers, + self.rvec) + self.input_set_cfd._values = grad.pick_cfd_points(self.input_set_centers, self.rvec) - self.samples_ffd = grad.pick_ffd_points(self.centers, self.rvec) - self.samples_cfd = grad.pick_cfd_points(self.centers, self.rvec) # Define a vector valued function f : [0,1]x[0,1] -> [x^2, y^2] def f(x): @@ -441,11 +522,11 @@ def f(x): f[:, 1] = x[:, 1]**2 return f - self.data_nonlin_rbf = f(self.samples_rbf) - self.data_nonlin_ffd = f(self.samples_ffd) - self.data_nonlin_cfd = f(self.samples_cfd) + self.output_set_rbf._values = f(self.input_set_rbf._values) + self.output_set_ffd._values = f(self.input_set_ffd._values) + self.output_set_cfd._values = f(self.input_set_cfd._values) - self.G_exact = np.zeros([self.num_centers, self.num_qois, - self.Lambda_dim]) + self.G_exact = np.zeros([self.num_centers, self.output_dim, + self.input_dim]) self.G_exact[:, 0, 0] = 2 * self.centers[:, 0] self.G_exact[:, 1, 1] = 2 * self.centers[:, 1]
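
For reference, the Jacobian-accuracy check that closes this patch can be reproduced outside of BET with plain NumPy. The sketch below is illustrative only: it does not call bet.sensitivity.gradients, and the centered-difference loop merely stands in for grad.calculate_gradients_cfd so the role of G_exact (diagonal entries 2*x for the map f(x, y) = [x**2, y**2]) is easier to see. The sizes, step radius, and seed mirror the test_2to2_100centers_unitbox setup above.

    # Standalone sketch (not BET code) of the gradient-accuracy check above.
    import numpy as np
    import numpy.testing as nptest

    np.random.seed(0)
    num_centers, dim = 100, 2
    r = 0.01                                   # step radius, as in self.rvec
    centers = np.random.random((num_centers, dim))

    def f(x):
        # vector-valued map [0, 1]^2 -> R^2 used by the accuracy tests
        out = np.zeros(x.shape)
        out[:, 0] = x[:, 0] ** 2
        out[:, 1] = x[:, 1] ** 2
        return out

    # exact Jacobian, shape (num_centers, output_dim, input_dim)
    G_exact = np.zeros((num_centers, 2, dim))
    G_exact[:, 0, 0] = 2 * centers[:, 0]
    G_exact[:, 1, 1] = 2 * centers[:, 1]

    # centered finite differences standing in for calculate_gradients_cfd
    G_cfd = np.zeros_like(G_exact)
    for i in range(dim):
        step = np.zeros(dim)
        step[i] = r
        G_cfd[:, :, i] = (f(centers + step) - f(centers - step)) / (2 * r)

    # agreement to roughly 2 decimals, as asserted in the tests
    nptest.assert_array_almost_equal(G_cfd - G_exact, 0, decimal=2)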