diff --git a/examples/density_methods/nonlinearMap/myModel.py b/examples/density_methods/nonlinearMap/myModel.py index 6ec0c3b1..0ec1aa0c 100644 --- a/examples/density_methods/nonlinearMap/myModel.py +++ b/examples/density_methods/nonlinearMap/myModel.py @@ -73,4 +73,4 @@ def QoI_map(parameter_samples): def my_model(parameter_samples): QoI_samples = QoI_map(parameter_samples) - return QoI_samples + return QoI_samples \ No newline at end of file diff --git a/examples/measure_methods/FEniCS/Compute_Save_KL.py b/examples/measure_methods/FEniCS/Compute_Save_KL.py index 18be653f..6aeb183c 100644 --- a/examples/measure_methods/FEniCS/Compute_Save_KL.py +++ b/examples/measure_methods/FEniCS/Compute_Save_KL.py @@ -7,8 +7,19 @@ from projectKL import projectKL from poissonRandField import solvePoissonRandomField import scipy.io as sio -import sys +import argparse +# This has been changed from a function to a script which can be run in +# simple docker container. The function is defined below, but as a script, +# this file takes in-line arguments for numKL. Default value is 2. + +parser = argparse.ArgumentParser(description='Process some integers.') +parser.add_argument('Kval', type=int, nargs='?', default='2', + help='an integer for the number of KL terms') + +nK = parser.parse_args() +print(nK.Kval) +print("which is type: " +str(type(nK.Kval))) def computeSaveKL(numKL): ''' @@ -73,3 +84,8 @@ def computeSaveKL(numKL): kl_mdat['KL_eigen_vals'] = eigen_val sio.savemat("KL_expansion", kl_mdat) + + +# added code which runs KL function defined above with nK terms +computeSaveKL(nK.Kval) +print("Were the KL_expansions saved?") diff --git a/examples/measure_methods/FEniCS/Fenics_Docker_Example.ipynb b/examples/measure_methods/FEniCS/Fenics_Docker_Example.ipynb new file mode 100644 index 00000000..a64fc80f --- /dev/null +++ b/examples/measure_methods/FEniCS/Fenics_Docker_Example.ipynb @@ -0,0 +1,450 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example: FEniCS with Docker\n", + "[Adapted from BET Documentation](http://ut-chg.github.io/BET/examples/example_rst_files/FEniCS.html#fenicsexample)\n", + "\n", + "We will walk through the following example. This example will only run in serial using serial runs of a model (described below). If the user takes Steps (0)-(3) and runs them in a separate script, then the saved discretization object can be loaded into a different script containing Steps (4)-(5) (and optionally including Step (6)) to solve the stochastic inverse problem in parallel using BET. To see an example describing how to run multiple instances of the (serial) model with different parameters, see Example: Multiple Serial FEniCS for more information.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This example requires the following external packages not shipped with BET:\n", + "\n", + "* An installation of Docker\n", + "* A pulled image of FEniCS via Docker\n", + "\n", + "For more information on how to install docker on you computer, visit [the Docker website](https://www.docker.com/). Instructions on how to pull a working version of FEniCS via Docker can be [found here](http://fenics.readthedocs.io/projects/containers/en/latest/index.html)." 
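Before running the example it can help to confirm that the Docker daemon is reachable and that this image has actually been pulled. The sketch below is not part of the original example (the notebook performs a similar `docker version` check further on); the image name `quay.io/fenicsproject/stable` is the one used throughout this example.

```python
# Quick pre-flight check: confirm the Docker daemon responds and that the
# FEniCS image used by this example has already been pulled.
import subprocess as sp

sp.check_call("docker version".split())  # raises an error if Docker is unavailable
images = sp.check_output("docker images".split()).decode()
print("FEniCS image present:", "quay.io/fenicsproject/stable" in images)
```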
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This example generates samples for a KL expansion associated with a covariance defined by `cov` in computeSaveKL.py on an L-shaped mesh that defines the permeability field for a Poisson equation solved in myModel.py.\n", + "\n", + "The quantities of interest (QoI) are defined as two spatial averages of the solution to the PDE.\n", + "\n", + "The user defines the dimension of the parameter space (corresponding to the number of KL terms) and the number of samples in this space." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "Docker allows BET to utilize the FEniCS PDE solver without having to deal with several of the more arduous customizations required to set up FEniCS on the user's personal machine. In myModel_Interface.py, a Docker container is created and then all the FEniCS calculations are executed in the container, afterwhich the container is closed. Because of the portability and customizability of Docker containers, this example provides a framework for how BET could potentially be linked to all sorts of other computational models which require specific computational software.\n", + "\n", + "![Placeholder for actual picture visualizing BET / Docker / FEniCS container interaction](temp_pic.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Even though we are coupling to the state-of-the-art FEniCS code for solving a PDE, we again see that the actual process for solving the stochastic inverse problem is quite simple requiring a total of 5 steps with BET excluding any post-processing the user may want. In general the user will probably not write code with various options as was done here for pedagogical purposes. 
We break down the actual example included with BET step-by-step below, but first, to showcase the overall simplicitly, we show the “entire” code (omitting setting the environment, post-processing, and commenting) required for solving the stochastic inverse problem using some default options:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```python\n", + "sampler = bsam.sampler(my_model)\n", + "\n", + "num_KL_terms = 2\n", + "computeSaveKL(num_KL_terms)\n", + "input_samples = samp.sample_set(num_KL_terms)\n", + "KL_term_min = -3.0\n", + "KL_term_max = 3.0\n", + "input_samples.set_domain(np.repeat([[KL_term_min, KL_term_max]],\n", + " num_KL_terms,\n", + " axis=0))\n", + "input_samples = sampler.regular_sample_set(input_samples, num_samples_per_dim=[10, 10])\n", + "input_samples.estimate_volume_mc()\n", + "\n", + "my_discretization = sampler.compute_QoI_and_create_discretization(input_samples)\n", + "\n", + "param_ref = np.ones((1,num_KL_terms))\n", + "Q_ref = my_model(param_ref)\n", + "simpleFunP.regular_partition_uniform_distribution_rectangle_scaled(\n", + " data_set=my_discretization, Q_ref=Q_ref[0,:], rect_scale=0.1,\n", + " cells_per_dimension=3)\n", + "\n", + "calculateP.prob(my_discretization)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "## Step (0): Setting up the environment\n", + "Import the necessary modules:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import bet.calculateP.simpleFunP as simpleFunP\n", + "import bet.calculateP.calculateP as calculateP\n", + "import bet.postProcess.plotP as plotP\n", + "import bet.postProcess.plotDomains as plotD\n", + "import bet.sample as samp\n", + "import bet.sampling.basicSampling as bsam\n", + "import subprocess as sp\n", + "import os" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (1): Define interface to the model\n", + "Import the Python script interface to the model using FEniCS that takes as input a numpy array of model input parameter samples, generated from the sampler (see below), creates or starts appropriate Docker container, and executes the FEniCS scripts to evaluate the model and generate QoI samples:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from myModel_Interface import bet_docker_interface" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define the sampler that will be used to create the discretization object, which is the fundamental object used by BET to compute solutions to the stochastic inverse problem. The sampler and my_model is the interface of BET to the model, and it allows BET to create input/output samples of the model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "sampler = bsam.sampler(bet_docker_interface)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (2): Describe and sample the input space\n", + "We compute and save the KL expansion once so that this part, which can be computationally expensive, can be done just once and then commented out for future runs of the code using the same set of KL coefficients defining the parameter space." 
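Rather than commenting this step out by hand for later runs, one option is to skip it when a saved expansion is already on disk. The following is a minimal sketch, assuming the file name produced by `Compute_Save_KL.py` (its `sio.savemat("KL_expansion", ...)` call yields `KL_expansion.mat` in the shared working directory); the Docker-based execution of `Compute_Save_KL.py` below could then be wrapped in `if need_KL:`.

```python
# Hypothetical guard (not in the original example): only launch the expensive
# KL computation when no saved expansion from a previous run is found.
import os

need_KL = not os.path.isfile("KL_expansion.mat")
print("KL expansion needs to be computed:", need_KL)
```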
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Choose the number of KL terms\n", + "num_KL_terms = 2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Creating a Docker Container and Running KL Expansion\n", + "\n", + "Since the KL expansion requires FEniCS, we can create a FEniCS docker container to execute the KL expansion script. After we create the container, we can run the script in the container just once." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following checks to make sure docker is installed and running on the user's computer by checking the Docker version. If it is not working, then it will raise an error." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# checks to make sure docker is installed and running correctly\n", + "try:\n", + " dockercommand = (\"docker version\")\n", + " print(sp.check_output(dockercommand.split(\" \")))\n", + "except:\n", + " print(\"\\n Error Running Docker: \\n Check that Docker \"+\n", + " \"is properly installed and currently running \\n\")\n", + " raise" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following code creates the FEniCS Docker container. It shares the local directory as a \"volume\" with the container so that scripts in the current directory can be run inside the container and the results saved on the local machine. For details about the options and arguments, see the [Docker documentation](https://docs.docker.com/engine/reference/commandline/run/)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> **Note**: on Windows, make sure appropriate drives have been shared via Docker for Windows settings." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# get working directory and define container name\n", + "localdirect = os.getcwd()\n", + "containername = (\"ComputeKL_FEniCS\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# docker create command string\n", + "dockercreate = (\"docker create -i --name \"+containername\n", + " + \" -w /home/fenics/shared\" # sets working directory\n", + " +\" -v \"+localdirect+\":/home/fenics/shared\" # share current dir.\n", + " +\" quay.io/fenicsproject/stable\") # name of parent image \n", + "\n", + "#print(dockercreate)\n", + "\n", + "# use subprocess to run command string and check output\n", + "out = sp.check_output(dockercreate.split(\" \"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next we start the container and execute the script [Compute_Save_KL.py](Compute_Save_KL.py). Note we also add the optional argument \"num_KL_terms\" to the script execution. The last few lines stops the container. 
\n", + "\n", + "*Note: Removing the container is optional, but note that you cannot create two containers with the same name.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# name of the python script which runs the fenics model\n", + "fenics_script = (\"Compute_Save_KL.py \"+str(num_KL_terms))\n", + " \n", + "# starts container\n", + "dockerstart = (\"docker start \"+containername)\n", + "outstatus = sp.check_output(dockerstart.split(\" \"))\n", + "print(outstatus+\" container has started...\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# execute python script in FEniCS container\n", + "dockerexec = (\"docker exec \"+containername+\" python \"+fenics_script)\n", + "\n", + "outstatus = sp.Popen(dockerexec.split(\" \"),stdout=sp.PIPE)\n", + "print(outstatus.communicate()[0])\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# close docker container\n", + "dockerclose = (\"docker stop \"+containername)\n", + "outstatus = sp.check_output(dockerclose.split(\" \"))\n", + "print(outstatus+\" container has closed.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# remove docker container if needed\n", + "#dockerRemove = (\"docker rm \"+containername)\n", + "#outstatus = sp.check_output(dockerRemove.split(\" \"))\n", + "#print(outstatus+\" has been removed.\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The KL Expansion should now be computed." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initializing the Sampler\n", + "Now we can initialize the parameter space and assume that any KL coefficient belongs to the interval [-3.0,3.0]:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "input_samples = samp.sample_set(num_KL_terms)\n", + "KL_term_min = -3.0\n", + "KL_term_max = 3.0\n", + "input_samples.set_domain(np.repeat([[KL_term_min, KL_term_max]],\n", + " num_KL_terms,\n", + " axis=0))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Suggested changes for user exploration (1):\n", + "Try with and without random sampling.\n", + "\n", + "If using **regular** sampling, try different numbers of samples per dimension (note that if `num_KL_terms` is not equal to 2, then the user needs to be careful using regular sampling):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "randomSampling = False\n", + "if randomSampling is True:\n", + " input_samples = sampler.random_sample_set('random', input_samples, \n", + " num_samples=1E2)\n", + "else:\n", + " input_samples = sampler.regular_sample_set(input_samples, \n", + " num_samples_per_dim=[10, 10])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Suggested changes for user exploration (2):\n", + "A standard Monte Carlo (MC) assumption is that every Voronoi cell has the same volume. 
If a regular grid of samples was used, then the standard MC assumption is true.\n", + "\n", + "See what happens if the MC assumption is not assumed to be true, and if different numbers of points are used to estimate the volumes of the Voronoi cells:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "MC_assumption = True\n", + "if MC_assumption is False:\n", + " input_samples.estimate_volume(n_mc_points=1E5)\n", + "else:\n", + " input_samples.estimate_volume_mc()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (3): Generate QoI samples\n", + "Create the discretization object holding all the input (parameter) samples and output (QoI) samples using the sampler:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_discretization = sampler.compute_QoI_and_create_discretization(\n", + " input_samples, savefile='FEniCS_Example.txt.gz')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "At this point, all of the model information has been extracted for BET (with the possibly exception of evaluating the model to generate a reference QoI datum or a distribution of the QoI), so the model is no longer required for evaluation. The user could do Steps (0)-(3) in a separate script, and then simply load the discretization object as part of a separate BET script that does the remaining steps. When the model is expensive to evaluate, this is an attractive option since we can now solve the stochastic inverse problem (with many different distributions defined on the data space) without ever having to re-solve the model (so long as we are happy with the resolution provided by the current discretization of the parameter and data spaces)." 
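As a rough illustration, a separate script for Steps (4)-(5) might look like the sketch below. It assumes that `bet.sample` provides a `load_discretization` helper matching the `savefile` written above, and that a reference QoI value was stored separately under a hypothetical file name; both are assumptions to check against your BET version.

```python
# Sketch of a stand-alone inversion script reusing the saved discretization.
import numpy as np
import bet.sample as samp
import bet.calculateP.simpleFunP as simpleFunP
import bet.calculateP.calculateP as calculateP

# Reload the discretization written by compute_QoI_and_create_discretization
# above (assumes a load_discretization helper; the name may differ by version).
my_discretization = samp.load_discretization('FEniCS_Example.txt.gz')

# A reference QoI datum is still needed; here it is assumed to have been saved
# earlier, e.g. with np.save('Q_ref.npy', Q_ref) (a hypothetical file name).
Q_ref = np.load('Q_ref.npy')

# Steps (4)-(5): define the data distribution and solve the stochastic inverse problem.
simpleFunP.regular_partition_uniform_distribution_rectangle_scaled(
    data_set=my_discretization, Q_ref=Q_ref[0, :], rect_scale=0.1,
    cells_per_dimension=3)
calculateP.prob(my_discretization)
```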
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/measure_methods/FEniCS/fenicsCalculator.py b/examples/measure_methods/FEniCS/fenicsCalculator.py new file mode 100644 index 00000000..c0c5cd2e --- /dev/null +++ b/examples/measure_methods/FEniCS/fenicsCalculator.py @@ -0,0 +1,13 @@ +# Example from Fenics Project Tutorial: +# https://fenicsproject.org/pub/tutorial/sphinx1/._ftut1003.html + +import numpy as np +import scipy as sci +from myModel import my_model + +# loads parameter samples & saves output quantities of interest +in_samples = np.load("parameter_sample_values.npy") + +out_samples = my_model(in_samples) + +np.save("QoI_outsample_values",out_samples) diff --git a/examples/measure_methods/FEniCS/myModel_Interface.py b/examples/measure_methods/FEniCS/myModel_Interface.py new file mode 100644 index 00000000..8a53ef52 --- /dev/null +++ b/examples/measure_methods/FEniCS/myModel_Interface.py @@ -0,0 +1,81 @@ +import subprocess as sp +import numpy as np +import os + +def bet_docker_interface(parameter_samples): + + # saves parameter samples to a local file in *.npy format + np.save("parameter_sample_values",parameter_samples) + + # checks to make sure docker is installed and running correctly + try: + dockercommand = ("docker version") + sp.check_call(dockercommand.split(" ")) + except: + print("\n Error Running Docker: \n Check that Docker "+ + "is properly installed and currently running \n") + raise + + ##### + # checks that appropriate docker FEniCS container exists + # if the container does not exist, creates the container + + # gets list of all docker containers + dockercommand = ("docker ps -a") + containerlist = sp.check_output(dockercommand.split(" ")) + #print(containerlist) + + # The name of the FEniCS image: should be downloaded from FEniCS + # but creation of container will auto download if not downloaded already + imagename = ("quay.io/fenicsproject/stable") + + # container created by this interface + containername = ("BET_to_FEniCS") + + if not(containername in containerlist): + print("Container named '"+containername+"' not found." + +" Creating new Docker container...") + + # local directory name + localdirect = os.getcwd() + + # docker create command string + dockercreate = ("docker create -i --name "+containername + + " -w /home/fenics/shared" # sets working directory + +" -v "+localdirect+":/home/fenics/shared" # share current dir. 
+ +" quay.io/fenicsproject/stable") # name of parent image + + # use subprocess to run command string and check output + sp.check_output(dockercreate.split(" ")) + + print("New container created named: "+containername) + + + #### + # Runs the FEniCS model using container + + # name of the python script which runs the fenics model + fenics_script = "fenicsCalculator.py" + + # starts container + dockerstart = ("docker start "+containername) + outstatus = sp.check_output(dockerstart.split(" ")) + print(outstatus+" container has started...") + + # execute python script in FEniCS container + dockerexec = ("docker exec "+containername+" python "+fenics_script) + outstatus = sp.Popen(dockerexec.split(" "),stdout=sp.PIPE) + print(outstatus.communicate()[0]) + + # close docker container + dockerclose = ("docker stop "+containername) + outstatus = sp.check_output(dockerclose.split(" ")) + print(outstatus+" container has closed.") + + # Load and save the quantity of interest + QoI_samples = np.load("QoI_outsample_values.npy") + + return QoI_samples + + + \ No newline at end of file diff --git a/examples/measure_methods/contaminantTransport/ConcentrationContaminant.ipynb b/examples/measure_methods/contaminantTransport/ConcentrationContaminant.ipynb new file mode 100644 index 00000000..62adb8d1 --- /dev/null +++ b/examples/measure_methods/contaminantTransport/ConcentrationContaminant.ipynb @@ -0,0 +1,520 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example: Concentration of Contaminant in Wells Based on Transport Parameters\n", + "([From BET Documentation](http://ut-chg.github.io/BET/examples/example_rst_files/contaminantTransport.html#contaminanttransport))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will walk through the following example. This example takes uniformly distributed samples of parameters and output data from a simple groundwater contaminant transport model, and calculates solutions to the stochastic inverse problem. The parameter domain is 5D, where the uncertain parameters are the x and y locations of the source, the horizontal dispersivity, the flow angle, and the contaminant flux. There are 11 measured QoIs in the data space available. By changing the choice of QoIs that we use to solve the stochastic inverse problem, we see the effect of geometric distinctness. Probabilities in the parameter space are calculated using the MC assumption. 1D and 2D marginals are calculated, smoothed, and plotted. The samples are then ranked by probability density and the volume of high-probability regions are calculated. Probabilistic predictions of other QoI are made." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Import the necessary modules:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import bet.calculateP as calculateP\n", + "import bet.postProcess as postProcess\n", + "import bet.calculateP.simpleFunP as simpleFunP\n", + "import bet.calculateP.calculateP as calculateP\n", + "import bet.postProcess.plotP as plotP\n", + "import bet.postProcess.plotDomains as plotD\n", + "import bet.postProcess.postTools as postTools\n", + "import bet.sample as samp" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Import the samples, data, reference data, and reference parameters:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "parameter_domain = np.loadtxt(\"files/lam_domain.txt.gz\") #parameter domain\n", + "parameter_dim = parameter_domain.shape[0]\n", + "\n", + "# Create input sample set\n", + "input_samples = samp.sample_set(parameter_dim)\n", + "input_samples.set_domain(parameter_domain)\n", + "input_samples.set_values(np.loadtxt(\"files/samples.txt.gz\"))\n", + "input_samples.estimate_volume_mc() # Use standard MC estimate of volumes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Choose which subset of the 11 QoIs to use for inversion:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "QoI_indices_observe = np.array([0,1,2,3])\n", + "output_samples = samp.sample_set(QoI_indices_observe.size)\n", + "output_samples.set_values(np.loadtxt(\"files/data.txt.gz\")[:,QoI_indices_observe])\n", + "\n", + "# Create discretization object\n", + "my_discretization = samp.discretization(input_sample_set=input_samples,\n", + " output_sample_set=output_samples)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Choose the bin ratio for the uniform output probability:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "bin_ratio = 0.25 #ratio of length of data region to invert" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Load the reference parameter and QoI values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "param_ref = np.loadtxt(\"files/lam_ref.txt.gz\") #reference parameter set\n", + "Q_ref = np.loadtxt(\"files/Q_ref.txt.gz\")[QoI_indices_observe] #reference QoI set" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot 2D projections of the data domain:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plotD.scatter_rhoD(my_discretization, ref_sample=Q_ref, io_flag='output', showdim=2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# hack to refresh html after changes within notebook\n", + "import random\n", + "__counter__ = random.randint(0,2e9)\n", + "\n", + "# displays 2D Projections\n", + "from IPython.display import HTML, display\n", + "display(HTML(\"\"+\n", + " \"\"% __counter__+\n", + " \"\"% __counter__+\n", + " \"\"% __counter__+\n", + " \"\"% __counter__+\n", + " \"\"% __counter__+\n", + " \"\"% __counter__+\n", + " \"
2D Data Projections
\" ))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Suggested changes for user (1):\n", + "\n", + "Change the `QoI_indices` and note how it changes the plots of the data domain. The white points are ones with zero probability and the dark points are those with nonzero probability." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Suggested changes for user (2):\n", + "\n", + "Try different ways of discretizing the probability measure on $\\mathcal{D}$ defined as a uniform probability measure on a rectangle (since $\\mathcal{D}$ is 2-dimensional)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* `regular_partition_uniform_distribution_rectangle_scaled` creates a uniform measure on a hyperbox with dimensions relative to the size of the circumscribed hyperbox of the set $\\mathcal{D}$ using the bin_ratio. A total of M samples are drawn within a slightly larger scaled hyperbox to discretize this measure defining M total generalized contour events in $\\Lambda$. The reason a slightly larger scaled hyperbox is used to draw the samples to discretize $\\mathcal{D}$ is because otherwise every generalized contour event will have non-zero probability which obviously defeats the purpose of “localizing” the probability within a subset of $\\mathcal{D}$.\n", + "\n", + "* `uniform_partition_uniform_distribution_rectangle_scaled` uses the same measure defined in the same way as `unif_unif`, but the difference is in the discretization which is on a regular grid defined by `cells_per_dimension`. If `cells_per_dimension = 1`, then the contour event corresponding to the entire support of $\\rho_\\mathcal{D}$ is approximated as a single event. This is done by carefully placing a regular 3x3 grid (since $\\dim(\\mathcal{D})=2$ in this case) of points in $\\mathcal{D}$ with the center point of the grid in the center of the support of the measure and the other points placed outside of the rectangle defining the support to define a total of 9 contour events with 8 of them having exactly zero probability." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a simple function approximation of the probablity measure on $\\mathcal{D}$:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "deterministic_discretize_D = True\n", + "if deterministic_discretize_D == True:\n", + " simpleFunP.regular_partition_uniform_distribution_rectangle_scaled(data_set=my_discretization,\n", + " Q_ref=Q_ref,\n", + " rect_scale=0.25,\n", + " cells_per_dimension = 1)\n", + "else:\n", + " simpleFunP.uniform_partition_uniform_distribution_rectangle_scaled(data_set=my_discretization,\n", + " Q_ref=Q_ref,\n", + " rect_scale=0.25,\n", + " M=50,\n", + " num_d_emulate=1E5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Calculate probablities using the MC assumption:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "calculateP.prob(my_discretization)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Suggested changes for user (3): Calculate 2D marginal probs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "At this point, the only thing that should change in the `plotP.*` inputs should be either the `nbins` values or sigma (which influences the kernel density estimation with smaller values implying a density estimate that looks more like a histogram and larger values smoothing out the values more).\n", + "\n", + "There are ways to determine “optimal” smoothing parameters (e.g., see CV, GCV, and other similar methods), but we have not incorporated these into the code as lower-dimensional marginal plots have limited value in understanding the structure of a high dimensional non-parametric probability measure." 
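A separate point worth noting before the plots: the `plotP.plot_2D_marginal_probs` and `plotP.plot_1D_marginal_probs` calls below pass `lambda_label=labels`, but `labels` is never defined in this notebook, which is a likely cause of the plotting error flagged below. A hedged fix is to define one label per uncertain parameter; the strings below follow the problem description at the top of this example and are otherwise an assumption.

```python
# Hypothetical labels for the 5 uncertain parameters (source x/y location,
# horizontal dispersivity, flow angle, contaminant flux), used by the
# lambda_label argument of the plotting calls below; the argument can also be
# omitted to fall back on default labels.
labels = ['source x location', 'source y location', 'dispersivity',
          'flow angle', 'contaminant flux']
```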
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot the marginal probabilities:\n", + "> **Note**: *Error in the plot needs troubleshooting.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "(bins, marginals2D) = plotP.calculate_2D_marginal_probs(my_discretization, nbins = 10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Smooth 2d marginals probs (optional):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "marginals2D = plotP.smooth_marginals_2D(marginals2D,bins, sigma=1.0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot 2d marginals probs:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plotP.plot_2D_marginal_probs(marginals2D, bins, my_discretization, filename = \"contaminant_map\",\n", + " plot_surface=False,\n", + " lam_ref = param_ref,\n", + " lambda_label=labels,\n", + " interactive=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Calculate 1d marginal probs:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "(bins, marginals1D) = plotP.calculate_1D_marginal_probs(my_discretization, nbins = 20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Smooth 1d marginal probs (optional):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "marginals1D = plotP.smooth_marginals_1D(marginals1D, bins, sigma=1.0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot 1d marginal probs:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plotP.plot_1D_marginal_probs(marginals1D, bins, my_discretization,\n", + " filename = \"contaminant_map\",\n", + " interactive=False,\n", + " lam_ref=param_ref,\n", + " lambda_label=labels)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sort samples by highest probability density and take highest x percent:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "percentile = 1.0\n", + "# Sort samples by highest probability density and sample highest percentile percent samples\n", + "(num_samples, my_discretization_highP, indices)= postTools.sample_highest_prob(\n", + " percentile, my_discretization, sort=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print the number of these samples and the ratio of the volume they take up:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print (num_samples, np.sum(my_discretization_highP._input_sample_set.get_volumes()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Suggested changes for user (4):\n", + "Notice how the marginal probabilites change with different choices of `QoI_indices`. Try choosing only 2 or 3, instead of 4, indices and notice the higher-dimensionality of the structure in the 2d marginals. Notice how some QoI concentrate the probability into smaller regions. 
These QoI are more geometrically distinct.\n", + "\n", + "Notice that the volume that the high-probability samples take up is smaller with more geometrically distinct QoIs.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Suggested changes for user (5):\n", + "Change `percentile` to values between 1.0 and 0.0. Notice that while the region of nonzero probabibilty may have a significant volume, much of this volume contains relatively low probability. Change the value to 0.95, 0.9, 0.75, and 0.5 and notice the volume decrease significantly.\n", + "\n", + "Propogate highest probability part of the probability measure through a different QoI map:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "QoI_indices_predict = np.array([7])\n", + "output_samples_predict = samp.sample_set(QoI_indices_predict.size)\n", + "output_samples_predict.set_values(np.loadtxt(\"files/data.txt.gz\")[:,QoI_indices_predict])\n", + "output_samples_predict.set_probabilities(input_samples.get_probabilities())\n", + "\n", + "# Determine range of predictions and store as domain for plotting purposes\n", + "output_samples_predict.set_domain(output_samples_predict.get_bounding_box())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Calculate and plot PDF of predicted QoI:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "(bins_pred, marginals1D_pred) = plotP.calculate_1D_marginal_probs(output_samples_predict,nbins = 20)\n", + "plotP.plot_1D_marginal_probs(marginals1D_pred, bins_pred, output_samples_predict,\n", + " filename = \"contaminant_prediction\", interactive=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# hack to refresh html after changes within notebook\n", + "import random\n", + "__counter__ = random.randint(0,2e9)\n", + "\n", + "# Plot QoI\n", + "from IPython.display import HTML, display\n", + "display(HTML(\"\"% __counter__ ))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Suggested changes for user (6):\n", + "Change the prediction QoI map. Compare to the reference values." 
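For that comparison, the reference value of the predicted QoI can be read from the same reference file used earlier; a minimal sketch, assuming `files/Q_ref.txt.gz` holds reference values for all 11 QoI so that index 7 matches `QoI_indices_predict`:

```python
# Compare the propagated prediction against the reference value of the same QoI.
Q_ref_predict = np.loadtxt("files/Q_ref.txt.gz")[QoI_indices_predict]
print("Reference value of predicted QoI:", Q_ref_predict)
print("Domain (range) of predicted samples:", output_samples_predict.get_domain())
```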
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/measure_methods/fromFile_ADCIRCMap/BET_Q_3D.py b/examples/measure_methods/fromFile_ADCIRCMap/BET_Q_3D.py index 5171c89f..195a7e94 100644 --- a/examples/measure_methods/fromFile_ADCIRCMap/BET_Q_3D.py +++ b/examples/measure_methods/fromFile_ADCIRCMap/BET_Q_3D.py @@ -71,10 +71,10 @@ def postprocess(station_nums, ref_num): postprocess(station_nums, ref_num) -station_nums = [0, 8, 6] # 1, 5, 12 +station_nums = [0, 8, 6] # 1, 9, 7 postprocess(station_nums, ref_num) -station_nums = [0, 8, 11] # 1, 5, 12 +station_nums = [0, 8, 11] # 1, 9, 12 postprocess(station_nums, ref_num) """ diff --git a/examples/measure_methods/fromFile_ADCIRCMap/BatchAdaptiveSampling-2to2.ipynb b/examples/measure_methods/fromFile_ADCIRCMap/BatchAdaptiveSampling-2to2.ipynb new file mode 100644 index 00000000..481f3f36 --- /dev/null +++ b/examples/measure_methods/fromFile_ADCIRCMap/BatchAdaptiveSampling-2to2.ipynb @@ -0,0 +1,486 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example: Batch Adaptive Sampling (2-to-2 example)\n", + "([From BET Documentation](http://ut-chg.github.io/BET/examples/example_rst_files/fromfile2D.html#fromfile2dexample))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + ">**Note:**\n", + "*This example shows how to generate adaptive samples in a specific way by implicitly defining an input event of interest. It does NOT show how to solve the stochastic inverse problem using these samples, which can be found by reading other examples. Thus, we only present the first few steps involved in discretizing the parameter and data spaces using a specific type of adaptive sampling. The user is referred to some other examples for filling in the remaining steps for solving the stochastic inverse problem following the construction of the adaptive samples.*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will walk through the following [example](fromFile2D.py) that uses a linear interpolant of a 2-dimensional QoI map used to define a 2-dimensional data space. The parameter space in this example is also 2-dimensional.\n", + "\n", + "This example specifically demonstrates the adaptive generation of samples using a goal-oriented adaptive sampling algorithm. This example is based upon the results shown in Section 8.5 of the manuscript [Definition and solution of a stochastic inverse problem for the Manning’s n parameter field in hydrodynamic models](http://dx.doi.org/10.1016/j.advwatres.2015.01.011) where the QoI map is given by $Q(\\lambda) = (q_1(\\lambda), q_6(\\lambda))$. We refer the reader to that example for more information about the physical interpretation of the parameter and data space, as well as the physical locations of the observation stations defining the QoI map." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + ">**Note:**\n", + ">*In this example, we have used ADCIRC to generate data files based on a regular discretization of the parameter space whose sole purpose is to create an (accurate) surrogate QoI map defined as a piecewise linear interpolant. This is quite different from many of the other examples, but the use of the surrogate QoI map is immaterial. The user could also interface the sampler directly to ADCIRC, but this would require a copy of ADCIRC, the finite element mesh, and significant training on the use of this state-of-the-art shallow water equation code. The primary focus of this example is the generation of adaptive samples. If the user knows how to use the ADCIRC model, then the user may instead opt to significantly change Step (1) below to interface to ADCIRC instead of to our “model” defined in terms of the surrogate QoI map. Interfacing to ADCIRC directly would likely require the use of [PolyADCIRC](https://github.com/UT-CHG/PolyADCIRC).*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Generating a single set of adaptive samples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step (0): Setting up the environment\n", + "Import the necessary modules:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import bet.sampling.adaptiveSampling as asam\n", + "import bet.postProcess.plotDomains as pDom\n", + "import scipy.io as sio\n", + "from scipy.interpolate import griddata" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step (1): Define the interface to the model and goal-oriented adaptive sampler\n", + "This is where we interface the adaptive sampler imported above to the model. In other examples, we have imported a Python interface to a computational model. In this example, we instead define the model as a (piecewise-defined) linear interpolant to the QoI map $Q(λ)=(q1(λ),q6(λ))Q(λ)=(q1(λ),q6(λ))$ using data read from a `.mat` [file](../matfiles/Q_2D.mat):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "station_nums = [0, 5] # 1, 6\n", + "mdat = sio.loadmat('../matfiles/Q_2D')\n", + "Q = mdat['Q']\n", + "Q = Q[:, station_nums]\n", + "# Create experiment model\n", + "points = mdat['points']\n", + "def model(inputs):\n", + " interp_values = np.empty((inputs.shape[0], Q.shape[1]))\n", + " for i in xrange(Q.shape[1]):\n", + " interp_values[:, i] = griddata(points.transpose(), Q[:, i],\n", + " inputs)\n", + " return interp_values" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example, we use the adaptive sampler defined by `rhoD_kernel`, which requires an identification of a data distribution used to modify the transition kernel for input samples. The idea is to place more samples in the parameter space that correspond to a contour event of higher probability as specified by the data distribution `rho_D` shown below." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we create the `transition_set` with an initial step size ratio of 0.5 and a minimum, maximum step size ratio of `.5**5` and 1.0 respectively. 
Note that this algorithm only generates samples inside the parameter domain, `lam_domain` (see Step (2) below):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Create Transition Kernel\n", + "transition_set = asam.transition_set(.5, .5**5, 1.0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here, we implicty designate a region of interest $\\Lambda_k = Q^{-1}(D_k)$ in $\\Lambda$ for some $D_k \\subset \\mathcal{D}$ through the use of the data distribution kernel. In this instance we choose our kernel $p_k(Q) = \\rho_\\mathcal{D}(Q)$, see `rhoD_kernel`.\n", + "\n", + "We choose some $\\lambda_{ref}$ and let $Q_{ref} = Q(\\lambda_{ref})$:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "Q_ref = mdat['Q_true']\n", + "Q_ref = Q_ref[15, station_nums] # 16th/20" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We define a rectangle, $R_{ref} \\subset \\mathcal{D}$ centered at $Q(\\lambda_{ref})$ with sides 15% the length of $q_1$ and $q_6$. Set $\\rho_\\mathcal{D}(q) = \\dfrac{\\mathbf{1}_{R_{ref}}(q)}{||\\mathbf{1}_{R_{ref}}||}$:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "bin_ratio = 0.15\n", + "bin_size = (np.max(Q, 0)-np.min(Q, 0))*bin_ratio\n", + "# Create kernel\n", + "maximum = 1/np.product(bin_size)\n", + "def rho_D(outputs):\n", + " rho_left = np.repeat([Q_ref-.5*bin_size], outputs.shape[0], 0)\n", + " rho_right = np.repeat([Q_ref+.5*bin_size], outputs.shape[0], 0)\n", + " rho_left = np.all(np.greater_equal(outputs, rho_left), axis=1)\n", + " rho_right = np.all(np.less_equal(outputs, rho_right),axis=1)\n", + " inside = np.logical_and(rho_left, rho_right)\n", + " max_values = np.repeat(maximum, outputs.shape[0], 0)\n", + " return inside.astype('float64')*max_values\n", + "\n", + "kernel_rD = asam.rhoD_kernel(maximum, rho_D)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The basic idea is that when the region of interest has been “found” by some sample in a chain, the transition set is modified by the adaptive sampler (it is made smaller) so that more samples are placed within this event of interest.\n", + "\n", + "Given a (M, mdim) data vector `rhoD_kernel` expects that `rho_D` will return a `ndarray` of shape (M,).\n", + "\n", + "Next, we create the `sampler`. 
This `sampler` will create 80 independent sampling chains that are each 125 samples long:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create sampler\n", + "chain_length = 125\n", + "num_chains = 80\n", + "num_samples = chain_length*num_chains\n", + "sampler = asam.sampler(num_samples, chain_length, model)\n", + "sample_save_file = 'sandbox2d'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + ">**Note:**\n", + "* *In the first line of code above change `chain_length` and `num_chains` to reduce the total number of forward solves.*\n", + "* *If `num_chains = 1` above, then this is no longer a “batch” sampling process where multiple chains are run simultaneously to “search for” the region of interest.*\n", + "* *Saves to `sandbox2d.mat`.*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step (2) [and Step (3)]: Describe and (adaptively) sample the input (and output) space\n", + "The adaptive sampling of the input space requires feedback from the corresponding output samples, so the sets of samples are, in a sense, created simultaneously in order to define the discretization of the spaces used to solve the stochastic inverse problem. While this can always be the case, in other examples, we often sampled the input space completely in one step, and then propagated the samples through the model to generate the QoI samples in another step, and these two samples sets together were used to define the discretization object used to solve the stochastic inverse problem.\n", + "\n", + "The compact (bounded, finite-dimensional) paramter space for this example is:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "lam_domain = np.array([[.07, .15], [.1, .2]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We choose an initial sample type to seed the sampling chains, which in this case comes from using Latin-Hypercube sampling:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "inital_sample_type = \"lhs\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we adaptively generate the samples using `generalized_chains()`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "(my_disc, all_step_ratios) = sampler.generalized_chains(lam_domain,\n", + " transition_set, kernel_rD, sample_save_file, inital_sample_type)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### [Optional]:\n", + "We may choose to visualize the results by executing the following code:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Read in points_ref and plot results\n", + "ref_sample = mdat['points_true']\n", + "ref_sample = ref_sample[5:7, 15]\n", + "\n", + "# Show the samples in the parameter space\n", + "pDom.scatter_rhoD(my_disc, rho_D=rho_D, ref_sample=ref_sample, io_flag='input')\n", + "# Show the corresponding samples in the data space\n", + "pDom.scatter_rhoD(my_disc, rho_D=rho_D, ref_sample=Q_ref, io_flag='output')\n", + "# Show the data domain that corresponds with the convex hull of samples in the\n", + "# parameter space\n", + "pDom.show_data_domain_2D(my_disc, Q_ref=Q_ref)\n", + "# 
Show multiple data domains that correspond with the convex hull of samples in\n", + "# the parameter space\n", + "pDom.show_data_domain_multi(my_disc, Q_ref=Q_ref, showdim='all')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# hack to refresh html after changes within notebook\n", + "import random\n", + "__counter__ = random.randint(0,2e9)\n", + "\n", + "# displays saved visualizations\n", + "from IPython.display import HTML, display\n", + "display(HTML(\"\"+\n", + " \"\"% __counter__+\n", + " \"\"% __counter__+\n", + " \"
Visualizations
\" ))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + ">**Note:**\n", + ">*The user could simply run the example [plotDomains2D.py](plotDomains2D.py) to see the results for a previously generated set of adaptive samples.*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Steps (4)-(5) [user]: Defining and solving a stochastic inverse problem\n", + "In the call to `sampler.generalized_chains` above, a discretization object is created and saved. The user may wish to follow some of the other examples (e.g., [Example: Linear Map with Uniform Sampling](../linearMap/LinearMapExample.ipynb) or [Example: Nonlinear Map with Uniform Sampling](../nonlinearMap/NonLinearMapExample.ipynb)) along with the paper referenced above to describe a data distribution around a reference datum (Step (4)) and solve the stochastic inverse problem (Step (5)) using the adaptively generated discretization object by loading it from file. This can be done in a separate script (but do not forget to do Step (0) which sets up the environment before coding Steps (4) and (5))." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Generating and comparing several sets of adaptive samples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In some instances the user may want to generate and compare several sets of adaptive samples using a surrogate model to determine what the best kernel, transition set, number of generalized chains, and chain length are before adaptively sampling a more computationally expensive model. See [sandbox_test_2D.py](sandbox_test_2D.py). The set up in sandbox_test_2D.py is very similar to the set up in [fromFile2D.py](fromFile2D.py), which was examined in detail above, and is omitted for brevity." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> **Note:**\n", + ">*If all of the above code in this notebook has been run, our current environment is already set up to compare several sets of adaptive samples and we can proceed with the following exploration.*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can explore several types of kernels:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "kernel_mm = asam.maxima_mean_kernel(np.array([Q_ref]), rho_D)\n", + "kernel_rD = asam.rhoD_kernel(maximum, rho_D)\n", + "kernel_m = asam.maxima_kernel(np.array([Q_ref]), rho_D)\n", + "kern_list = [kernel_mm, kernel_rD, kernel_m]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create sampler\n", + "chain_length = 125\n", + "num_chains = 80\n", + "num_samples = chain_length*num_chains\n", + "sampler = asam.sampler(num_samples, chain_length, model)\n", + "inital_sample_type = \"lhs\"\n", + "\n", + "# Get samples\n", + "# Run with varying kernels\n", + "gen_results = sampler.run_gen(kern_list, rho_D, maximum, lam_domain,\n", + " transition_set, sample_save_file)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can explore `transition_set` with various inital, minimum, and maximum step size ratios:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Run with varying transition sets bounds\n", + "init_ratio = [0.1, 0.25, 0.5]\n", + "min_ratio = [2e-3, 2e-5, 2e-8]\n", + "max_ratio = [.5, .75, 1.0]\n", + "tk_results = sampler.run_tk(init_ratio, min_ratio, max_ratio, rho_D,\n", + " maximum, lam_domain, kernel_rD, sample_save_file)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can explore a single kernel with varying values of ratios for increasing and decreasing the step size (i.e. the size of the hyperrectangle to draw a new step from using a transition set):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "increase = [1.0, 2.0, 4.0]\n", + "decrease = [0.5, 0.5e2, 0.5e3]\n", + "tolerance = [1e-4, 1e-6, 1e-8]\n", + "incdec_results = sampler.run_inc_dec(increase, decrease, tolerance, rho_D,\n", + " maximum, lam_domain, transition_set, sample_save_file)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + ">**Note**: *The above examples just use a zip combination of the lists uses to define varying parameters for the kernels and transition sets. To explore the product of these lists you need to use numpy.meshgrid and numpy.ravel or a similar process.*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + ">**Note:** *To compare the results in terms of yield or the total number of samples generated in the region of interest we could use `compare_yield` to display the results to screen, however this tool is being depreciated. 
Here compare_yield() simply would display to screen the `sample_quality` and `run_param` sorted by `sample_quality` and indexed by `sort_ind`.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import bet.postProcess.postTools as ptools\n", + "# Compare the quality of several sets of samples\n", + "print \"Compare yield of sample sets with various kernels\"\n", + "ptools.compare_yield(gen_results[3], gen_results[2], gen_results[4])\n", + "print \"Compare yield of sample sets with various transition sets bounds\"\n", + "ptools.compare_yield(tk_results[3], tk_results[2], tk_results[4])\n", + "print \"Compare yield of sample sets with variouos increase/decrease ratios\"\n", + "ptools.compare_yield(incdec_results[3], incdec_results[2],incdec_results[4])\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/measure_methods/fromFile_ADCIRCMap/BatchAdaptiveSampling-3to3.ipynb b/examples/measure_methods/fromFile_ADCIRCMap/BatchAdaptiveSampling-3to3.ipynb new file mode 100644 index 00000000..e9b23942 --- /dev/null +++ b/examples/measure_methods/fromFile_ADCIRCMap/BatchAdaptiveSampling-3to3.ipynb @@ -0,0 +1,492 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example: Batch Adaptive Sampling (3-to-3 example)\n", + "([From BET Documentation](http://ut-chg.github.io/BET/examples/example_rst_files/fromfile3D.html#fromfile3dexample))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + ">**Note:** *This example shows how to generate adaptive samples in a specific way by implicitly defining an input event of interest. It does NOT show how to solve the stochastic inverse problem using these samples, which can be found by reading other examples. Thus, we only present the first few steps involved in discretizing the parameter and data spaces using a specific type of adaptive sampling. The user is referred to some other examples for filling in the remaining steps for solving the stochastic inverse problem following the construction of the adaptive samples.*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will walk through the following [example](fromFile3D.py) that uses a linear interpolant of a 3-dimensional QoI map used to define a 3-dimensional data space. The parameter space is also 3-dimensional.\n", + "\n", + "This example specifically demonstrates the adaptive generation of samples using a goal-oriented adaptive sampling algorithm. This example is based upon the results shown in Section 8.6 of the manuscript [Definition and solution of a stochastic inverse problem for the Manning’s n parameter field in hydrodynamic models](http://dx.doi.org/10.1016/j.advwatres.2015.01.011) where the QoI map is given by $Q(\\lambda) = (q_1(\\lambda), q_5(\\lambda), q_2(\\lambda))$. We refer the reader to that example for more information about the physical interpretation of the parameter and data space, as well as the physical locations of the observation stations defining the QoI map." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> **Note:** *In this example, we have used ADCIRC to generate data files based on a regular discretization of the parameter space whose sole purpose is to create an (accurate) surrogate QoI map defined as a piecewise linear interpolant. This is quite different from many of the other examples, but the use of the surrogate QoI map is immaterial. The user could also interface the sampler directly to ADCIRC, but this would require a copy of ADCIRC, the finite element mesh, and significant training on the use of this state-of-the-art shallow water equation code. The primary focus of this example is the generation of adaptive samples. If the user knows how to use the ADCIRC model, then the user may instead opt to significantly change Step (1) below to interface to ADCIRC instead of to our “model” defined in terms of the surrogate QoI map. Interfacing to ADCIRC directly would likely require the use of [PolyADCIRC](https://github.com/UT-CHG/PolyADCIRC).*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> **Note:** *This example is very similar to [Example: Batch Adaptive Sampling (2-to-2 example)](BatchAdaptiveSampling-2to2.ipynb) which involved a 2-to-2 map. The user may want to modify either example to involve fewer QoI’s in the map (e.g., defining a 2-to-1 or 3-to-2 or 3-to-1 map). The example discussed in Section 8.6 of the paper referenced above discusses that the results for solving the stochastic inverse problem using a 3-to-3 map are almost identical to those using a 3-to-2 map.*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Generating a single set of adaptive samples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step (0): Setting up the environment\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Import the necessary modules:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import bet.sampling.adaptiveSampling as asam\n", + "import bet.postProcess.plotDomains as pDom\n", + "import scipy.io as sio\n", + "from scipy.interpolate import griddata" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step (1): Define the interface to the model and goal-oriented adaptive sampler\n", + "\n", + "This is where we interface the adaptive sampler imported above to the model. In other examples, we have imported a Python interface to a computational model. 
In this example, we instead define the model as a (piecewise-defined) linear interpolant to the QoI map $Q(\\lambda) = (q_1(\\lambda), q_5(\\lambda), q_2(\\lambda))$ using data read from a `.mat` [file](../matfiles/Q_3D.mat):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "station_nums = [0, 4, 1] # 1, 5, 2\n", + "mdat = sio.loadmat('../matfiles/Q_3D')\n", + "Q = mdat['Q']\n", + "Q = Q[:, station_nums]\n", + "# Create experiment model\n", + "points = mdat['points']\n", + "def model(inputs):\n", + " interp_values = np.empty((inputs.shape[0], Q.shape[1]))\n", + " for i in xrange(Q.shape[1]):\n", + " interp_values[:, i] = griddata(points.transpose(), Q[:, i],\n", + " inputs)\n", + " return interp_values" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example, we use the adaptive sampler defined by `rhoD_kernel`, which requires an identification of a data distribution used to modify the transition kernel for input samples. The idea is to place more samples in the parameter space that correspond to a contour event of higher probability as specified by the data distribution `rho_D` shown below.\n", + "\n", + "First, we create the `transition_set` with an initial step size ratio of 0.5 and a minimum, maximum step size ratio of `.5**5` and 1.0 respectively. Note that this algorithm only generates samples inside the parameter domain, `lam_domain` (see Step (2) below):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Create Transition Kernel\n", + "transition_set = asam.transition_set(.5, .5**5, 1.0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here, we implicty designate a region of interest $\\Lambda_k = Q^{-1}(D_k)$ in $\\Lambda$ for some $D_k \\subset \\mathcal{D}$ through the use of the data distribution kernel. In this instance we choose our kernel $p_k(Q) = \\rho_\\mathcal{D}(Q)$, see `rhoD_kernel`.\n", + "\n", + "We choose some $\\lambda_{ref}$ and let $Q_{ref} = Q(\\lambda_{ref})$:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "Q_ref = mdat['Q_true']\n", + "Q_ref = Q_ref[14, station_nums] # 15th/20" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We define a 3-D box, $R_{ref} \\subset \\mathcal{D}$ centered at $Q(\\lambda_{ref})$ with sides 15% the length of $q_1$, $q_5$, and $q_2$. 
Set $\\rho_\\mathcal{D}(q) = \\dfrac{\\mathbf{1}_{R_{ref}}(q)}{||\\mathbf{1}_{R_{ref}}||}$:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "bin_ratio = 0.15\n", + "bin_size = (np.max(Q, 0)-np.min(Q, 0))*bin_ratio\n", + "# Create kernel\n", + "maximum = 1/np.product(bin_size)\n", + "def rho_D(outputs):\n", + " rho_left = np.repeat([Q_ref-.5*bin_size], outputs.shape[0], 0)\n", + " rho_right = np.repeat([Q_ref+.5*bin_size], outputs.shape[0], 0)\n", + " rho_left = np.all(np.greater_equal(outputs, rho_left), axis=1)\n", + " rho_right = np.all(np.less_equal(outputs, rho_right),axis=1)\n", + " inside = np.logical_and(rho_left, rho_right)\n", + " max_values = np.repeat(maximum, outputs.shape[0], 0)\n", + " return inside.astype('float64')*max_values\n", + "\n", + "kernel_rD = asam.rhoD_kernel(maximum, rho_D)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The basic idea is that when the region of interest has been “found” by some sample in a chain, the transition set is modified by the adaptive sampler (it is made smaller) so that more samples are placed within this event of interest.\n", + "\n", + "Given a (M, mdim) data vector `rhoD_kernel` expects that `rho_D` will return a ndarray of shape (M,).\n", + "\n", + "Next, we create the `sampler`. This `sampler` will create 80 independent sampling chains that are each 125 samples long:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Create sampler\n", + "chain_length = 125\n", + "num_chains = 80\n", + "num_samples = chain_length*num_chains\n", + "sampler = asam.sampler(num_samples, chain_length, model)\n", + "sample_save_file = 'sandbox3d'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> **Note:**\n", + "* *In the first line of code above, change `chain_length` and `num_chains` to reduce the total number of forward solves.*\n", + "* *If `num_chains = 1` above, then this is no longer a “batch” sampling process where multiple chains are run simultaneously to “search for” the region of interest.*\n", + "* *Saves to `sandbox3d.mat`.*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step (2) [and Step (3)]: Describe and (adaptively) sample the input (and output) space\n", + "The adaptive sampling of the input space requires feedback from the corresponding output samples, so the sets of samples are, in a sense, created simultaneously in order to define the discretization of the spaces used to solve the stochastic inverse problem. 
While this can always be the case, in other examples, we often sampled the input space completely in one step, and then propagated the samples through the model to generate the QoI samples in another step, and these two samples sets together were used to define the discretization object used to solve the stochastic inverse problem.\n", + "\n", + "The compact (bounded, finite-dimensional) paramter space for this example is:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "lam_domain = np.array([[-900, 1500], [.07, .15], [.1, .2]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We choose an initial sample type to seed the sampling chains, which in this case comes from using Latin-Hypercube sampling:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "inital_sample_type = \"lhs\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we adaptively generate the samples using `generalized_chains()`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "(my_disc, all_step_ratios) = sampler.generalized_chains(lam_domain,\n", + " transition_set, kernel_rD, sample_save_file, inital_sample_type)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**[Optional]:**\n", + "\n", + "We may choose to visualize the results by executing the following code:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Read in points_ref and plot results\n", + "ref_sample = mdat['points_true']\n", + "ref_sample = ref_sample[:, 14]\n", + "\n", + "# Show the samples in the parameter space\n", + "pDom.scatter_rhoD(my_disc, rho_D=rho_D, ref_sample=ref_sample, io_flag='input')\n", + "# Show the corresponding samples in the data space\n", + "pDom.scatter_rhoD(my_disc, rho_D=rho_D, ref_sample=Q_ref, io_flag='output')\n", + "# Show the data domain that corresponds with the convex hull of samples in the\n", + "# parameter space\n", + "pDom.show_data_domain_2D(my_disc, Q_ref=Q_ref)\n", + "\n", + "# Show multiple data domains that correspond with the convex hull of samples in\n", + "# the parameter space\n", + "pDom.show_data_domain_multi(my_disc, Q_ref=Q_ref, showdim='all')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# hack to refresh html after changes within notebook\n", + "import random\n", + "__counter__ = random.randint(0,2e9)\n", + "\n", + "# displays saved visualizations\n", + "from IPython.display import HTML, display\n", + "display(HTML(\"\"+\n", + " \"\"% __counter__+\n", + " \"\"% __counter__+\n", + " \"
Visualizations
\" ))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> **Note:**\n", + "> *The user could simply run the example [plotDomains3D.py](plotDomains3D.py) to see the results for a previously generated set of adaptive samples.*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Steps (4)-(5) [user]: Defining and solving a stochastic inverse problem\n", + "In the call to `sampler.generalized_chains` above, a discretization object is created and saved. The user may wish to follow some of the other examples (e.g., Example: Linear Map with Uniform Sampling or Example: Nonlinear Map with Uniform Sampling) along with the paper referenced above to describe a data distribution around a reference datum (Step (4)) and solve the stochastic inverse problem (Step (5)) using the adaptively generated discretization object by loading it from file. This can be done in a separate script (but do not forget to do Step (0) which sets up the environment before coding Steps (4) and (5))." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Generating and comparing several sets of adaptive samples\n", + "In some instances the user may want to generate and compare several sets of adaptive samples using a surrogate model to determine what the best kernel, transition set, number of generalized chains, and chain length are before adaptively sampling a more computationally expensive model. See [sandbox_test_3D.py](sandbox_test_3D.py). The set up in sandbox_test_3D.py is very similar to the set up in [fromFile3D](fromFile3D.py) and is omitted for brevity." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> **Note:**\n", + ">*If all of the above code in this notebook has been run, our current environment is already set up to compare several sets of adaptive samples and we can proceed with the following exploration.*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can explore several types of kernels:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kernel_mm = asam.maxima_mean_kernel(np.array([Q_ref]), rho_D)\n", + "kernel_rD = asam.rhoD_kernel(maximum, rho_D)\n", + "kernel_m = asam.maxima_kernel(np.array([Q_ref]), rho_D)\n", + "heur_list = [kernel_mm, kernel_rD, kernel_m]\n", + "\n", + "\n", + "# Set minima and maxima\n", + "param_domain = np.array([[-900, 1500], [.07, .15], [.1, .2]])\n", + "lam3 = 0.012\n", + "xmin = 1420\n", + "xmax = 1580\n", + "ymax = 1500\n", + "wall_height = -2.5\n", + "\n", + "# Get samples\n", + "# Run with varying kernels\n", + "gen_results = sampler.run_gen(heur_list, rho_D, maximum, param_domain,\n", + " transition_set, sample_save_file) \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can explore `transition_set` with various inital, minimum, and maximum step size ratios:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Run with varying transition sets bounds\n", + "init_ratio = [0.1, 0.25, 0.5]\n", + "min_ratio = [2e-3, 2e-5, 2e-8]\n", + "max_ratio = [.5, .75, 1.0]\n", + "tk_results = sampler.run_tk(init_ratio, min_ratio, max_ratio, rho_D,\n", + " maximum, param_domain, kernel_rD, sample_save_file)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can explore a single kernel with varying values of ratios for increasing and decreasing the step size (i.e. 
the size of the hyperrectangle to draw a new step from using a transition set):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "increase = [1.0, 2.0, 4.0]\n", + "decrease = [0.5, 0.5e2, 0.5e3]\n", + "tolerance = [1e-4, 1e-6, 1e-8]\n", + "incdec_results = sampler.run_inc_dec(increase, decrease, tolerance, rho_D,\n", + " maximum, param_domain, transition_set, sample_save_file)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> **Note:**\n", + "> *The above examples just use a zip combination of the lists uses to define varying parameters for the kernels and transition sets. To explore the product of these lists you need to use numpy.meshgrid and numpy.ravel or a similar process.*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + ">**Note:** *To compare the results in terms of yield or the total number of samples generated in the region of interest we could use `compare_yield` to display the results to screen, however this tool is being depreciated. Here compare_yield() simply would display to screen the `sample_quality` and `run_param` sorted by `sample_quality` and indexed by `sort_ind`.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import bet.postProcess.postTools as ptools\n", + "# Compare the quality of several sets of samples\n", + "print \"Compare yield of sample sets with various kernels\"\n", + "ptools.compare_yield(gen_results[3], gen_results[2], gen_results[4])\n", + "print \"Compare yield of sample sets with various transition sets bounds\"\n", + "ptools.compare_yield(tk_results[3], tk_results[2], tk_results[4])\n", + "print \"Compare yield of sample sets with variouos increase/decrease ratios\"\n", + "ptools.compare_yield(incdec_results[3], incdec_results[2],incdec_results[4])\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/measure_methods/fromFile_ADCIRCMap/EstimatingParameterSpace.ipynb b/examples/measure_methods/fromFile_ADCIRCMap/EstimatingParameterSpace.ipynb new file mode 100644 index 00000000..872578a5 --- /dev/null +++ b/examples/measure_methods/fromFile_ADCIRCMap/EstimatingParameterSpace.ipynb @@ -0,0 +1,700 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example: Estimate the parameter space probabiliy density with a 1D data space\n", + "\n", + "([From BET Documentation](http://ut-chg.github.io/BET/examples/example_rst_files/Q_1D.html#q1d))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example the parameter space $\\Lambda \\subset \\mathbb{R}^2$ is 2 dimensional. This example demostrates three different methods to estimate $\\hat{\\rho}_{\\Lambda, j}$ where\n", + "$$P_\\Lambda \\approx \\sum_{\\mathcal{V}_j \\subset A} \\hat{\\rho}_{\\Lambda, j}.$$\n", + "\n", + "These methods are distinguished primarily by the way $\\mathcal{V}_j$ are defined and the approximation of the volume of $\\mathcal{V}_j$. 
See [Q_1D.py](Q_1D.py) for the example source code.\n", + "\n", + "First, import the necessary packages and modules:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import bet.sampling.basicSampling as bsam\n", + "import bet.calculateP.calculateP as calcP\n", + "import bet.calculateP.simpleFunP as sfun\n", + "import numpy as np\n", + "import scipy.io as sio\n", + "import bet.sample as sample" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Load the data where our parameter space is 2-dimensional and load a reference solution:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import \"Truth\" (reference solution)\n", + "mdat = sio.loadmat('../matfiles/Q_2D')\n", + "Q = mdat['Q']\n", + "Q_ref = mdat['Q_true']\n", + "\n", + "# Import Data \n", + "points = mdat['points']\n", + "lam_domain = np.array([[0.07, .15], [0.1, 0.2]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will use the to the `points`, $\\lambda_{samples} = \\{ \\lambda^{(j) } \\}, j = 1, \\ldots, N$, to create an input `sample_set` object. These `points` are the points in parameter space where we solve the forward model to generate the data `Q` where $Q_j = Q(\\lambda^{(j)})$.\n", + "\n", + "Define the parameter domain $\\Lambda$:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "lam_domain = np.array([[0.07, .15], [0.1, 0.2]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create input sample set objects:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "input_sample_set = sample.sample_set(points.shape[0])\n", + "input_sample_set.set_values(points.transpose())\n", + "input_sample_set.set_domain(lam_domain)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Methods for approximating $\\hat{\\rho}_{\\Lambda, j}$\n", + "For ease of use we have created a function, `postprocess(station_nums, ref_num)` so that we can loop through different QoI (maximum water surface height at various measurement stations) and reference solutions (point in data space around which we center a uniform probability solution. The three methods for approximating $\\hat{\\rho}_{\\Lambda, j}$ are combined in the postprocessing function. \n", + "\n", + "The function is defined as follows:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```python\n", + "def postprocess(station_nums, ref_num):\n", + "```\n", + "\n", + "Define the filename to save $\\hat{\\rho}_{\\Lambda, j}$ to:\n", + "\n", + "```python\n", + " filename = 'P_q'+str(station_nums[0]+1)+'_q'\n", + " if len(station_nums) == 3:\n", + " filename += '_q'+str(station_nums[2]+1)\n", + " filename += '_truth_'+str(ref_num+1)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define the data space $\\mathcal{D} \\subset \\mathbb{R}^d$ where $d$ is the dimension of the data space:\n", + "\n", + "```python\n", + " data = Q[:, station_nums]\n", + " output_sample_set = sample.sample_set(data.shape[1])\n", + " output_sample_set.set_values(data)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define the refernce solution. 
We define a region of interest, $R_{ref} \\subset \\mathcal{D}$ centered at $Q_{ref}$ that is 15% the length of $q_n$ (the QoI for station $n$). We set $\\rho_\\mathcal{D}(q) = \\dfrac{\\mathbf{1}_{R_{ref}}(q)}{||\\mathbf{1}_{R_{ref}}||}$ and then create a simple function approximation to this density:\n", + "\n", + "```python\n", + " q_ref = Q_ref[ref_num, station_nums]\n", + " output_probability_set = sfun.regular_partition_uniform_distribution_rectangle_scaled(\\\n", + " output_sample_set, q_ref, rect_scale=0.15,\n", + " cells_per_dimension=np.ones((data.shape[1],)))\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We generate 1e6 uniformly distributed points in $\\Lambda$. We call these points $\\lambda_{emulate} = \\{ \\lambda_j \\}_{j=1}^{10^6}$:\n", + "\n", + "```python\n", + " num_l_emulate = 1e4\n", + " set_emulated = bsam.random_sample_set('r', lam_domain, num_l_emulate)\n", + " my_disc = sample.discretization(input_sample_set, output_sample_set,\n", + " output_probability_set, emulated_input_sample_set=set_emulated)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Estimation Method 1:** Calculate $\\hat{\\rho}_{\\Lambda, j}$ where $\\mathcal{V}_j$ are the voronoi cells defined by $\\lambda_{emulate}$:\n", + "\n", + "```python\n", + " calcP.prob_on_emulated_samples(my_disc)\n", + " sample.save_discretization(my_disc, filename, \"prob_on_emulated_samples_solution\")\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Estimation Method 2:** Calculate $\\hat{\\rho}_{\\Lambda, j}$ where $\\mathcal{V}_j$ are the voronoi cells defined by $\\lambda_{samples}$ assume that $\\lambda_{samples}$ are uniformly distributed and therefore have approximately the same volume:\n", + "\n", + "```python\n", + " input_sample_set.estimate_volume_mc()\n", + " calcP.prob(my_disc)\n", + " sample.save_discretization(my_disc, filename, \"prob_solution\")\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Estimation Method 3:** Calculate $\\hat{\\rho}_{\\Lambda, j}$ where $\\mathcal{V}_j$ are the voronoi cells defined by $\\lambda_{samples}$ and we approximate the volume of $\\mathcal{V}_j$ using Monte Carlo integration. 
We use $\\lambda_{emulate}$ to estimate the volume of $\\mathcal{V}_j$.\n", + "\n", + "```python\n", + " calcP.prob_with_emulated_volumes(my_disc)\n", + " sample.save_discretization(my_disc, filename, \"prob_with_emulated_volumes_solution\")\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Putting the above pieces together, the function `postprocess(station_nums, ref_num)` will be written as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def postprocess(station_nums, ref_num):\n", + " \n", + " filename = 'P_q'+str(station_nums[0]+1)+'_q'\n", + " if len(station_nums) == 3:\n", + " filename += '_q'+str(station_nums[2]+1)\n", + " filename += '_ref_'+str(ref_num+1)\n", + "\n", + " data = Q[:, station_nums]\n", + " output_sample_set = sample.sample_set(data.shape[1])\n", + " output_sample_set.set_values(data)\n", + " q_ref = Q_ref[ref_num, station_nums]\n", + "\n", + " # Create Simple function approximation\n", + " # Save points used to parition D for simple function approximation and the\n", + " # approximation itself (this can be used to make close comparisions...)\n", + " output_probability_set = sfun.regular_partition_uniform_distribution_rectangle_scaled(\\\n", + " output_sample_set, q_ref, rect_scale=0.15,\n", + " cells_per_dimension=np.ones((data.shape[1],)))\n", + "\n", + " num_l_emulate = 1e4\n", + " set_emulated = bsam.random_sample_set('r', lam_domain, num_l_emulate)\n", + " my_disc = sample.discretization(input_sample_set, output_sample_set,\n", + " output_probability_set, emulated_input_sample_set=set_emulated)\n", + "\n", + " print \"Finished emulating lambda samples\"\n", + "\n", + " # Calculate P on lambda emulate\n", + " print \"Calculating prob_on_emulated_samples\"\n", + " calcP.prob_on_emulated_samples(my_disc)\n", + " sample.save_discretization(my_disc, filename, \"prob_on_emulated_samples_solution\")\n", + "\n", + " # Calclate P on the actual samples with assumption that voronoi cells have\n", + " # equal size\n", + " input_sample_set.estimate_volume_mc()\n", + " print \"Calculating prob\"\n", + " calcP.prob(my_disc)\n", + " sample.save_discretization(my_disc, filename, \"prob_solution\")\n", + "\n", + " # Calculate P on the actual samples estimating voronoi cell volume with MC\n", + " # integration\n", + " calcP.prob_with_emulated_volumes(my_disc)\n", + " print \"Calculating prob_with_emulated_volumes\"\n", + " sample.save_discretization(my_disc, filename, \"prob_with_emulated_volumes_solution\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, having defined our postprocessing function, we calculate $\\hat{\\rho}_{\\Lambda, j}$ for three reference solutions and 3 QoI:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ref_nums = [6, 11, 15] # 7, 12, 16\n", + "stations = [1, 4, 5] # 2, 5, 6\n", + "\n", + "ref_nums, stations = np.meshgrid(ref_nums, stations)\n", + "ref_nums = ref_nums.ravel()\n", + "stations = stations.ravel()\n", + "\n", + "for tnum, stat in zip(ref_nums, stations):\n", + " postprocess([0], tnum)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example: Estimate the parameter space probabiliy density with a 2D data space\n", + "([From BET Documentation](http://ut-chg.github.io/BET/examples/example_rst_files/Q_2D.html))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ 
+ "In this example the parameter space $\\Lambda \\subset \\mathbb{R}^2$ is 2 dimensional. This example demostrates three different methods to estimate $\\hat{\\rho}_{\\Lambda, j}$ where\n", + "$$P_\\Lambda \\approx \\sum_{\\mathcal{V}_j \\subset A} \\hat{\\rho}_{\\Lambda, j}.$$\n", + "\n", + "These methods are distinguished primarily by the way $\\mathcal{V}_j$ are defined and the approximation of the volume of $\\mathcal{V}_j$. See [Q_2D.py](Q_2D.py) for the example source code. Since this example is essentially the same as the previous example in this notebook that estimates the parameter space probabiliy density with a 1D data space we will only highlight the differences between the two.\n", + "\n", + ">**Note:** *If the code from the previous example above has already been run, then the majority of environment has already been defined and the following code excerpts can be run as written.*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, redefine the input sample set, here it is 2D rather than 1D:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Import \"Truth\"\n", + "mdat = sio.loadmat('../matfiles/Q_2D')\n", + "Q = mdat['Q']\n", + "Q_ref = mdat['Q_true']\n", + "\n", + "# Import Data\n", + "points = mdat['points']\n", + "lam_domain = np.array([[0.07, .15], [0.1, 0.2]]) # Note this is now 2D\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "input_sample_set = sample.sample_set(points.shape[0])\n", + "input_sample_set.set_values(points.transpose())\n", + "input_sample_set.set_domain(lam_domain)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Edit the postprocessing function, `postprocess(station_nums, ref_num)`, defined earlier in the following ways.\n", + "\n", + "First, change the save filename for the estimates of $\\hat{\\rho}_{\\Lambda, j}$:\n", + "\n", + "```python\n", + " filename = 'P_q'+str(station_nums[0]+1)+'_q'+str(station_nums[1]+1)\n", + " if len(station_nums) == 3:\n", + " filename += '_q'+str(station_nums[2]+1)\n", + " filename += '_truth_'+str(ref_num+1)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define the data space $\\mathcal{D} \\subset \\mathbb{R}^d$ where $d$ is the dimension of the data space:\n", + "\n", + "```python\n", + " data = Q[:, station_nums]\n", + " output_sample_set = sample.sample_set(data.shape[1])\n", + " output_sample_set.set_values(data)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define the refernce solution. We define a region of interest, $R_{ref} \\subset \\mathcal{D}$ centered at $Q_{ref}$ with sides 15% the length of $q_{station\\_num[0]}$ and $q_{station\\_num[1]}$ (the QoI for stations $n$). 
We set $\\rho_\\mathcal{D}(q) = \\dfrac{\\mathbf{1}_{R_{ref}}(q)}{||\\mathbf{1}_{R_{ref}}||}$ and then create a simple function approximation to this density:\n", + "\n", + "```python\n", + " q_ref = Q_ref[ref_num, station_nums]\n", + "\n", + " output_probability_set = sfun.regular_partition_uniform_distribution_rectangle_scaled(\\\n", + " output_sample_set, q_ref, rect_scale=0.15,\n", + " cells_per_dimension=np.ones((data.shape[1],)))\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As above, the postprocessing function, `postprocess(station_nums, ref_num)`, will estimate the parameter $\\hat{\\rho}_{\\Lambda, j}$ using the three different methods discussed earlier. The modified `postprocess(station_nums, ref_num)` function is shown in its entirety below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def postprocess(station_nums, ref_num):\n", + " \n", + " filename = 'P_q'+str(station_nums[0]+1)+'_q'+str(station_nums[1]+1)\n", + " if len(station_nums) == 3:\n", + " filename += '_q'+str(station_nums[2]+1)\n", + " filename += '_ref_'+str(ref_num+1)\n", + "\n", + " data = Q[:, station_nums]\n", + " output_sample_set = sample.sample_set(data.shape[1])\n", + " output_sample_set.set_values(data)\n", + " q_ref = Q_ref[ref_num, station_nums]\n", + "\n", + " # Create Simple function approximation\n", + " # Save points used to parition D for simple function approximation and the\n", + " # approximation itself (this can be used to make close comparisions...)\n", + " output_probability_set = sfun.regular_partition_uniform_distribution_rectangle_scaled(\\\n", + " output_sample_set, q_ref, rect_scale=0.15,\n", + " cells_per_dimension=np.ones((data.shape[1],)))\n", + "\n", + " num_l_emulate = 1e4\n", + " set_emulated = bsam.random_sample_set('r', lam_domain, num_l_emulate)\n", + " my_disc = sample.discretization(input_sample_set, output_sample_set,\n", + " output_probability_set, emulated_input_sample_set=set_emulated)\n", + "\n", + " print \"Finished emulating lambda samples\"\n", + "\n", + " # Calculate P on lambda emulate\n", + " print \"Calculating prob_on_emulated_samples\"\n", + " calcP.prob_on_emulated_samples(my_disc)\n", + " sample.save_discretization(my_disc, filename, \"prob_on_emulated_samples_solution\")\n", + "\n", + " # Calclate P on the actual samples with assumption that voronoi cells have\n", + " # equal size\n", + " input_sample_set.estimate_volume_mc()\n", + " print \"Calculating prob\"\n", + " calcP.prob(my_disc)\n", + " sample.save_discretization(my_disc, filename, \"prob_solution\")\n", + "\n", + " # Calculate P on the actual samples estimating voronoi cell volume with MC\n", + " # integration\n", + " calcP.prob_with_emulated_volumes(my_disc)\n", + " print \"Calculating prob_with_emulated_volumes\"\n", + " sample.save_discretization(my_disc, filename, \"prob_with_emulated_volumes_solution\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we calculate $\\hat{\\rho}_{\\Lambda, j}$ for three reference solutions and the QoI $( (q_1,q_2), (q_1, q_5)$, and $(q_1, q_6))$." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Post-process and save P and emulated points\n", + "ref_nums = [6, 11, 15] # 7, 12, 16\n", + "stations = [1, 4, 5] # 2, 5, 6\n", + "\n", + "ref_nums, stations = np.meshgrid(ref_nums, stations)\n", + "ref_nums = ref_nums.ravel()\n", + "stations = stations.ravel()\n", + "\n", + "for tnum, stat in zip(ref_nums, stations):\n", + " postprocess([0, stat], tnum)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example: Estimate the parameter space probabiliy density with a 3D data space\n", + "\n", + "([From BET Documentation](http://ut-chg.github.io/BET/examples/example_rst_files/Q_3D.html))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In these examples the parameter space $\\Lambda \\subset \\mathbb{R}^3$ is 3 dimensional.\n", + "\n", + "This example demostrates how to estimate $\\hat{\\rho}_{\\Lambda, j}$ using `prob()` where\n", + "$$P_\\Lambda \\approx \\sum_{\\mathcal{V}_j \\subset A} \\hat{\\rho}_{\\Lambda, j}.$$\n", + "\n", + "See [Q_3D.py](Q_3D.py) for the example source code. Since example is essentially the same as the previous examples in this notebook for estimating the parameter space probabiliy density with a 1D and 2D data spaces, we will only highlight the differences between the two.\n", + "\n", + ">**Note:** *If the code from the previous example above has already been run, then the majority of environment has already been defined and the following code excerpts can be run as written.*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, instead of loading data for a 2-dimensional parameter space we load data for a 3-dimensional data space:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import \"Truth\"\n", + "mdat = sio.loadmat('../matfiles/Q_3D')\n", + "Q = mdat['Q']\n", + "Q_ref = mdat['Q_true']\n", + "\n", + "# Import Data\n", + "samples = mdat['points'].transpose()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We define the parameter domain $\\Lambda$:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "lam_domain = np.array([[-900, 1200], [0.07, .15], [0.1, 0.2]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define the input sample set, here it is 3D rather than 2D:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Create input, output, and discretization from data read from file\n", + "points = mdat['points']\n", + "input_sample_set = sample.sample_set(points.shape[0])\n", + "input_sample_set.set_values(points.transpose())\n", + "input_sample_set.set_domain(lam_domain)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In our postprocessing function, simply change the naming convention for the filename to save $\\hat{\\rho}_{\\Lambda, j}$:\n", + "\n", + "```python\n", + " filename = 'P_q'+str(station_nums[0]+1)+'_q'+str(station_nums[1]+1)\n", + " if len(station_nums) == 3:\n", + " filename += '_q'+str(station_nums[2]+1)\n", + " filename += '_ref_'+str(ref_num+1)\n", + "```\n", + "\n", + "The edited postprocessing function `postprocess(station_nums, ref_num)` is shown in its entirety below:" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def postprocess(station_nums, ref_num):\n", + " \n", + " filename = 'P_q'+str(station_nums[0]+1)+'_q'+str(station_nums[1]+1)\n", + " if len(station_nums) == 3:\n", + " filename += '_q'+str(station_nums[2]+1)\n", + " filename += '_ref_'+str(ref_num+1)\n", + "\n", + " data = Q[:, station_nums]\n", + " output_sample_set = sample.sample_set(data.shape[1])\n", + " output_sample_set.set_values(data)\n", + " q_ref = Q_ref[ref_num, station_nums]\n", + "\n", + " # Create Simple function approximation\n", + " # Save points used to parition D for simple function approximation and the\n", + " # approximation itself (this can be used to make close comparisions...)\n", + " output_probability_set = sfun.regular_partition_uniform_distribution_rectangle_scaled(\\\n", + " output_sample_set, q_ref, rect_scale=0.15,\n", + " cells_per_dimension=np.ones((data.shape[1],)))\n", + "\n", + " my_disc = sample.discretization(input_sample_set, output_sample_set,\n", + " output_probability_set)\n", + "\n", + " # Calclate P on the actual samples with assumption that voronoi cells have\n", + " # equal size\n", + " input_sample_set.estimate_volume_mc()\n", + " print \"Calculating prob\"\n", + " calcP.prob(my_disc)\n", + " sample.save_discretization(my_disc, filename, \"prob_solution\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example Solutions\n", + "\n", + "Finally, we calculate $\\hat{\\rho}_{\\Lambda, j}$ for the 15th reference solution at:\n", + "\n", + "* $Q = (q_1, q_5, q_2)$, \n", + "* $Q=(q_1, q_5)$, \n", + "* $Q=(q_1, q_5, q_{12})$,\n", + "* $Q=(q_1, q_9, q_7),$ and \n", + "* $Q=(q_1, q_9, q_{12})$.\n", + "\n", + "Try other reference solutions or other points in $Q$.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Post-process and save P and emulated points\n", + "ref_num = 14 # 15th reference solution\n", + "#ref_num = 15 # 16th reference solution\n", + "\n", + "# q1, q5, q2 \n", + "station_nums = [0, 4, 1] # 1, 5, 2\n", + "postprocess(station_nums, ref_num)\n", + "\n", + "\n", + "# q1, q5\n", + "# station_nums = [0, 4] # 1, 5\n", + "# postprocess(station_nums, ref_num)\n", + "\n", + "# q1, q5, q12\n", + "#station_nums = [0, 4, 11] # 1, 5, 12\n", + "#postprocess(station_nums, ref_num)\n", + "\n", + "\n", + "#station_nums = [0, 8, 6] # 1, 9, 7\n", + "#postprocess(station_nums, ref_num)\n", + "\n", + "\n", + "#station_nums = [0, 8, 11] # 1, 9, 12\n", + "#postprocess(station_nums, ref_num)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/measure_methods/linearMap/BET_linearMap_explained.ipynb b/examples/measure_methods/linearMap/BET_linearMap_explained.ipynb new file mode 100644 index 00000000..85f2c6f5 --- /dev/null +++ b/examples/measure_methods/linearMap/BET_linearMap_explained.ipynb @@ -0,0 +1,427 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ 
+ "# Example: Linear Map with Uniform Sampling\n", + "([From BET Documentation](http://ut-chg.github.io/BET/examples/example_rst_files/linearMapUniformSampling.html#linearmap))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will walk through the following [example](BET_linearMap.py). This example solves a stochastic inverse problem for a linear 3-to-2 map. We refer to the map as the QoI map, or just a QoI. We refer to the range of the QoI map as the data space. The 3-D input space is discretized with i.i.d. uniform random samples or a regular grid of samples. We refer to the input space as the parameter space, and use parameter to refer to a particular point (e.g., a particular random sample) in this space. A reference parameter is used to define a reference QoI datum and a uniform probability measure is defined on a small box centered at this datum. The measure on the data space is discretized either randomly or deterministically, and this discretized measure is then inverted by BET to determine a probability measure on the parameter space whose support contains the measurable sets of probable parameters. We often use emulation to estimate the measures of sets when random sampling is used. 1D and 2D marginals are calculated, smoothed, and plotted.\n", + "\n", + "The actual process is quite simple requiring a total of 5 steps to solve the stochastic inverse problem with BET excluding any post-processing the user may want. In general the user will probably not write code with various options as was done here for pedagogical purposes. We break down the actual example included with BET step-by-step below, but first, to showcase the overall simplicitly, we show the “entire” code (omitting setting the environment, post-processing, and commenting) required for solving the stochastic inverse problem using some default options:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```python \n", + "sampler = bsam.sampler(my_model)\n", + "\n", + "input_samples = samp.sample_set(3)\n", + "input_samples.set_domain(np.repeat([[0.0, 1.0]], 3, axis=0))\n", + "input_samples = sampler.regular_sample_set(input_samples, num_samples_per_dim=[15, 15, 10])\n", + "input_samples.estimate_volume_mc()\n", + "\n", + "my_discretization = sampler.compute_QoI_and_create_discretization(input_samples)\n", + "\n", + "param_ref = np.array([0.5, 0.5, 0.5])\n", + "Q_ref = my_model(param_ref)\n", + "simpleFunP.regular_partition_uniform_distribution_rectangle_scaled(\n", + " data_set=my_discretization, Q_ref=Q_ref, rect_scale=0.25,\n", + " cells_per_dimension = 3)\n", + "\n", + "calculateP.prob(my_discretization)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (0): Setting up the Environment\n", + "Import the necessary modules:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import bet.calculateP.simpleFunP as simpleFunP\n", + "import bet.calculateP.calculateP as calculateP\n", + "import bet.postProcess.plotP as plotP\n", + "import bet.postProcess.plotDomains as plotD\n", + "import bet.sample as samp\n", + "import bet.sampling.basicSampling as bsam" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "## Step (1): Define interface to the model\n", + "\n", + "Import the Python script interface to the (simple Python) [model](myModel.py) that takes as input a numpy 
array of model input parameter samples, generated from the sampler (see below), evaluates the model to generate QoI samples, and returns the QoI samples:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from myModel import my_model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define the sampler that will be used to create the discretization object, which is the fundamental object used by BET to compute solutions to the stochastic inverse problem. The sampler and my_model is the interface of BET to the model, and it allows BET to create input/output samples of the model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "sampler = bsam.sampler(my_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (2): Describe and sample the input space\n", + "Initialize the (3-dimensional) input parameter sample set object and set the parameter domain to be a unit-cube:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "input_samples = samp.sample_set(3)\n", + "input_samples.set_domain(np.repeat([[0.0, 1.0]], 3, axis=0))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Suggested changes for user exploration (1):\n", + "Try with and without random sampling.\n", + "\n", + "* If using random sampling, try `num_samples = 1E3` and `num_samples = 1E4`. \n", + "* See what happens when `num_samples = 1E2`. \n", + "* Try using `'lhs'` instead of `'random'` in the `random_sample_set`.\n", + "\n", + "If using regular sampling, try different numbers of samples per dimension:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "randomSampling = False\n", + "if randomSampling is True:\n", + " input_samples = sampler.random_sample_set('random', input_samples, num_samples=1E3)\n", + "else:\n", + " input_samples = sampler.regular_sample_set(input_samples, num_samples_per_dim=[15, 15, 10])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Suggested changes for user exploration (2):\n", + "\n", + "A standard Monte Carlo (MC) assumption is that every Voronoi cell has the same volume. 
If a regular grid of samples was used, then the standard MC assumption is true.\n", + "\n", + "See what happens if the MC assumption is not assumed to be true, and if different numbers of points are used to estimate the volumes of the Voronoi cells:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "MC_assumption = True\n", + "if MC_assumption is False:\n", + " input_samples.estimate_volume(n_mc_points=1E5)\n", + "else:\n", + " input_samples.estimate_volume_mc()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (3): Generate QoI samples\n", + "Create the discretization object holding all the input (parameter) samples and output (QoI) samples using the sampler:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "my_discretization = sampler.compute_QoI_and_create_discretization(input_samples,\n", + " savefile = '3to2_discretization.txt.gz')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "At this point, all of the model information has been extracted for BET (with the possibly exception of evaluating the model to generate a reference QoI datum or a distribution of the QoI), so the model is no longer required for evaluation. The user could do Steps (0)-(3) in a separate script, and then simply load the discretization object as part of a separate BET script that does the remaining steps. When the model is expensive to evaluate, this is an attractive option since we can now solve the stochastic inverse problem (with many different distributions defined on the data space) without ever having to re-solve the model (so long as we are happy with the resolution provided by the current discretization of the parameter and data spaces)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (4): Describe the data distribution\n", + "This problem is nominally a “parameter identification under uncertainty” problem. Thus, we take a reference QoI datum (from one more model solve), and define a distribution “around” this datum." 
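+ "\n",
+ "If Steps (0)-(3) were run in a separate script as described above, the saved discretization would first be reloaded in the new script before continuing with Steps (4)-(5). A minimal sketch, assuming the `load_discretization` helper in `bet.sample` (the counterpart of the `save_discretization` calls used in other examples) and the save file name used in Step (3):\n",
+ "\n",
+ "```python\n",
+ "import bet.sample as samp\n",
+ "\n",
+ "# reload the discretization object saved by the sampler in Step (3)\n",
+ "my_discretization = samp.load_discretization('3to2_discretization.txt.gz')\n",
+ "```"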
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Suggested changes for user exploration (3):\n", + "Try different reference parameters that produce different reference QoI data.:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "param_ref = np.array([0.5, 0.5, 0.5])\n", + "#param_ref = np.array([0.75, 0.75, 0.5])\n", + "#param_ref = np.array([0.75, 0.75, 0.75])\n", + "#param_ref = np.array([0.5, 0.5, 0.75])\n", + "\n", + "Q_ref = my_model(param_ref)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use the reference samples and discretization to generate plots (this is completely optional):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "plotD.scatter_2D_multi(input_samples, ref_sample= param_ref, showdim = 'all',\n", + " filename = 'linearMap_ParameterSamples',\n", + " file_extension = '.svg')\n", + "plotD.show_data_domain_2D(my_discretization, Q_ref = Q_ref, file_extension='.svg')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from IPython.display import SVG\n", + "SVG(\"q1_q2_domain_Q_cs.svg\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Further figures can be found in the generated figure file folder, [figs/](figs/)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Suggested changes for user exploration (4):\n", + "\n", + "Try different ways of discretizing the probability measure on the data space defined as a uniform probability measure on a rectangle centered at Q_ref whose size is determined by scaling the circumscribing box of the data space:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "randomDataDiscretization = False\n", + "if randomDataDiscretization is False:\n", + " simpleFunP.regular_partition_uniform_distribution_rectangle_scaled(\n", + " data_set=my_discretization, Q_ref=Q_ref, rect_scale=0.25,\n", + " cells_per_dimension = 3)\n", + "else:\n", + " simpleFunP.uniform_partition_uniform_distribution_rectangle_scaled(\n", + " data_set=my_discretization, Q_ref=Q_ref, rect_scale=0.25,\n", + " M=50, num_d_emulate=1E5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (5): Solve the stochastic inverse problem\n", + "\n", + "Calculate probablities on the parameter space (which are stored within the discretization object):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "calculateP.prob(my_discretization)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (6) [Optional]: Post-processing\n", + "\n", + "There are ways to determine “optimal” smoothing parameters (e.g., see CV, GCV, and other similar methods), but we have not incorporated these into the code as lower-dimensional marginal plots generally have limited value in understanding the structure of a high dimensional non-parametric probability measure.\n", + "\n", + "The user may want to change nbins or sigma in the plotP.* inputs (which influences the kernel density estimation with smaller values of sigma implying a density estimate that looks more like a histogram and larger values smoothing out 
the values more).\n", + "\n", + "In general, the user will have to tune these for any given problem especially when looking at marginals of higher-dimensional problems with parameter ranges that have disparate scales (assuming the parameters were not first normalized as part of a “un-dimensionalization” of the space, which is highly encouraged):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "(bins, marginals2D) = plotP.calculate_2D_marginal_probs(input_samples,\n", + " nbins = [10, 10, 10])\n", + "\n", + "marginals2D = plotP.smooth_marginals_2D(marginals2D, bins, sigma=0.2)\n", + "\n", + "plotP.plot_2D_marginal_probs(marginals2D, bins, input_samples, filename = \"linearMap\",\n", + " lam_ref=param_ref, file_extension = \".svg\", plot_surface=False)\n", + "\n", + "(bins, marginals1D) = plotP.calculate_1D_marginal_probs(input_samples,\n", + " nbins = [10, 10, 10])\n", + "marginals1D = plotP.smooth_marginals_1D(marginals1D, bins, sigma=0.2)\n", + "\n", + "plotP.plot_1D_marginal_probs(marginals1D, bins, input_samples, filename = \"linearMap\",\n", + " lam_ref=param_ref, file_extension = \".svg\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# hack to refresh html after changes within notebook\n", + "import random\n", + "__counter__ = random.randint(0,2e9)\n", + "\n", + "# displays 1D marginal probabilities\n", + "from IPython.display import HTML, display\n", + "display(HTML(\"\"+\n", + " \"\"% __counter__+\n", + " \"\"% __counter__+\n", + " \"\"% __counter__+\n", + " \"
1D Marginals
\" ))\n", + "\n", + "# displays 2D marginal probabilities\n", + "display(HTML(\"\"+\n", + " \"\"% __counter__+\n", + " \"\"% __counter__+\n", + " \"\"% __counter__+\n", + " \"
2D Marginals
\" ))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/measure_methods/nonlinearMap/NonLinearMapExample.ipynb b/examples/measure_methods/nonlinearMap/NonLinearMapExample.ipynb new file mode 100644 index 00000000..d810ad3d --- /dev/null +++ b/examples/measure_methods/nonlinearMap/NonLinearMapExample.ipynb @@ -0,0 +1,558 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example: Nonlinear Map with Uniform Sampling\n", + "([From BET Documentation](http://ut-chg.github.io/BET/examples/example_rst_files/nonlinearMapUniformSampling.html#nonlinearmap))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will walk through the following [example](nonlinearMapUniformSampling.py). This example generates uniform samples in a 2D input parameter space and evaluates a nonlinear map to either a 1D or 2D output data space. The purpose of this example is to study the effect of using different maps to solve the stochastic inverse problem. The nonlinear map is defined as either one or two quantities of interest (QoI) which are defined as spatial observations of the solution to the elliptic PDE\n", + "\n", + "\\begin{cases} -\\nabla \\cdot (A(\\lambda)\\cdot\\nabla u) &= f(x,y;\\lambda), \\ (x,y)\\in\\Omega, \\\\ u|_{\\partial \\Omega} &= 0, \\end{cases}\n", + "\n", + "where\n", + "\n", + "\\begin{split}A(\\lambda)=\\left(\\begin{array}{cc} 1/\\lambda_1^2 & 0 \\\\ 0 & 1/\\lambda_2^2 \\end{array}\\right),\\end{split}\n", + "\n", + "$$ f(x,y;\\lambda) = \\pi^2 \\sin(\\pi x\\lambda_1)\\sin(\\pi y \\lambda_2), $$\n", + "\n", + "and\n", + "\n", + "$$ \\Omega=[0,1]\\times[0,1]. $$\n", + "\n", + "The user can change the number and placement of the QoI by editing this information in [myModel](myModel.py). A reference parameter is used to define a reference QoI datum and a uniform probability measure is defined on a small box centered at this datum. The measure on the data space is discretized either randomly or deterministically, and this discretized measure is then inverted by BET to determine a probability measure on the parameter space whose support contains the measurable sets of probable parameters. We often use emulation to estimate the measures of sets when random sampling is used. 1D and 2D marginals are calculated, smoothed, and plotted.\n", + "\n", + "The actual process is quite simple requiring a total of 5 steps to solve the stochastic inverse problem with BET excluding any post-processing the user may want. In general the user will probably not write code with various options as was done here for pedagogical purposes. 
We break down the actual example included with BET step-by-step below, but first, to showcase the overall simplicitly, we show the “entire” code (omitting setting the environment, post-processing, and commenting) required for solving the stochastic inverse problem using some default options:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```python\n", + "\n", + "sampler = bsam.sampler(my_model)\n", + "\n", + "input_samples = samp.sample_set(2)\n", + "input_samples.set_domain(np.array([[3.0, 6.0],\n", + " [1.0, 5.0]]))\n", + "input_samples = sampler.regular_sample_set(input_samples, num_samples_per_dim=[50, 50])\n", + "input_samples.estimate_volume_mc()\n", + "\n", + "my_discretization = sampler.compute_QoI_and_create_discretization(input_samples)\n", + "\n", + "param_ref = np.array([5.5, 4.5])\n", + "Q_ref = my_model(param_ref)\n", + "simpleFunP.regular_partition_uniform_distribution_rectangle_scaled(\n", + " data_set=my_discretization, Q_ref=Q_ref, rect_scale=0.25,\n", + " cells_per_dimension = 3)\n", + "\n", + "calculateP.prob(my_discretization)\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (0): Setting up the environment\n", + "Import the necessary modules:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import bet.calculateP.simpleFunP as simpleFunP\n", + "import bet.calculateP.calculateP as calculateP\n", + "import bet.postProcess.plotP as plotP\n", + "import bet.postProcess.plotDomains as plotD\n", + "import bet.sample as samp\n", + "import bet.sampling.basicSampling as bsam" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (1): Define interface to the model\n", + "Import the Python script interface to the Python [model](myModel.py) that takes as input a numpy array of model input parameter samples, generated from the sampler (see below), evaluates the model to generate QoI samples, and returns the QoI samples:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from myModel import my_model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define the sampler that will be used to create the discretization object, which is the fundamental object used by BET to compute solutions to the stochastic inverse problem. The sampler and my_model is the interface of BET to the model, and it allows BET to create input/output samples of the model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "sampler = bsam.sampler(my_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Suggested changes for user exploration (1):\n", + "Explore changes to the quantity of interest (QoI) by modifying the [model file](myModel.py). These edits to the file myModel.py can be made here in this Jupyter Notebook. \n", + "\n", + "First, load the file myModel.py by typing the magic command `%load myModel.py` at the top of the cell below and then running the cell. This will load an exact copy of the file in the cell." 
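Because BET only interacts with the model through the array-in/array-out contract described in Step (1), a quick shape check is a useful guard after any edit to the QoI definitions. The snippet below is a minimal sketch, not part of the original example: the sample values are hypothetical, and it assumes myModel.py (shown in the next cell) is importable from the working directory.

```python
import numpy as np
from myModel import my_model

# Two hypothetical samples drawn from the parameter domain [3, 6] x [1, 5]
test_samples = np.array([[3.5, 2.0],
                         [5.5, 4.5]])

QoI_samples = my_model(test_samples)

# Expect shape (2, 1) when QoI_num = 1 in myModel.py, or (2, 2) when QoI_num = 2
print(QoI_samples.shape)
```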
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# %load myModel.py\n", + "# Copyright (C) 2016 The BET Development Team\n", + "\n", + "# -*- coding: utf-8 -*-\n", + "import numpy as np\n", + "import math as m\n", + "\n", + "'''\n", + "Suggested changes for user:\n", + "\n", + "Try setting QoI_num = 2.\n", + "\n", + "Play around with the x1, y1, and/or, x2, y2 values to try and\n", + "\"optimize\" the QoI to give the highest probability region\n", + "on the reference parameter above.\n", + "\n", + "Hint: Try using QoI_num = 1 and systematically varying the\n", + "x1 and y1 values to find QoI with contour structures (as inferred\n", + "through the 2D marginal plots) that are nearly orthogonal.\n", + "\n", + "Some interesting pairs of QoI to compare are:\n", + "(x1,y1)=(0.5,0.5) and (x2,y2)=(0.25,0.25)\n", + "(x1,y1)=(0.5,0.5) and (x2,y2)=(0.15,0.15)\n", + "(x1,y1)=(0.5,0.5) and (x2,y2)=(0.25,0.15)\n", + "'''\n", + "# Choose the number of QoI\n", + "QoI_num = 1\n", + "\n", + "# Specify the spatial points to take measurements of solution defining the QoI\n", + "if QoI_num == 1:\n", + " x1 = 0.5\n", + " y1 = 0.5\n", + " x = np.array([x1])\n", + " y = np.array([y1])\n", + "else:\n", + " x1 = 0.5\n", + " y1 = 0.15\n", + " x2 = 0.15\n", + " y2 = 0.25\n", + " x = np.array([x1, x2])\n", + " y = np.array([y1, y2])\n", + "\n", + "class QoI_component(object):\n", + " def __init__(self, x, y):\n", + " self.x = x\n", + " self.y = y\n", + " def eval(self, parameter_samples):\n", + " if parameter_samples.shape == (2,):\n", + " lam1 = parameter_samples[0]\n", + " lam2 = parameter_samples[1]\n", + " else:\n", + " lam1 = parameter_samples[:,0]\n", + " lam2 = parameter_samples[:,1]\n", + " z = np.sin(m.pi * self.x * lam1) * np.sin(m.pi * self.y * lam2)\n", + " return z\n", + "\n", + "# Specify the QoI maps\n", + "if QoI_num == 1:\n", + " def QoI_map(parameter_samples):\n", + " Q1 = QoI_component(x[0], y[0])\n", + " return np.array([Q1.eval(parameter_samples)]).transpose()\n", + "else:\n", + " def QoI_map(parameter_samples):\n", + " Q1 = QoI_component(x[0], y[0])\n", + " Q2 = QoI_component(x[1], y[1])\n", + " return np.array([Q1.eval(parameter_samples), Q2.eval(parameter_samples)]).transpose()\n", + "\n", + "# Define a model that is the QoI map\n", + "def my_model(parameter_samples):\n", + " QoI_samples = QoI_map(parameter_samples)\n", + " return QoI_samples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In myModel.py file loaded in the cell above find the section of code which handles the QoI (the quantities of interest):\n", + "```python\n", + "# Choose the number of QoI\n", + "QoI_num = 1\n", + "\n", + "# Specify the spatial points to take measurements of solution defining the QoI\n", + "if QoI_num == 1:\n", + " x1 = 0.5\n", + " y1 = 0.5\n", + " x = np.array([x1])\n", + " y = np.array([y1])\n", + "else:\n", + " x1 = 0.5\n", + " y1 = 0.15\n", + " x2 = 0.15\n", + " y2 = 0.25\n", + " x = np.array([x1, x2])\n", + " y = np.array([y1, y2])\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Try different choices of QoI by changing the `x1`, `y1`, and/or, `x2`, `y2` (if you have set `QoI_num = 2`) in order to “optimize” the QoI so that the solution to the stochastic inverse problem is a small region of high probability centered around the reference parameter (see below for more information on choosing the reference parameter in the main script).\n", + "\n", + "It 
is a good idea to start with `QoI_num = 1` and systematically vary the `x1` and `y1` values to find QoI with contour structures (as inferred through the 2D marginal plots) that are nearly orthogonal.\n", + "\n", + "Some interesting pairs of QoI to compare are:\n", + "\n", + "* `(x1,y1)=(0.5,0.5)` and `(x2,y2)=(0.25,0.25)`\n", + "* `(x1,y1)=(0.5,0.5)` and `(x2,y2)=(0.15,0.15)`\n", + "* `(x1,y1)=(0.5,0.5)` and `(x2,y2)=(0.25,0.15)`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When you have made the appropriate edits to myModel.py, save the file by deleting the first line in the cell (`# %load myModel.py`) and then typing and running the magic command `%%writefile myModel.py`. This will write the edits you have made to the myModel.py file.\n", + "\n", + "*Note: the `%%writefile myModel.py` magic command must be at the top of the cell to write the file properly. To reload the file, just delete the writefile magic command and type the magic command %load myModel.py to the top of the cell.*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (2): Describe and sample the input space\n", + "Initialize the (2-dimensional) input parameter sample set object and set the parameter domain (chosen here so that the QoI maps have interesting features):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "input_samples = samp.sample_set(2)\n", + "input_samples.set_domain(np.array([[3.0, 6.0],\n", + " [1.0, 5.0]]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Suggested changes for user exploration (2):\n", + "Try with and without random sampling.\n", + "\n", + "If using **random** sampling:\n", + "\n", + "* Try `num_samples = 1E3` and `num_samples = 1E4` \n", + "* See what happens when `num_samples = 1E2`\n", + "* Try using `'lhs'` instead of `'random'` in the `random_sample_set`\n", + "\n", + "If using **regular** sampling, try different numbers of samples per dimension:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "randomSampling = False\n", + "if randomSampling is True:\n", + " input_samples = sampler.random_sample_set('random', input_samples, num_samples=1E4)\n", + "else:\n", + " input_samples = sampler.regular_sample_set(input_samples, num_samples_per_dim=[50, 50])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Suggested changes for user exploration (3):\n", + "A standard Monte Carlo (MC) assumption is that every Voronoi cell has the same volume. 
If a regular grid of samples was used, then the standard MC assumption is true.\n", + "\n", + "See what happens if the MC assumption is not assumed to be true, and if different numbers of points are used to estimate the volumes of the Voronoi cells:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "MC_assumption = True\n", + "if MC_assumption is False:\n", + " input_samples.estimate_volume(n_mc_points=1E5)\n", + "else:\n", + " input_samples.estimate_volume_mc()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (3): Generate QoI samples\n", + "Create the discretization object holding all the input (parameter) samples and output (QoI) samples using the sampler:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "my_discretization = sampler.compute_QoI_and_create_discretization(input_samples,\n", + " savefile = 'NonlinearExample.txt.gz')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "At this point, all of the model information has been extracted for BET (with the possibly exception of evaluating the model to generate a reference QoI datum or a distribution of the QoI), so the model is no longer required for evaluation. The user could do Steps (0)-(3) in a separate script, and then simply load the discretization object as part of a separate BET script that does the remaining steps. When the model is expensive to evaluate, this is an attractive option since we can now solve the stochastic inverse problem (with many different distributions defined on the data space) without ever having to re-solve the model (so long as we are happy with the resolution provided by the current discretization of the parameter and data spaces)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (4): Describe the data distribution\n", + "This problem is nominally a “parameter identification under uncertainty” problem. Thus, we take a reference QoI datum (from one more model solve), and define a distribution “around” this datum." 
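Before looking at the code, it may help to see what "a distribution around this datum" means concretely. The sketch below is for intuition only, with hypothetical numbers, and is not BET's internal implementation: the uniform option used in Step (4) places all probability on a rectangle centered at the reference datum whose side lengths are `rect_scale` times the size of the box circumscribing the QoI samples.

```python
import numpy as np

# Hypothetical 1D reference datum and data extent (illustration only)
Q_ref = np.array([0.40])
data_extent = np.array([0.90])   # size of the box circumscribing the QoI samples
rect_scale = 0.25

half_width = 0.5 * rect_scale * data_extent
lower_bound = Q_ref - half_width
upper_bound = Q_ref + half_width

# The uniform output probability measure is supported on [lower_bound, upper_bound]
print(lower_bound, upper_bound)
```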
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Suggested changes for user exploration (4):\n", + "Try different reference parameters that produce different reference QoI data.:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "param_ref = np.array([5.5, 4.5])\n", + "Q_ref = my_model(param_ref)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use the reference samples and discretization to generate plots (this is completely optional):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "plotD.scatter_2D(input_samples, ref_sample = param_ref,\n", + " filename = 'nonlinearMapParameterSamples.svg')\n", + "if Q_ref.size == 2:\n", + " plotD.show_data_domain_2D(my_discretization, Q_ref = Q_ref, file_extension=\"svg\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import SVG\n", + "SVG(\"nonlinearMapParameterSamples.svg\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Suggested changes for user exploration (5):\n", + "Try different ways of discretizing the probability measure on the data space defined as a uniform probability measure on a rectangle centered at Q_ref whose size is determined by scaling the circumscribing box of the data space:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "randomDataDiscretization = False\n", + "if randomDataDiscretization is False:\n", + " simpleFunP.regular_partition_uniform_distribution_rectangle_scaled(\n", + " data_set=my_discretization, Q_ref=Q_ref, rect_scale=0.25,\n", + " cells_per_dimension = 3)\n", + "else:\n", + " simpleFunP.uniform_partition_uniform_distribution_rectangle_scaled(\n", + " data_set=my_discretization, Q_ref=Q_ref, rect_scale=0.25,\n", + " M=50, num_d_emulate=1E5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (5): Solve the stochastic inverse problem\n", + "Calculate probablities on the parameter space (which are stored within the discretization object):\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "calculateP.prob(my_discretization)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (6) [Optional]: Post-processing\n", + "There are ways to determine “optimal” smoothing parameters (e.g., see CV, GCV, and other similar methods), but we have not incorporated these into the code as lower-dimensional marginal plots generally have limited value in understanding the structure of a high dimensional non-parametric probability measure.\n", + "\n", + "The user may want to change `nbins` or `sigma` in the `plotP.*` inputs (which influences the kernel density estimation with smaller values of `sigma` implying a density estimate that looks more like a histogram and larger values smoothing out the values more).\n", + "\n", + "In general, the user will have to tune these for any given problem especially when looking at marginals of higher-dimensional problems with parameter ranges that have disparate scales (assuming the parameters were not first normalized as part of a “un-dimensionalization” of the space, which is highly encouraged):" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "(bins, marginals2D) = plotP.calculate_2D_marginal_probs(input_samples,\n", + " nbins = [20, 20])\n", + "\n", + "marginals2D = plotP.smooth_marginals_2D(marginals2D, bins, sigma=0.5)\n", + "\n", + "plotP.plot_2D_marginal_probs(marginals2D, bins, input_samples, filename = \"nonlinearMap\",\n", + " lam_ref = param_ref, file_extension = \".svg\", plot_surface=False)\n", + "\n", + "(bins, marginals1D) = plotP.calculate_1D_marginal_probs(input_samples,\n", + " nbins = [20, 20])\n", + "\n", + "marginals1D = plotP.smooth_marginals_1D(marginals1D, bins, sigma=0.5)\n", + "\n", + "plotP.plot_1D_marginal_probs(marginals1D, bins, input_samples, filename = \"nonlinearMap\",\n", + " lam_ref = param_ref, file_extension = \".svg\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# hack to refresh html after changes within notebook\n", + "import random\n", + "__counter__ = random.randint(0,2e9)\n", + "\n", + "# displays 1D marginal probabilities\n", + "from IPython.display import HTML, display\n", + "display(HTML(\"\"+\n", + " \"\"% __counter__+\n", + " \"\"% __counter__+\n", + " \"\"% __counter__+\n", + " \"
1D Marginals
2D Marginals
\" ))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/measure_methods/validationExample/ValidationExample.ipynb b/examples/measure_methods/validationExample/ValidationExample.ipynb new file mode 100644 index 00000000..091e7994 --- /dev/null +++ b/examples/measure_methods/validationExample/ValidationExample.ipynb @@ -0,0 +1,486 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example: Linear Map with Uniform Sampling\n", + "([From BET Documentation](http://ut-chg.github.io/BET/examples/example_rst_files/validation.html#validation))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will walk through the following [example](linearMap.py). This 2D linear example shows that geometrically distinct QoI can recreate a probability measure on the input parameter space used to define the output probability measure. The user can explore various discretization effects.\n", + "\n", + "1D and 2D marginals are calculated, smoothed, and plotted. The actual process is quite simple requiring a total of 5 steps to solve the stochastic inverse problem with BET excluding any post-processing the user may want. The most complicated part of this problem is Step (4) defining the user distribution on the data space from propagated samples. In general the user will probably not write code with various options as was done here for pedagogical purposes. 
\n", + "\n", + "We break down the actual example included with BET step-by-step below, but first, to showcase the overall simplicitly, we show the “entire” code (omitting setting the environment, post-processing, and commenting) required for solving the stochastic inverse problem using some default options:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```python\n", + "sampler = bsam.sampler(my_model)\n", + "\n", + "input_samples = samp.sample_set(2)\n", + "input_samples.set_domain(np.repeat([[0.0, 1.0]], 2, axis=0))\n", + "input_samples = sampler.regular_sample_set(input_samples, num_samples_per_dim=[30, 30])\n", + "input_samples.estimate_volume_mc()\n", + "\n", + "my_discretization = sampler.compute_QoI_and_create_discretization(input_samples)\n", + "\n", + "num_samples_discretize_D = 10\n", + "num_iid_samples = 1E5\n", + "Partition_set = samp.sample_set(2)\n", + "Monte_Carlo_set = samp.sample_set(2)\n", + "Partition_set.set_domain(np.repeat([[0.0, 1.0]], 2, axis=0))\n", + "Monte_Carlo_set.set_domain(np.repeat([[0.0, 1.0]], 2, axis=0))\n", + "Partition_discretization = sampler.create_random_discretization('random',\n", + " Partition_set,\n", + " num_samples=num_samples_discretize_D)\n", + "Monte_Carlo_discretization = sampler.create_random_discretization('random',\n", + " Monte_Carlo_set,\n", + " num_samples=num_iid_samples)\n", + "simpleFunP.user_partition_user_distribution(my_discretization,\n", + " Partition_discretization,\n", + " Monte_Carlo_discretization)\n", + "\n", + "calculateP.prob(my_discretization)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (0): Setting up the environment\n", + "Import the necessary modules:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import bet.calculateP.simpleFunP as simpleFunP\n", + "import bet.calculateP.calculateP as calculateP\n", + "import bet.postProcess.plotP as plotP\n", + "import bet.postProcess.plotDomains as plotD\n", + "import bet.sample as samp\n", + "import bet.sampling.basicSampling as bsam" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (1): Define the interface to the model\n", + "\n", + "Import the Python script interface to the (simple Python) [model](myModel.py) that takes as input a numpy array of model input parameter samples, generated from the sampler (see below), evaluates the model to generate QoI samples, and returns the QoI samples:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from myModel import my_model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define the sampler that will be used to create the discretization object, which is the fundamental object used by BET to compute solutions to the stochastic inverse problem. 
The sampler and my_model is the interface of BET to the model, and it allows BET to create input/output samples of the model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "sampler = bsam.sampler(my_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (2): Describe and sample input space\n", + "Initialize the (2-dimensional) input parameter sample set object and set the parameter domain to be a unit-square:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "input_samples = samp.sample_set(2)\n", + "input_samples.set_domain(np.repeat([[0.0, 1.0]], 2, axis=0))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Suggested changes for user exploration (1):\n", + "Try with and without random sampling.\n", + "\n", + "If using **random** sampling, try:\n", + "* Setting `num_samples = 1E3` and `num_samples = 1E4` \n", + "* See what happens when `num_samples = 1E2`\n", + "* Using `'lhs'` instead of `'random'` in the `random_sample_set`\n", + "\n", + "If using **regular** sampling, try different numbers of samples per dimension." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "randomSampling = False\n", + "if randomSampling is True:\n", + " input_samples = sampler.random_sample_set('random', input_samples, num_samples=1E3)\n", + "else:\n", + " input_samples = sampler.regular_sample_set(input_samples, num_samples_per_dim=[30, 30])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Suggested changes for user exploration (2):\n", + "A standard Monte Carlo (MC) assumption is that every Voronoi cell has the same volume. If a regular grid of samples was used, then the standard MC assumption is true.\n", + "\n", + "See what happens if the MC assumption is not assumed to be true, and if different numbers of points are used to estimate the volumes of the Voronoi cells:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "MC_assumption = True\n", + "if MC_assumption is False:\n", + " input_samples.estimate_volume(n_mc_points=1E5)\n", + "else:\n", + " input_samples.estimate_volume_mc()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (3): Generate QoI samples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create the discretization object holding all the input (parameter) samples and output (QoI) samples using the sampler:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "my_discretization = sampler.compute_QoI_and_create_discretization(input_samples,\n", + " savefile = 'Validation_discretization.txt.gz')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "At this point, all of the model information has been extracted for BET, so the model is no longer required for evaluation. The user could do Steps (0)-(3) in a separate script, and then simply load the discretization object as part of a separate BET script that does the remaining steps. 
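As a sketch of that split, a follow-up script might begin as shown below. This is not part of the original example, and it assumes your BET release exposes the `bet.sample` save/load helpers (recent releases provide `load_discretization`); check the API of your installation before relying on the exact names.

```python
# Hypothetical follow-up script: reuse the saved discretization without
# re-evaluating the model. Assumes bet.sample.load_discretization exists
# in the installed BET version.
import bet.sample as samp
import bet.calculateP.simpleFunP as simpleFunP
import bet.calculateP.calculateP as calculateP

my_discretization = samp.load_discretization('Validation_discretization.txt.gz')
input_samples = my_discretization.get_input_sample_set()

# Steps (4)-(5) below (defining the data distribution and calling
# calculateP.prob) can now run with no further model evaluations.
```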
When the model is expensive to evaluate, this is an attractive option since we can now solve the stochastic inverse problem (with many different distributions defined on the data space) without ever having to re-solve the model (so long as we are happy with the resolution provided by the current discretization of the parameter and data spaces)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (4): Describe the data distribution" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This problem is nominally a “parameter distribution estimation” problem and not a “parameter identification under uncertainty” problem (e.g., see [Example: Linear Map with Uniform Sampling](../linearMap/LinearMapExample.ipynb) or almost any of the other examples). Thus, unlike most other examples, the distribution on data space is not coming from uncertain data but rather variable input parameters that vary according to a fixed distribution. The goal is to determine this distribution by inverting the observed distribution on the data space (via discretizing the data space and binning samples)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Suggested changes for user exploration (3):\n", + "Compute the output distribution simple function approximation by propagating a different set of samples to implicitly define a Voronoi discretization of the data space, corresponding to an implicitly defined set of contour events defining a discretization of the input parameter space. The probabilities of the Voronoi cells in the data space (and thus the probabilities of the corresponding contour events in the input parameter space) are determined by Monte Carlo sampling using a set of i.i.d. uniform samples to bin into these cells." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* See the effect of using different values for `num_samples_discretize_D`. \n", + "\n", + "* Choosing `num_samples_discretize_D = 1` produces exactly the right answer and is equivalent to assigning a uniform probability to each data sample above (why?). \n", + "\n", + "* Try setting this to 2, 5, 10, 50, and 100. Can you explain what you are seeing? 
\n", + "\n", + "* To see an exaggerated effect, try using random sampling above with `n_samples` set to `1E2`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_samples_discretize_D = 1\n", + "num_iid_samples = 1E5\n", + "\n", + "Partition_set = samp.sample_set(2)\n", + "Monte_Carlo_set = samp.sample_set(2)\n", + "\n", + "Partition_set.set_domain(np.repeat([[0.0, 1.0]], 2, axis=0))\n", + "Monte_Carlo_set.set_domain(np.repeat([[0.0, 1.0]], 2, axis=0))\n", + "\n", + "Partition_discretization = sampler.create_random_discretization('random',\n", + " Partition_set,\n", + " num_samples=num_samples_discretize_D)\n", + "\n", + "Monte_Carlo_discretization = sampler.create_random_discretization('random',\n", + " Monte_Carlo_set,\n", + " num_samples=num_iid_samples)\n", + "\n", + "simpleFunP.user_partition_user_distribution(my_discretization,\n", + " Partition_discretization,\n", + " Monte_Carlo_discretization)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (5): Solve the stochastic inverse problem\n", + "Calculate probablities on the parameter space (which are stored within the discretization object):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "calculateP.prob(my_discretization)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (6) [Optional]: Post-processing\n", + "Show some plots of the different sample sets:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "plotD.scatter_2D(my_discretization._input_sample_set, filename = 'Parameter_Samples.svg')\n", + "plotD.scatter_2D(my_discretization._output_sample_set, filename = 'QoI_Samples.svg')\n", + "plotD.scatter_2D(my_discretization._output_probability_set, filename = 'Data_Space_Discretization.svg')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# hack to refresh html after changes within notebook\n", + "import random\n", + "__counter__ = random.randint(0,2e9)\n", + "\n", + "# displays 1D marginal probabilities\n", + "from IPython.display import HTML, display\n", + "display(HTML(\"\"+\n", + " \"\"% __counter__+\n", + " \"\"% __counter__+\n", + " \"\"% __counter__+\n", + " \"\"+\n", + " \"
Parameter Samples
QoI Samples
Data Space Discretization
\" ))\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are ways to determine “optimal” smoothing parameters (e.g., see CV, GCV, and other similar methods), but we have not incorporated these into the code as lower-dimensional marginal plots generally have limited value in understanding the structure of a high dimensional non-parametric probability measure.\n", + "\n", + "The user may want to change `nbins` or `sigma` in the `plotP.*` inputs (which influences the kernel density estimation with smaller values of `sigma` implying a density estimate that looks more like a histogram and larger values smoothing out the values more).\n", + "\n", + "In general, the user will have to tune these for any given problem especially when looking at marginals of higher-dimensional problems with parameter ranges that have disparate scales (assuming the parameters were not first normalized as part of a “un-dimensionalization” of the space, which is highly encouraged):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# calculate 2d marginal probs\n", + "(bins, marginals2D) = plotP.calculate_2D_marginal_probs(input_samples,\n", + " nbins = [30, 30])\n", + "\n", + "# plot 2d marginals probs\n", + "plotP.plot_2D_marginal_probs(marginals2D, bins, input_samples, filename = \"validation_raw\",\n", + " file_extension = \".svg\", plot_surface=False)\n", + "\n", + "# smooth 2d marginals probs (optional)\n", + "marginals2D = plotP.smooth_marginals_2D(marginals2D, bins, sigma=0.1)\n", + "\n", + "# plot 2d marginals probs\n", + "plotP.plot_2D_marginal_probs(marginals2D, bins, input_samples, filename = \"validation_smooth\",\n", + " file_extension = \".svg\", plot_surface=False)\n", + "\n", + "# calculate 1d marginal probs\n", + "(bins, marginals1D) = plotP.calculate_1D_marginal_probs(input_samples,\n", + " nbins = [30, 30])\n", + "\n", + "# plot 1d marginal probs\n", + "plotP.plot_1D_marginal_probs(marginals1D, bins, input_samples, filename = \"validation_raw\",\n", + " file_extension = \".svg\")\n", + "\n", + "# smooth 1d marginal probs (optional)\n", + "marginals1D = plotP.smooth_marginals_1D(marginals1D, bins, sigma=0.1)\n", + "\n", + "# plot 1d marginal probs\n", + "plotP.plot_1D_marginal_probs(marginals1D, bins, input_samples, filename = \"validation_smooth\",\n", + " file_extension = \".svg\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1D Marginal Probabilities" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# hack to refresh html after changes within notebook\n", + "import random\n", + "__counter__ = random.randint(0,2e9)\n", + "\n", + "# displays 1D marginal probabilities\n", + "from IPython.display import HTML, display\n", + "display(HTML(\"\"+\n", + " \"\"% __counter__+\n", + " \"\"% __counter__+\n", + " \"\"% __counter__+\n", + " \"\"% __counter__+\n", + " \"
Raw
Raw
Smooth
Smooth
\" ))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2D Marginial Probabilities" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# hack to refresh html after changes within notebook\n", + "import random\n", + "__counter__ = random.randint(0,2e9)\n", + "\n", + "# displays 1D marginal probabilities\n", + "from IPython.display import HTML, display\n", + "display(HTML(\"\"+\n", + " \"\"% __counter__+\n", + " \"\"% __counter__+\n", + " \"
Raw
Smooth
\" ))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/sensitivity/heatplate/OptimizeTempatureMeasurements.ipynb b/examples/sensitivity/heatplate/OptimizeTempatureMeasurements.ipynb new file mode 100644 index 00000000..5fa659cc --- /dev/null +++ b/examples/sensitivity/heatplate/OptimizeTempatureMeasurements.ipynb @@ -0,0 +1,278 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example: Optimizing space-time measurements of temperature on a thin plate\n", + "([From BET Documentation](http://ut-chg.github.io/BET/examples/example_rst_files/chooseOptQoIs_2d.html#chooseqois))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Consider a thin 2-dimensional (square) metal plate constructed by welding together two rectangular metal plates of similar alloy types together. The alloy types differ due to variations in the manufacturing of the rectangular plates, so the thermal diffusivity is different on the left and right sides of the resulting square plate. We want to quantify uncertainties in these thermal diffusivities using the heat equation to model experiments where the square plates are subject to an external, localized, source at the center of the plate. Assuming we have exactly two contact thermometers with which to record exactly two temperature measurements during the experiment, the question is the following: what are the optimal placements of these thermometers in space-time?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "See the manuscript [Experimental Design : Optimizing Quantities of Interest to Reliably Reduce the Uncertainty in Model Input Parameters](http://arxiv.org/abs/1601.06702) for details involving this problem and the simulations used to generate the data in Section 5.1. Here, we take the simulated data from the model problem saved as *.mat files that contains parameter samples chosen in clusters around 16 random points in the input space, and the corresponding QoIs (data) from the points in space shown in Figure 6 of the manuscript at the 50 time steps of the simulation (resulting in 1000 total different possible QoIs to choose from in space-time). We use the clusters of samples to compute gradients of the QoI using either radial basis function, forward, or centered finite difference schemes. These gradients are used to compute the average skewness in the possible 2D maps. We then choose the optimal set of 2 QoIs to use in the inverse problem by minimizing average skewness." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We do not solve the stochastic inverse problem in this example. 
We simply show the process of choosing the optimal QoI according to the criteria of minimizing the average skewness property (to see more about skewness, the interested reader should refer to [Definition and solution of a stochastic inverse problem for the Manning’s n parameter field in hydrodynamic models](http://dx.doi.org/10.1016/j.advwatres.2015.01.011) where the concept was initially introduced)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This [example](chooseOptQoIs_2d.py) takes in samples, specifically chosen in clusters around 16 random points in Lambda, and corresponding QoIs (data) from a simulation modeling the variations in temperature of a thin plate forced by a localized source. It then calculates the gradients using a Radial Basis Function (or Forward Finite Difference or Centered Finite Difference) scheme and uses the gradient information to choose the optimal set of 2 QoIs to use in the inverse problem. This optimality is with respect to the skewness of the gradient vectors." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (0): Setting up the environment\n", + "Import the necessary modules:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import scipy.io as sio\n", + "import bet.sensitivity.gradients as grad\n", + "import bet.sensitivity.chooseQoIs as cqoi\n", + "import bet.Comm as comm\n", + "import bet.sample as sample" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (0*): Understanding your data and computing derivatives" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Computing the skewness (or other criteria such as scaling of measures of inverse sets described in [Experimental Design : Optimizing Quantities of Interest to Reliably Reduce the Uncertainty in Model Input Parameters](http://arxiv.org/abs/1601.06702)) requires a sensitivity analysis. If the code used to generate possible QoI data does not also produce derivatives with respect to model parameters (e.g., by using adjoints), then we can use several different types of finite differencing. Assuming the user wants to work strictly with random sampling (to possibly re-use samples in the parameter space for solving the resulting stochastic inverse problem), then we can compute derivatives using a radial basis function finite difference scheme. Otherwise, we can use typical finite differencing schemes (forward or centered) on regular grids of parameter samples. Understanding the code, derivative capabilities, and/or the types of sampling in the parameter space is crucial to setting up how the gradients of QoI are computed/loaded into this code. 
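As a reminder of what the finite differencing amounts to, here is a standalone sketch of a centered difference gradient estimate for a hypothetical scalar QoI. It is illustration only and is not the radial basis function or finite difference machinery in `bet.sensitivity.gradients` used below.

```python
import numpy as np

def centered_fd_gradient(q, center, h=1e-3):
    """Centered finite-difference estimate of the gradient of q at `center`."""
    center = np.asarray(center, dtype=float)
    grad = np.zeros_like(center)
    for i in range(center.size):
        step = np.zeros_like(center)
        step[i] = h
        grad[i] = (q(center + step) - q(center - step)) / (2.0 * h)
    return grad

# Hypothetical scalar QoI of two thermal diffusivities, at one cluster center
q = lambda lam: np.exp(-lam[0]) * np.sin(lam[1])
print(centered_fd_gradient(q, [1.0, 1.5]))
```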
We provide data files for different types of sampling in the parameter space in `BET/examples/`:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Select the type of finite difference scheme as either RBF, FFD, or CFD\n", + "fd_scheme = 'RBF'\n", + "\n", + "# Import the data from the FEniCS simulation (RBF or FFD or CFD clusters)\n", + "if fd_scheme.upper() in ['RBF', 'FFD', 'CFD']:\n", + " file_name = 'heatplate_2d_16clusters' + fd_scheme.upper() + '_1000qoi.mat'\n", + " matfile = sio.loadmat(file_name)\n", + "else:\n", + " print('no data files for selected finite difference scheme')\n", + " exit()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (1): Define the space of possible QoI maps\n", + "\n", + "In Figure 6 of the manuscript at [Experimental Design : Optimizing Quantities of Interest to Reliably Reduce the Uncertainty in Model Input Parameters](http://arxiv.org/abs/1601.06702), we see that there are 20 spatial points considered and 50 time steps for a total of 1000 different QoI. Since we assume we can only choose 2 of the possible QoI to define a particular QoI map, then we can define a space $\\mathcal{Q}$ of possible QoI maps by this set of 1000 choose 2 possible combinations of measurements.\n", + "\n", + "However, we can define a $\\mathcal{Q}$ of smaller cardinality by restricting the possible maps subject to certain considerations. The QoI are indexed so that the QoI corresponding to indices\n", + "\n", + " (i-1)*20 to i*20\n", + "\n", + "for i between 1 and 50 corresponds to the 20 labeled QoI from Figure 6 at time step i.\n", + "\n", + "Using this information, we can check QoI either across the entire range of all space-time locations (`indexstart = 0`, `indexstop = 1000`), or, we can check the QoI at a particular time (e.g., setting `indexstart=0` and `indexstop = 20` considers all the spatial QoI only at the first time step).\n", + "\n", + "In general, `indexstart` can be any integer between 0 and 998 and `indexstop` must be at least 2 greater than `indexstart` (so between 2 and 1000 subject to the additional constraint that `indexstop` $\\geq$ `indexstart + 2` to ensure that we check at least a single pair of QoI.):" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "indexstart = 0\n", + "indexstop = 20\n", + "qoiIndices = range(indexstart, indexstop)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (2): Create the discretization object from the input and output samples\n", + "Load the sampled parameter and QoI values:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Initialize the necessary sample objects\n", + "input_samples = sample.sample_set(2)\n", + "output_samples = sample.sample_set(1000)\n", + "\n", + "# Set the input sample values from the imported file\n", + "input_samples.set_values(matfile['samples'])\n", + "\n", + "# Set the data fromthe imported file\n", + "output_samples.set_values(matfile['data'])\n", + "\n", + "# Create the cluster discretization\n", + "cluster_discretization = sample.discretization(input_samples, output_samples)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (3): Compute the gradients of all the maps in $\\mathcal{Q}$\n", + "Using whichever finite difference scheme we have 
chosen for our sample set based on Step (0*) above, we now compute the gradients of each component of the QoI maps defining $\\mathcal{Q}$:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Calculate the gradient vectors at each of the 16 centers for each of the\n", + "# QoI maps\n", + "if fd_scheme.upper() in ['RBF']:\n", + " center_discretization = grad.calculate_gradients_rbf(cluster_discretization,\n", + " normalize=False)\n", + "elif fd_scheme.upper() in ['FFD']:\n", + " center_discretization = grad.calculate_gradients_ffd(cluster_discretization)\n", + "else:\n", + " center_discretization = grad.calculate_gradients_cfd(cluster_discretization)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (4): Compute skewness properties of the maps and display information\n", + "We now examine the geometric property of average skewness on the possible QoI maps. The skewness defines an ordering on $\\mathcal{Q}$ that is useful in selecting the optimal QoI map. We can also compute skewness properties for any particular QoI map we want individually. The first step is extracting the subset of samples for which we actually computed derivatives, which are the centers of the clusters determined above:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The average skewness of the QoI map defined by indices 0 and 4 is 1.10345068021\n", + "The 10 smallest condition numbers are in the first column, the corresponding sets of QoIs are in the following columns.\n", + "[[ 1.0000029 18. 19. ]\n", + " [ 1.00000313 17. 19. ]\n", + " [ 1.00000348 16. 18. ]\n", + " [ 1.00000369 16. 17. ]\n", + " [ 1.00000488 8. 18. ]\n", + " [ 1.00000527 8. 17. ]\n", + " [ 1.0000062 12. 19. ]\n", + " [ 1.00000695 12. 16. ]\n", + " [ 1.00000761 8. 12. ]\n", + " [ 1.00000798 9. 18. 
]]\n" + ] + } + ], + "source": [ + "input_samples_centers = center_discretization.get_input_sample_set()\n", + "\n", + "# Choose a specific set of QoIs to check the average skewness of\n", + "index1 = 0\n", + "index2 = 4\n", + "(specific_skewness, _) = cqoi.calculate_avg_skewness(input_samples_centers,\n", + " qoi_set=[index1, index2])\n", + "if comm.rank == 0:\n", + " print 'The average skewness of the QoI map defined by indices ' + str(index1) + \\\n", + " ' and ' + str(index2) + ' is ' + str(specific_skewness)\n", + "\n", + "# Compute the skewness for each of the possible QoI maps determined by choosing\n", + "# any two QoI from the set defined by the indices selected by the\n", + "# ``indexstart`` and ``indexend`` values\n", + "skewness_indices_mat = cqoi.chooseOptQoIs(input_samples_centers, qoiIndices,\n", + " num_optsets_return=10, measure=False)\n", + "\n", + "qoi1 = skewness_indices_mat[0, 1]\n", + "qoi2 = skewness_indices_mat[0, 2]\n", + "\n", + "if comm.rank == 0:\n", + " print 'The 10 smallest condition numbers are in the first column, the \\\n", + "corresponding sets of QoIs are in the following columns.'\n", + " print skewness_indices_mat[:10, :]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/sensitivity/linear/LinearMapMinimizeScale.ipynb b/examples/sensitivity/linear/LinearMapMinimizeScale.ipynb new file mode 100644 index 00000000..3362566d --- /dev/null +++ b/examples/sensitivity/linear/LinearMapMinimizeScale.ipynb @@ -0,0 +1,380 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example: Linear Map Choose QoIs to minimize scale of inverses in measure\n", + "([From BET Documentation](http://ut-chg.github.io/BET/examples/example_rst_files/linear_measure_binratio.html#linear-sensitivity))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here, we consider a simple [example](linear_measure_binratio.py) where a parameter space is given by a 5-dimensional hypercube and the goal is to choose an optimal QoI map from a space of possible QoI maps, denoted by $\\mathcal{Q}$, where each QoI map is linear. We use this simple example to demonstrate the use of the code to optimally choose which possible QoI map does the best job of “scaling” inverse sets to smaller sets. The idea is that if we generally consider a set of high probability in a particular data space defined by the range of a QoI map, we would prefer that the inverse of this set is as small as possible in order to try and identify the parameter responsible for the data. This only makes sense for stochastic inverse problems framed within the context of parameter identification under uncertainty. 
In other words, when the problem is that the data are uncertain due to measurement uncertainty and there is a true/exact parameter responsible for whichever uncertain data is observed, then this is the type of problem for which this optimization criteria is most appropriate.\n", + "\n", + "This example generates uniform random samples in the unit hypercube and corresponding QoIs (data) generated by a linear map Q. We then calculate the gradients using an RBF scheme and use the gradient information to choose the optimal set of 2 (3, 4, ... input_dim) QoIs to use in the inverse problem. This example also demonstrates how we can define $\\mathcal{Q}$ implicitly by creating a \"larger\" vector-valued QoI map in the case where the cardinality is finite and determined by selecting any arbitrary subset of a defined collection of scalar QoI." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (0): Setting up the environment\n", + "Import the necessary modules:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import bet.sensitivity.gradients as grad\n", + "import bet.sensitivity.chooseQoIs as cqoi\n", + "import bet.calculateP.simpleFunP as simpleFunP\n", + "import bet.calculateP.calculateP as calculateP\n", + "import bet.postProcess.postTools as postTools\n", + "import bet.Comm as comm\n", + "import bet.sample as sample" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (1): Define the space of possible QoI maps\n", + "We have a 5 dimensional hypercube for the parameter space and we assume that the space of possible QoI, $\\mathcal{Q}$, is defined by some random linear maps. Specifically, we randomly generate 10 individual scalar linear QoI maps and then consider $\\mathcal{Q}$ to be defined by any 2, 3, ..., `input_dim` dimensional vectors from this set of 10:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Setup the info for the spaces\n", + "input_dim = 5\n", + "QoI_scalar_map_num = 10\n", + "num_samples = 1E5\n", + "num_centers = 10" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "However, rather than define $\\mathcal{Q}$ explicitly, we consider a larger vector-valued map defined by `Q` that contains all of the scalar QoI maps. 
We later determine the optimal QoI maps in terms of the components of this larger map `Q`:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Let the map Q be a random matrix of size (QoI_scalar_map_num, input_dim)\n", + "np.random.seed(0)\n", + "Q = np.random.random([QoI_scalar_map_num, input_dim])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (2): Create the discretization object from the input and output samples\n", + "We randomly sample the input space and generate the corresponding QoI values:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\ProgramData\\Anaconda2\\lib\\site-packages\\ipykernel\\__main__.py:6: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future\n" + ] + } + ], + "source": [ + "# Initialize some sample objects we will need\n", + "input_samples = sample.sample_set(input_dim)\n", + "output_samples = sample.sample_set(QoI_scalar_map_num)\n", + "\n", + "# Choose random samples in parameter space to solve the model\n", + "input_samples.set_values(np.random.uniform(0, 1, [num_samples, input_dim]))\n", + "\n", + "# Compute the output values with the map Q\n", + "output_samples.set_values(Q.dot(input_samples.get_values().transpose()).\\\n", + " transpose())\n", + "\n", + "cluster_discretization = sample.discretization(input_samples, output_samples)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (3): Compute the gradients of all the maps in $\\mathcal{Q}$\n", + "We calculate the gradients using an RBF-FD scheme. We set `normalize=True` because we assume that the sizes of the inverse sets to be inverted in the data space are scaled relative to the size of the circumsribing box of the data space. Thus, all uncertainties are in a sense relative to the sensitivity of the overall QoI map:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "center_discretization = grad.calculate_gradients_rbf(cluster_discretization,\n", + " num_centers, normalize=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (4): Compute scaling properties of the maps and choose the best QoI" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With these gradient vectors, we are now ready to choose an optimal set of QoIs to use in the inverse problem, based on minimizing the measures of inverse sets defined by the scaling effect of the map. The most robust method for this is `:meth:~bet.sensitivity.chooseQoIs.chooseOptQoIs_large` which returns the best set of 2, 3, 4 ... until `input_dim`. This method returns a list of matrices. 
Each matrix has 10 rows, the first column representing the expected scaling of the cross-sections of the contour event associated with inverting a rectangular box, and the rest of the columns the corresponding QoI indices.:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "input_samples_center = center_discretization.get_input_sample_set()\n", + "best_sets = cqoi.chooseOptQoIs_large(input_samples_center, measure=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step (5)[Optional]: Seeing the effect of the optimal QoI choice\n", + "At this point we have determined the optimal set of QoIs to use in the inverse problem. Now we compare the support of the inverse solution using different sets of these QoIs. The user can select which `QoI_indices` to choose from each time:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "QoI_indices = best_sets[0][0, 1:].astype(int) # Chooses the optimal set of 2 QoI\n", + "# QoI_indices = best_sets[0][1,1:].astype(int) # Chooses the second most optimal set of 2 QoI\n", + "# QoI_indices = best_sets[0][9,1:].astype(int) # Chooses the least optimal set of 2 QoI\n", + "# QoI_indices = best_sets[3][0,1:].astype(int) # Chooses the optimal set of 5 QoI\n", + "# QoI_indices = best_sets[3][1,1:].astype(int) # Chooses the second most optimal set of 5 QoI" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Setup the associated set of output samples from the choice of QoI defined by the QoI_indices:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "output_samples._dim = len(QoI_indices)\n", + "output_samples.set_values(output_samples.get_values()[:, QoI_indices])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define the reference datum in the output space to correspond to the center of the input space. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "Q_ref = Q[QoI_indices, :].dot(0.5 * np.ones(input_dim))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`rect_scale` defines the amount of uncertainty in the measured datum relative to the scale of a circumscribing box of the data space:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "rect_scale = 0.25" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make the MC assumption and compute the volumes of each Voronoi cell:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "input_samples.estimate_volume_mc()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create the discretization object:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "my_discretization = sample.discretization(input_sample_set=input_samples,\n", + " output_sample_set=output_samples)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Compute the simple function approximation and then compute the solution to the stochastic inverse problem:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Find the simple function approximation\n", + "simpleFunP.regular_partition_uniform_distribution_rectangle_scaled(\n", + " data_set=my_discretization, Q_ref=Q_ref, rect_scale=rect_scale,\n", + " cells_per_dimension=1)\n", + "\n", + "# Compute the solution to the stochastic inverse problem\n", + "calculateP.prob(my_discretization)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Determine which samples have non-zero probability, which, due to the MC assumption on the volumes, can be used to approximate the ratio of the measure of the parameter space defined by the support of the probability density function solving the inverse problem:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The approximate percentage of the measure of the parameter space defined\n", + "by the support of the inverse density associated with the choice of QoI map is\n", + "0.31134\n" + ] + } + ], + "source": [ + "(_, _, indices_in_inverse) = \\\n", + " postTools.sample_highest_prob(top_percentile=1.0,\n", + " sample_set=input_samples, sort=True)\n", + "\n", + "# Print the approximate percentage of the measure of the parameter space defined\n", + "# by the support of the inverse density\n", + "if comm.rank == 0:\n", + " print 'The approximate percentage of the measure of the parameter space defined'\n", + " print 'by the support of the inverse density associated with the choice of QoI map is'\n", + " print np.sum(input_samples.get_volumes()[indices_in_inverse])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + 
"mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}