Merge pull request #122 from lcgraham/master

Merge workshop additions
UT-CHG · Jul 4, 2015 · 1a9fe94 · 1a9fe94
2 parents d2c974c + c2d9ffc
commit 1a9fe94
Show file tree

Hide file tree

Showing 20 changed files with 305 additions and 34 deletions.
diff --git a/bet/calculateP/calculateP.py b/bet/calculateP/calculateP.py
@@ -113,7 +113,7 @@ def prob(samples, data, rho_D_M, d_distr_samples, d_Tree=None):
     :param d_Tree: :class:`~scipy.spatial.KDTree` for d_distr_samples
     :rtype: tuple of :class:`~numpy.ndarray` of sizes (num_samples,),
         (num_samples,), (ndim, num_l_emulate), (num_samples,), (num_l_emulate,)
-    :returns: (P, lam_vol, lambda_emulate, io_ptr) where P is the
+    :returns: (P, lam_vol, io_ptr) where P is the
         probability associated with samples, and lam_vol the volumes associated
         with the samples, io_ptr a pointer from data to M bins.
 

diff --git a/bet/calculateP/voronoiHistogram.py b/bet/calculateP/voronoiHistogram.py
@@ -48,7 +48,7 @@ def center_and_layer1_points_binsize(center_pts_per_edge, center, r_size,
     rect_domain = np.column_stack([center - .5*rect_width,
         center + .5*rect_width])
     if np.all(np.greater(r_size, rect_width)):
-        msg = "The hyperrectangle defined by this size is larger than the"
+        msg = "The hyperrectangle defined by this size is larger than the "
         msg += "original domain."
         print msg
 

diff --git a/doc/examples/example_rst_files/Q_1D_serial.rst b/doc/examples/example_rst_files/Q_1D_serial.rst
@@ -14,9 +14,8 @@ This example demostrates three different methods to estimate
     P_\Lambda \approx \sum_{\mathcal{V}_j \subset A} \hat{\rho}_{\Lambda, j}.
 
 These methods are distinguished primarily by the way :math:`\mathcal{V}_j` are
-defined and the approximation of the volume of :math:`\mathcal{V}_j`. See
-:download:`Q_1D_serial.py
-<../../../examples/fromFile_ADCIRCMap/Q_1D_serial.py>` for the example source code.
+defined and the approximation of the volume of :math:`\mathcal{V}_j`. See `Q_1D_serial.py
+<https://github.com/UT-CHG/BET/blob/master/examples/fromFile_ADCIRCMap/Q_1D_serial.py>`_ for the example source code.
 
 First, import the necessary packages and modules::
 

diff --git a/doc/examples/example_rst_files/Q_2D.rst b/doc/examples/example_rst_files/Q_2D.rst
@@ -15,8 +15,8 @@ This example demostrates three different methods to estimate
 
 These methods are distinguished primarily by the way :math:`\mathcal{V}_j` are
 defined and the approximation of the volume of :math:`\mathcal{V}_j`. See
-:download:`Q_2D_serial.py
-<../../../examples/fromFile_ADCIRCMap/Q_2D_serial.py>` for the example source code. Since
+`Q_2D_serial.py
+<https://github.com/UT-CHG/BET/blob/master/examples/fromFile_ADCIRCMap/Q_2D_serial.py>`_ for the example source code. Since
 this example is essentially the same as :ref:`q1D` we will only highlight the
 differences between the two.
 

diff --git a/doc/examples/example_rst_files/Q_3D.rst b/doc/examples/example_rst_files/Q_3D.rst
@@ -16,7 +16,7 @@ This example demostrates how to estimate :math:`\hat{\rho}_{\Lambda, j}` using
 
     P_\Lambda \approx \sum_{\mathcal{V}_j \subset A} \hat{\rho}_{\Lambda, j}.
 
-See :download:`Q_3D_serial.py <../../../examples/fromFile_ADCIRCMap/Q_3D_serial.py>`
+See `Q_3D_serial.py <https://github.com/UT-CHG/BET/blob/master/examples/fromFile_ADCIRCMap/Q_3D_serial.py>`_
 for the example source code. Since example is essentially the same as
 :ref:`q2D` we will only highlight the differences between the two.
 
@@ -44,7 +44,7 @@ This example demostrates how to estimate :math:`\hat{\rho}_{\Lambda, j}` using
 
     P_\Lambda \approx \sum_{\mathcal{V}_j \subset A} \hat{\rho}_{\Lambda, j}.
 
-See :download:`Q_3D_parallel.py <../../../examples/fromFile_ADCIRCMap/Q_3D_serial.py>`
+See `Q_3D_parallel.py <../../../examples/fromFile_ADCIRCMap/Q_3D_serial.py>`_
 for the example source code. Since example is essentially the same as
 :ref:`q2D` we will only highlight the differences between the two.
 

diff --git a/doc/examples/example_rst_files/contaminantTransport.rst b/doc/examples/example_rst_files/contaminantTransport.rst
@@ -0,0 +1,163 @@
+.. _contaminantTransport:
+
+
+============================================================================
+Example: Concentration of Contaminant in Wells Based on Transport Parameters
+============================================================================
+
+We will walk through the following `example
+<https://github.com/UT-CHG/BET/tree/master/examples/contaminantTransport>`_. 
+This example takes uniformly distributed samples of parameters and
+output data from a simple groundwater contaminant transport model,
+and calculates solutions to the stochastic inverse problem.
+The parameter domain is 5D, where the uncertain parameters are the x and y 
+locations of the source, the horizontal dispersivity, the flow angle,
+and the contaminant flux. There are 11 measured QoIs in the data space 
+available. By changing the choice of QoIs that we use to solve the stochastic
+inverse problem, we see the effect of geometric distinctness. 
+Probabilities in the parameter space are 
+calculated using the MC assumption.  1D and 2D marginals are calculated,
+smoothed, and plotted.
+
+Import the necessary modules::
+
+    import numpy as np
+    import bet.calculateP as calculateP
+    import bet.postProcess as postProcess
+    import bet.calculateP.simpleFunP as simpleFunP
+    import bet.calculateP.calculateP as calculateP
+    import bet.postProcess.plotP as plotP
+    import bet.postProcess.plotDomains as plotD
+
+
+Import the samples, data, reference data, and reference parameters::
+
+  lam_domain = np.loadtxt("files/lam_domain.txt.gz") #parameter domain
+  ref_lam = np.loadtxt("files/lam_ref.txt.gz") #reference parameter set
+  Q_ref = np.loadtxt("files/Q_ref.txt.gz") #reference QoI set
+  samples = np.loadtxt("files/samples.txt.gz") # uniform samples in parameter domain
+  data = np.loadtxt("files/data.txt.gz") # data from model
+
+Choose which subset of the 11 QoIs to use for inversion::
+
+  QoI_indices=[7,8,9,10] # Indices for output data with which you want to invert
+
+Choose the bin ratio for the uniform output probability::
+
+  bin_ratio = 0.25 #ratio of length of data region to invert
+
+Slice data and form approximate PDF for output for plotting::
+
+  data = data[:,QoI_indices]
+  Q_ref=Q_ref[QoI_indices]
+
+  dmax = data.max(axis=0)
+  dmin = data.min(axis=0)
+  dscale = bin_ratio*(dmax-dmin)
+  Qmax = Q_ref + 0.5*dscale
+  Qmin = Q_ref -0.5*dscale
+  def rho_D(x):
+     return np.all(np.logical_and(np.greater(x,Qmin), np.less(x,Qmax)),axis=1)
+
+Plot 2D projections of the data domain::
+
+  plotD.show_data(data, Q_ref = Q_ref, rho_D=rho_D, showdim=2)
+
+Suggested changes for user (1)
+------------------------------
+
+Change the ``QoI_indices`` and note how it changes the plots of the data
+domain. The white points are ones with zero probability and the dark points
+are those with nonzero probability. 
+
+
+Suggested changes for user (2)
+------------------------------
+
+Try different ways of discretizing the probability measure on
+:math:`\mathcal{D}` defined as a uniform probability measure on a rectangle
+(since :math:`\mathcal{D}` is 2-dimensional).
+
+*   unif_unif creates a uniform measure on a hyperbox with dimensions relative   to the size of the circumscribed hyperbox of the set :math:`\mathcal{D}`  using the bin_ratio. A total of M samples are drawn within a slightly larger  scaled hyperbox to discretize this measure defining M total generalized  contour events in Lambda.  The reason a slightly larger scaled hyperbox is  used to draw the samples to discretize :math:`\mathcal{D}` is because  otherwise every generalized contour event will have non-zero probability  which obviously defeats the purpose of "localizing" the probability within a  subset of :math:`\mathcal{D}`.
+
+*   uniform_hyperrectangle uses the same measure defined in the same way as  unif_unif, but the difference is in the discretization which is on a regular  grid defined by ``center_pts_per_edge``.  If ``center_pts_per_edge = 1``,  then the contour event corresponding to the entire support of  :math:`\rho_\mathcal{D}` is approximated as a single event. This is done by  carefully placing a regular 3x3 grid (since :math:`dim(\mathcal{D})=2` in this  case) of points in :math:`\mathcal{D}` with the center point of the grid in  the center of the support of the measure and the other points placed outside  of the rectangle defining the support to define a total of 9 contour events  with 8 of them having exactly zero probability.
+
+Create a simple function approximation of the probablity measure on
+:math:`\mathcal{D}`::
+
+    deterministic_discretize_D = True
+
+    if deterministic_discretize_D == True:
+      (d_distr_prob, d_distr_samples, d_Tree) = simpleFunP.uniform_hyperrectangle(data=data,
+                                                  Q_ref=Q_ref, bin_ratio=0.2, center_pts_per_edge = 1)
+    else:
+      (d_distr_prob, d_distr_samples, d_Tree) = simpleFunP.unif_unif(data=data,
+                                                  Q_ref=Q_ref, M=50, bin_ratio=0.2, num_d_emulate=1E5)
+
+  
+Calculate probablities using the MC assumption::
+
+  (P,  lam_vol, io_ptr) = calculateP.prob(samples=samples,
+                                        data=data,
+                                        rho_D_M=d_distr_prob,
+                                        d_distr_samples=d_distr_samples)
+
+                                                                                                                                                  
+Calculate 2D marginal probs  - Suggested changes for user (3)
+-------------------------------------------------------------
+
+At this point, the only thing that should change in the plotP.* inputs
+should be either the nbins values or sigma (which influences the kernel
+density estimation with smaller values implying a density estimate that
+looks more like a histogram and larger values smoothing out the values
+more).
+
+There are ways to determine "optimal" smoothing parameters (e.g., see CV, GCV,
+and other similar methods), but we have not incorporated these into the code
+as lower-dimensional marginal plots have limited value in understanding the
+structure of a high dimensional non-parametric probability measure.
+
+Plot the marginal probabilities::
+
+    (bins, marginals2D) = plotP.calculate_2D_marginal_probs(P_samples = P, samples = samples, lam_domain = lam_domain, nbins = [10, 10, 10])
+
+Smooth 2d marginals probs (optional)::
+
+    marginals2D = plotP.smooth_marginals_2D(marginals2D,bins, sigma=1.0)
+
+Plot 2d marginals probs::
+
+    plotP.plot_2D_marginal_probs(marginals2D, bins, lam_domain, filename = "contaminant_map", interactive=False, lam_ref=ref_lam, lambda_labels=labels)
+
+Calculate 1d marginal probs::
+
+    (bins, marginals1D) = plotP.calculate_1D_marginal_probs(P_samples = P, samples = samples, lam_domain = lam_domain, nbins = [10, 10, 10])
+
+Smooth 1d marginal probs (optional)::
+
+    marginals1D = plotP.smooth_marginals_1D(marginals1D, bins, sigma=1.0)
+
+Plot 1d marginal probs::
+
+    plotP.plot_1D_marginal_probs(marginals1D, bins, lam_domain, filename = "contaminant_map", interactive=False, lam_ref=ref_lam, lambda_labels=labels)
+
+Sort samples by highest probability density and take highest x percent::
+
+  (num_samples, P_high, samples_high, lam_vol_high, data_high)= postTools.sample_highest_prob(top_percentile=percentile, P_samples=P, samples=samples, lam_vol=lam_vol,data = data,sort=True)
+
+Print the number of these samples  and the ratio of the volume they take up::
+
+  print (numsamples, np.sum(lam_vol_high)
+
+
+Suggested changes for user (4):
+-------------------------------
+Notice how the marginal probabilites change with different choices of  ``QoI_indices``.
+Try choosing only 2 or 3, instead of 4, indices and notice the higher-dimensionality of the structure in the 2d marginals. Notice how some QoI concentrate the probability into smaller regions. These QoI are more geometrically distinct. 
+
+Notice that the volume that the high-probability samples take up is smaller with more geometrically distinct QoIs.
+
+Suggested changes for user (5):
+-------------------------------
+Change ``percentile`` to values between 1.0 and 0.0. Notice that while the region of nonzero probabibilty may have a significant volume, much of this volume contains relatively low probability. Change the value to 0.95, 0.9, 0.75, and 0.5 and notice the volume decrease significantly. 
+
diff --git a/doc/examples/example_rst_files/domains2D.rst b/doc/examples/example_rst_files/domains2D.rst
@@ -10,8 +10,8 @@ goal-oriented adaptive sampling algorithm.
 Generating a single set of adaptive samples
 -------------------------------------------
 
-We will walk through the following :download:`example
-<../../../examples/fromFile_ADCIRCMap/plotDomains2D.py>` that visualizes
+We will walk through the following `example
+<https://github.com/UT-CHG/BET/blob/master/examples/fromFile_ADCIRCMap/plotDomains2D.py>`_ that visualizes
 samples on a regular grid for a 2-dimensional parameter space. 
 
 The modules required by this example are::

diff --git a/doc/examples/example_rst_files/domains3D.rst b/doc/examples/example_rst_files/domains3D.rst
@@ -10,8 +10,8 @@ goal-oriented adaptive sampling algorithm.
 Generating a single set of adaptive samples
 -------------------------------------------
 
-We will walk through the following :download:`example
-<../../../examples/fromFile_ADCIRCMap/plotDomains3D.py>` that visualizes
+We will walk through the following `example
+<https://github.com/UT-CHG/BET/blob/master/examples/fromFile_ADCIRCMap/plotDomains3D.py>`_ that visualizes
 samples on a regular grid for a 2-dimensional parameter space. 
 
 The modules required by this example are::

diff --git a/doc/examples/example_rst_files/fromfile2D.rst b/doc/examples/example_rst_files/fromfile2D.rst
@@ -10,8 +10,8 @@ goal-oriented adaptive sampling algorithm.
 Generating a single set of adaptive samples
 -------------------------------------------
 
-We will walk through the following :download:`example
-<../../../examples/fromFile_ADCIRCMap/fromFile2D.py>` that uses a linear interpolant of
+We will walk through the following `example
+<https://github.com/UT-CHG/BET/blob/master/examples/fromFile_ADCIRCMap/fromFile2D.py>`_ that uses a linear interpolant of
 the QoI map :math:`Q(\lambda) = (q_1(\lambda), q_6(\lambda))` for a
 2-dimensional data space. The parameter space in this example is also
 2-dimensional. 
@@ -116,18 +116,17 @@ In some instances the user may want to generate and compare several sets of
 adaptive samples using a surrogate model to determine what the best kernel,
 transition set, number of generalized chains, and chain length are before
 adaptively sampling a more computationally expensive model. See
-:download:`sandbox_test_2D.py <../../../examples/fromFile_ADCIRCMap/sandbox_test_2D.py>`. The set up in
-:download:`sandbox_test_2D.py <../../../examples/fromFile_ADCIRCMap/sandbox_test_2D.py>` is very similar to the
-set up in :download:`fromFile2D <../../../examples/fromFile_ADCIRCMap/fromFile2D.py>` and is
+`sandbox_test_2D.py <https://github.com/UT-CHG/BET/tree/master/examples/fromFile_ADCIRCMap/sandbox_test_2D.py>`_. The set up in
+sandbox_test_2D.py <https://github.com/UT-CHG/BET/tree/master/examples/fromFile_ADCIRCMap/sandbox_test_2D.py>`_ is very similar to the
+set up in `fromFile2D <https://github.com/UT-CHG/BET/tree/master/examples/fromFile_ADCIRCMap/fromFile2D.py>`_ and is
 omitted for brevity.
 
 We can explore several types of kernels::
 
     kernel_mm = asam.maxima_mean_kernel(np.array([Q_ref]), rho_D)
-    kernel_rD = asam.rhoD_kernel(maximum, rho_D)
     kernel_m = asam.maxima_kernel(np.array([Q_ref]), rho_D)
-    kernel_md = asam.multi_dist_kernel()
-    kern_list = [kernel_mm, kernel_rD, kernel_m, kernel_md]
+    kernel_rD = asam.rhoD_kernel(maximum, rho_D)
+    kern_list = [kernel_mm, kernel_rD, kernel_m]
     # Get samples
     # Run with varying kernels
     gen_results = sampler.run_gen(kern_list, rho_D, maximum, param_min,

diff --git a/doc/examples/example_rst_files/fromfile3D.rst b/doc/examples/example_rst_files/fromfile3D.rst
@@ -11,7 +11,7 @@ Generating a single set of adaptive samples
 -------------------------------------------
 
 We will walk through the following :download:`example
-<../../../examples/fromFile_ADCIRCMap/sandbox_test_3D.py>` that uses a linear interpolant
+<https://github.com/UT-CHG/BET/blob/master/examples/fromFile_ADCIRCMap/fromFile3D.py>`_ that uses a linear interpolant
 of the QoI map :math:`Q(\lambda) = (q_1(\lambda), q_5(\lambda), q_2(\lambda))`
 for a 3-dimensional data space. The parameter space in this example is also
 3-dimensional. 
@@ -117,18 +117,17 @@ In some instances the user may want to generate and compare several sets of
 adaptive samples using a surrogate model to determine what the best kernel,
 transition set, number of generalized chains, and chain length are before
 adaptively sampling a more computationally expensive model. See
-:download:`sandbox_test_2D.py <../../../examples/fromFile_ADCIRCMap/sandbox_test_2D.py>`. The set up in
-:download:`sandbox_test_2D.py <../../../examples/fromFile_ADCIRCMap/sandbox_test_2D.py>` is very similar to the
-set up in :download:`fromFile2D <../../../examples/fromFile_ADCIRCMap/fromFile2D.py>` and is
+`sandbox_test_2D.py <https://github.com/UT-CHG/BET/tree/master/examples/fromFile_ADCIRCMap/sandbox_test_2D.py>`_. The set up in
+`sandbox_test_2D.py <https://github.com/UT-CHG/BET/tree/master/examples/fromFile_ADCIRCMap/sandbox_test_2D.py>`_ is very similar to the
+set up in `fromFile2D <https://github.com/UT-CHG/BET/tree/master/examples/fromFile_ADCIRCMap/fromFile2D.py>`_ and is
 omitted for brevity.
 
 We can explore several types of kernels::
 
     kernel_mm = asam.maxima_mean_kernel(np.array([Q_ref]), rho_D)
     kernel_rD = asam.rhoD_kernel(maximum, rho_D)
     kernel_m = asam.maxima_kernel(np.array([Q_ref]), rho_D)
-    kernel_md = asam.multi_dist_kernel()
-    kern_list = [kernel_mm, kernel_rD, kernel_m, kernel_md]
+    kern_list = [kernel_mm, kernel_rD, kernel_m]
     # Get samples
     # Run with varying kernels
     gen_results = sampler.run_gen(kern_list, rho_D, maximum, param_min,

diff --git a/doc/examples/example_rst_files/linearMapUniformSampling.rst b/doc/examples/example_rst_files/linearMapUniformSampling.rst
@@ -5,8 +5,8 @@
 Example: Linear Map with Uniform Sampling
 ===========================
 
-We will wailk through the following :download:`example
-<../../../examples/linearMap/linearMapUniformSampling.py>`. This example
+We will wailk through the following `example
+<https://github.com/UT-CHG/BET/blob/master/examples/linearMap/linearMapUniformSampling.py>`_. This example
 generates uniform samples in a 3D input parameter space and evaluates a 
 linear map to a 2D data space. The stochastic inverse problem solved here
 is one of parameter identification under uncertainty. A reference parameter

diff --git a/doc/examples/example_rst_files/nonlinearMapUniformSampling.rst b/doc/examples/example_rst_files/nonlinearMapUniformSampling.rst
@@ -4,8 +4,8 @@
 Example: Nonlinear Map with Uniform Sampling
 ============================================
 
-We will wailk through the following :download:`example
-<../../../examples/nonlinearMap/nonlinearMapUniformSampling.py>`. 
+We will wailk through the following `example
+<https://github.com/UT-CHG/BET/blob/master/examples/nonlinearMap/nonlinearMapUniformSampling.py>`_. 
 This example generates uniform samples in a 2D input parameter space 
 and evaluates a nonlinear map to either a 1D or 2D output data space.
 The nonlinear map is defined as either one or two quantities of 

diff --git a/doc/examples/example_rst_files/validation.rst b/doc/examples/example_rst_files/validation.rst
@@ -4,8 +4,8 @@
 Example: Validation
 ============================================
 
-We will wailk through the following :download:`example
-<../../../examples/validationExample/linearMap.py>`. 
+We will wailk through the following `example
+<https://github.com/UT-CHG/BET/blob/master/examples/validationExample/linearMap.py>`_. 
 This 2D linear example shows that geometrically distinct QoI can
 recreate a probability measure on the input parameter space
 used to define the output probability measure. The user can

diff --git a/doc/examples/overview.rst b/doc/examples/overview.rst
@@ -22,7 +22,7 @@ directory.
 Validation example
 =======================================
 
-See :ref:`validation` for an example .
+See :ref:`validation` for an example.
 
 
 Linear Map Example
@@ -73,3 +73,7 @@ Examples Estimating :math:`P_\Lambda`
     * :ref:`q1D`
     * :ref:`q2D`
     * :ref:`q3D`
+
+Contaminant Transport Example:
+==============================
+See :ref:`contaminantTransport` for an example.