rgiordan · rgiordan · Sep 5, 2019 · Sep 6, 2019 · Sep 6, 2019 · Sep 6, 2019
diff --git a/README.md b/README.md
@@ -223,6 +223,41 @@ In the figure "Negative binomial example" above, suppose we had decided that `ca
 
 For a more in-depth discussion of the relationship between sensitivity and robustness, see Appendix C of our paper [3].
 
+# Development
+
+The following commands are to be run from the root of a clone of the
+repository.
+
+To install a local version, in `R`, run
+
+```
+> library(devtools)
+> install_local("rstansensitivity", force=TRUE)
+```
+
+To run the R unit tests, execute the following in your shell:
+
+```
+$ cd rstansensitivity/tests
+$ ./run_tests.R
+```
+
+To run the python tests, execute the following in your shell:
+
+```
+$ rstansensitivity/inst/generate_models_unittest.py
+```
+
+To build the documentation (and exports), run the following commands in `R` from
+the `rstansensitivity` directory:
+
+```
+> library(roxygen2)
+> library(pkgdown)
+> roxygen2::roxygenize()
+> pkgdown::build_site()
+```
+
 # References
 
 [1]: Local posterior robustness with parametric priors: Maximum and average sensitivity, Basu, Jammalamadaka, Rao, Liu (1996)

diff --git a/rstansensitivity/DESCRIPTION b/rstansensitivity/DESCRIPTION
@@ -1,12 +1,12 @@
 Package: rstansensitivity
 Title: Tools for calculating hyperparameter sensitivity in Stan.
-Version: 0.0.0.9000
-Authors@R: person("Ryan", "Giordano", email = "rgiordano@berkeley.edu", role = c("aut", "cre"))
-Maintainer@R: person("Ryan", "Giordano", email = "rgiordano@berkeley.edu", role = c("aut", "cre"))
+Version: 0.0.1.0000
+Authors@R: person("Ryan", "Giordano", email = "rgiordan@mit.edu", role = c("aut", "cre"))
+Maintainer@R: person("Ryan", "Giordano", email = "rgiordan@mit.edu", role = c("aut", "cre"))
 Description: Use Stan's autodifferentiation capabilities to automatically calculate Monte Carlo estimates of local sensitivity of posterior means to hyperparameters.
-Depends: R (>= 3.2.3), rstan, dplyr, reshape2
+Depends: R (>= 3.2.3), rstan, dplyr, reshape2, tidybayes
 License: ALv2
 Encoding: UTF-8
 LazyData: true
-Suggests: testthat
-RoxygenNote: 6.0.1
+Suggests: testthat, pkgdown, roxygen2
+RoxygenNote: 6.1.1
diff --git a/rstansensitivity/NAMESPACE b/rstansensitivity/NAMESPACE
@@ -7,14 +7,19 @@ export(GetHyperparameterDataFrame)
 export(GetImportanceSamplingFromModelFit)
 export(GetMCMCDataFrame)
 export(GetSamplingModelFilename)
+export(GetSensitivityFromGrads)
 export(GetSensitivityStandardErrors)
 export(GetStanSensitivityFromModelFit)
+export(GetStanSensitivityMatricesFromModelFit)
 export(GetStanSensitivityModel)
 export(GetTidyResult)
+export(GetTidySensitivityResults)
+export(GetTidyStanSummary)
+export(GetWeightMatrixSecondDerivative)
+export(LoadStanData)
 export(PlotSensitivities)
 export(PredictSensitivityFromStanData)
 export(SetSensitivityParameterList)
 export(SummarizeSensitivityMatrices)
+export(TidyColumn)
 export(TransformSensitivityResult)
-export(GetCovarianceSE)
-export(GetNormalizedCovarianceSE)
diff --git a/rstansensitivity/R/covariance_standard_errors_lib.R b/rstansensitivity/R/covariance_standard_errors_lib.R
@@ -1,3 +1,5 @@
+# Utilities using `mcmcse` to evaluate standard errors for correlations.
+
 library(mcmcse)
 
 
@@ -11,7 +13,10 @@ GetCovarianceSE <- function(x_draws, y_draws, fix_mean=FALSE) {
     arg_draws <- cbind(x_draws * y_draws, x_draws, y_draws)
     arg_cov_mat <- mcmcse::mcse.multi(arg_draws)$cov
     grad_g <- c(1, -1 * y_mean, -1 * x_mean)
-    g_se <- as.numeric(sqrt(t(grad_g) %*% arg_cov_mat %*% grad_g / nrow(arg_draws)))
+    g_se <- as.numeric(
+        sqrt(t(grad_g) %*%
+        arg_cov_mat %*%
+        grad_g / nrow(arg_draws)))
   }
   return(g_se)
 }
@@ -27,6 +32,7 @@ PackNormalizedCovariancePar <- function(x_draws, y_draws) {
 }
 
 GetNormalizedCovarianceGradient <- function(par) {
+  # We use this to apply the delta method to normalizes sensitivities.
   mean_xy <- par[1]
   mean_x2 <- par[2]
   mean_x <- par[3]
@@ -76,6 +82,7 @@ GetNormalizedCovarianceSE <- function(x_draws, y_draws) {
 GetSensitivityStandardErrors <- function(
     draws_mat, grad_mat, fix_mean=FALSE, normalized=FALSE) {
 
+  # TODO: fix the order of these arguments to match everything else.
   if (normalized && fix_mean) {
     stop("You cannot specify both fix_mean and normalized.")
   }

diff --git a/rstansensitivity/R/result_processing_lib.R b/rstansensitivity/R/result_processing_lib.R
@@ -1,15 +1,34 @@
-library(ggplot2)
-library(dplyr)
-library(reshape2)
+
+CheckDimensions <- function(grad_mat, draws_mat) {
+    if (ncol(grad_mat) != nrow(draws_mat)) {
+        stop(paste0(
+            "The dimensions of `grad_mat` must be ",
+            "# hyper parameters x # MCMC draws, and the dimensions ",
+            "of draws_mat must be # draws x # parameters."))
+    }
+}
 
 
+#' Get a sensitivity matrix from a matrix of partial derivatives and
+#' a matrix of draws.
+#' @param grad_mat A matrix of gradients of the log probability with respect
+#'   to hyperparameters.  The rows are hyperparameters, and the columns are
+#'   draws.
+#' @param draws_mat A matrix of draws of parameters, the means of which
+#'   are the quantities of interest.  The rows are draws, and the columns
+#'   are parameters.
+#' @return A matrix of estimated derivatives, dE[parameter | hyper] / dhyper.
+#'   The rows are hyperparameters and the columns are parameters.
+#' @export
 GetSensitivityFromGrads <- function(grad_mat, draws_mat) {
   # This should in fact match cov() but without having to transpose,
   # which gives a speedup.
 
   # Should match:
   # sens_mat <- cov(t(grad_mat), draws_mat)
 
+  CheckDimensions(grad_mat, draws_mat)
+
   grad_means <- rowMeans(grad_mat)
   draw_means <- colMeans(draws_mat)
 
@@ -53,183 +72,3 @@ TransformSensitivityResult <- function(sens_result, transform_matrix) {
       transform_matrix %*% transform_sens_result$grad_mat
     return(transform_sens_result)
 }
-
-
-SensitivityMatrixToDataframe <- function(
-    sens_mat, hyperparameter_names, parameter_names) {
-  colnames(sens_mat) <- parameter_names
-  return(data.frame(sens_mat) %>%
-    mutate(hyperparameter=hyperparameter_names) %>%
-    melt(id.var="hyperparameter") %>%
-    rename(parameter=variable))
-}
-
-
-#' Make a tidy dataframe out of a sensitivity matrix and its standard errors.
-#' @param sens_mat A matrix of sensitivities.
-#' @param sens_se A matrix of standard errors of \code{sens_mat}.
-#' @param measure What to call these sensitivites.
-#' @param num_se The number of standard errors for the upper and lower bounds.
-#' @return A tidy dataframe with columns for the parameters, hyperparameters,
-#' sensitivities, and their standard errors.
-#' @export
-SummarizeSensitivityMatrices <- function(
-    sens_mat, sens_se, measure, num_se=2) {
-
-    sens_df <- rbind(
-      SensitivityMatrixToDataframe(
-        sens_mat,
-        hyperparameter_names=rownames(sens_mat),
-        parameter_names=colnames(sens_mat)) %>%
-        mutate(measure=measure),
-      SensitivityMatrixToDataframe(
-        sens_se,
-        hyperparameter_names=rownames(sens_se),
-        parameter_names=colnames(sens_se)) %>%
-        mutate(measure=paste(measure, "se", sep="_")),
-      SensitivityMatrixToDataframe(
-        sens_mat + num_se * sens_se,
-        hyperparameter_names=rownames(sens_mat),
-        parameter_names=colnames(sens_mat)) %>%
-        mutate(measure=paste(measure, "upper", sep="_")),
-      SensitivityMatrixToDataframe(
-        sens_mat - num_se * sens_se,
-        hyperparameter_names=rownames(sens_mat),
-        parameter_names=colnames(sens_mat)) %>%
-        mutate(measure=paste(measure, "lower", sep="_"))
-      ) %>%
-      dcast(hyperparameter + parameter ~ measure) %>%
-      filter(parameter != "lp__")
-    return(sens_df)
-}
-
-#' Process the results of GetStanSensitivityFromModelFit into a
-#' tidy dataframe with standard errors.
-#'
-#' @param sens_result The output of \code{GetStanSensitivityFromModelFit}.
-#' @param num_se The number of standard errors for the upper and lower bounds.
-#' @return A dataframe summarizing the sensitivity of the model posterior means
-#' to the hyperparameters.  The reported standard errors are based on a
-#' multivariate normal and delta method approximation which may not be
-#' accurate for highly variable or non-normal draws. The standard errors should
-#' be interpreted with caution.
-#' \itemize{
-#'     \item{hyperparameter: }{The name of the hyperparameter.}
-#'     \item{parameter: }{The name of the parameter.}
-#'     \item{sensitivity:}
-#'     {The MCMC estimate of \code{dE[parameter | X, hyperparameter] /  d hyperparameter}.}
-#'     \item{sensitivity_se: }
-#'     {The estimated Monte Carlo error of \code{sensitivity}.}
-#'     \item{normalized_sensitivity: }
-#'     {The MCMC estimate of \code{sensitivity / sd(parameter}).}
-#'     \item{normalized_sensitivity_se: }
-#'     {The estimated Monte Carlo error of \code{normalized_sensitivity}.}
-#' }
-#' @export
-GetTidyResult <- function(sens_result, num_se=2) {
-    sens_se <- GetSensitivityStandardErrors(
-        sens_result$draws_mat, sens_result$grad_mat,
-        fix_mean=FALSE, normalized=FALSE)
-    norm_sens_se <- GetSensitivityStandardErrors(
-        sens_result$draws_mat, sens_result$grad_mat,
-        fix_mean=FALSE, normalized=TRUE)
-    sens_df <- SummarizeSensitivityMatrices(
-        sens_result$sens_mat, sens_se,
-        measure="sensitivity", num_se=num_se)
-    sens_norm_df <- SummarizeSensitivityMatrices(
-        sens_result$sens_mat_normalized, norm_sens_se,
-        measure="normalized_sensitivity", num_se=num_se)
-
-    result <- inner_join(
-        sens_df,sens_norm_df, by=c("hyperparameter", "parameter"))
-
-  return(result)
-}
-
-
-#' Make a barchart from the output of \code{GetTidyResult}.
-#'
-#' @param sens_df The output of \code{GetTidyResult}.
-#' @param normalized Whether to display the normalized sensitivities.
-#' @param se_num The number of standard errors to plot with the errorbars.
-#' @return A \code{ggplot} object showing the sensitivites of each parameter
-#' to each hyperparameter.
-#' @export
-PlotSensitivities <- function(sens_df, normalized=TRUE, se_num=2) {
-    if (normalized) {
-        y_axis_label <- "Normalized sensitivity"
-        sens_df_plot <-
-            mutate(sens_df, s=normalized_sensitivity,
-                   s_se=normalized_sensitivity_se)
-    } else {
-        y_axis_label <- "Unnormalized sensitivity"
-        sens_df_plot <-
-            mutate(sens_df, s=sensitivity, s_se=sensitivity_se)
-    }
-    return(
-    ggplot(sens_df_plot) +
-      geom_bar(aes(x=parameter, y=s, fill=hyperparameter),
-                   stat="identity", position="dodge") +
-      geom_errorbar(aes(x=parameter,
-                        ymin=s - se_num  * s_se,
-                        ymax=s + se_num  * s_se,
-                        group=hyperparameter),
-                    position=position_dodge(0.9), width=0.2) +
-      theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
-      scale_fill_discrete(name="Hyperparameter") +
-      ylab(y_axis_label) + xlab("Parameter")
-    )
-}
-
-
-#' Convert a Stan sampling result to a form that can be joined with tidy
-#' sensitivity results.
-#'
-#' @param sampling_result The output of \code{stan::sampling}
-#' @param cols The columns of the summary to keep.
-#' @return A data frame with the MCMC reuslts that can be joined with tidy
-#' sensitivity results.
-#' @export
-GetMCMCDataFrame <- function(
-    sampling_result, cols=c("mean", "se_mean", "sd", "n_eff", "Rhat")) {
-  mcmc_result <- as.data.frame(rstan::summary(sampling_result)$summary)
-  mcmc_result$parameter <- make.names(rownames(mcmc_result))
-  rownames(mcmc_result) <- NULL
-  mcmc_result <-
-    select(mcmc_result, "parameter", cols) %>%
-    filter(parameter != "lp__")
-  return(mcmc_result)
-}
-
-
-#' Use the linear approximation to predict the sensitivity to a new
-#' stan data list.
-#'
-#' @param stan_sensitivity_list The output of \code{GetStanSensitivityModel}
-#' @param stan_result The output of \code{GetStanSensitivityFromModelFit}
-#' @param stan_data The original stan data at which
-#' \code{stan_sensitivity_list} was calculated.
-#' @param stan_data_perturb A new stan data file with different hyperparameters.
-#' @param description A hyperparameter name to describe this perturbation.
-#' @return A tidy sensitivity dataframe where the sensitivity is in the
-#' direction of the difference between the hyperparameters in the two stan
-#' data lists.
-#' @export
-PredictSensitivityFromStanData <- function(
-    stan_sensitivity_list, sens_result, stan_data, stan_data_perturb,
-    description="perturbation") {
-
-    hyperparameter_df <-
-      inner_join(
-          GetHyperparameterDataFrame(stan_sensitivity_list, stan_data) %>%
-               rename(hyperparameter_val_orig=hyperparameter_val),
-          GetHyperparameterDataFrame(stan_sensitivity_list, stan_data_perturb),
-               by="hyperparameter") %>%
-      mutate(hyperparameter_diff=hyperparameter_val - hyperparameter_val_orig)
-    linear_comb <- matrix(hyperparameter_df$hyperparameter_diff, nrow=1)
-    colnames(linear_comb) <- hyperparameter_df$hyperparameter
-    linear_comb <- linear_comb[, rownames(sens_result$sens_mat), drop=FALSE]
-    rownames(linear_comb) <- description
-    sens_result_pert <- TransformSensitivityResult(sens_result, linear_comb)
-    return(GetTidyResult(sens_result_pert))
-}