diff --git a/.DS_Store b/.DS_Store index f5ba376..780b2cb 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/.Rbuildignore b/.Rbuildignore index 4f75f8e..abb96c4 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,14 +1,15 @@ -^data-raw$ -^Meta$ -^doc$ -^docs$ -^_pkgdown\.yml$ -^codecov\.yml$ -^.*\.Rproj$ -^\.Rproj\.user$ -ignore/ -R_ignore/ -^index.md -^\.github$ -^README\.Rmd$ -test_scripts/ +^data-raw$ +^Meta$ +^doc$ +^docs$ +^_pkgdown\.yml$ +^codecov\.yml$ +^.*\.Rproj$ +^\.Rproj\.user$ +ignore/ +R_ignore/ +^index.md +^\.github$ +^README\.Rmd$ +test_scripts/ +LICENSE.md diff --git a/.github/workflows/develop_build.yaml b/.github/workflows/develop_build.yaml index 863e601..28f49a7 100644 --- a/.github/workflows/develop_build.yaml +++ b/.github/workflows/develop_build.yaml @@ -1,81 +1,81 @@ -# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. -# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions -on: - push: - branches: - - develop - pull_request: - branches: - - develop - -name: develop_build - -jobs: - R-CMD-check: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: - - {os: windows-latest, r: 'release'} - - {os: macOS-latest, r: 'release'} - - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - - steps: - - uses: actions/checkout@v2 - - - uses: r-lib/actions/setup-r@v1 - with: - r-version: ${{ matrix.config.r }} - - - uses: r-lib/actions/setup-pandoc@v1 - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - - - name: Cache R packages - if: runner.os != 'Windows' - uses: actions/cache@v2 - with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- - - - name: Install system dependencies - if: runner.os == 'Linux' - run: | - while read -r cmd - do - eval sudo $cmd - done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') - - - name: Install dependencies - run: | - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("rcmdcheck") - shell: Rscript {0} - - - name: Check - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") - shell: Rscript {0} - - - name: Upload check results - if: failure() - uses: actions/upload-artifact@main - with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check +# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. +# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions +on: + push: + branches: + - develop + pull_request: + branches: + - develop + +name: develop_build + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: windows-latest, r: 'release'} + - {os: macOS-latest, r: 'release'} + - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + + - uses: r-lib/actions/setup-pandoc@v1 + + - name: Query dependencies + run: | + install.packages('remotes') + saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) + writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") + shell: Rscript {0} + + - name: Cache R packages + if: runner.os != 'Windows' + uses: actions/cache@v2 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- + + - name: Install system dependencies + if: runner.os == 'Linux' + run: | + while read -r cmd + do + eval sudo $cmd + done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') + + - name: Install dependencies + run: | + remotes::install_deps(dependencies = TRUE) + remotes::install_cran("rcmdcheck") + shell: Rscript {0} + + - name: Check + env: + _R_CHECK_CRAN_INCOMING_REMOTE_: false + run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") + shell: Rscript {0} + + - name: Upload check results + if: failure() + uses: actions/upload-artifact@main + with: + name: ${{ runner.os }}-r${{ matrix.config.r }}-results + path: check diff --git a/DESCRIPTION b/DESCRIPTION index 17941eb..3c1df4d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,43 +1,39 @@ -Package: drjacoby -Type: Package -Title: Flexible Markov Chain Monte Carlo via Reparameterization -Version: 1.5.3 -Authors@R: c( - person("Bob", "Verity", email = "r.verity@imperial.ac.uk", role = c("aut", "cre")), - person("Pete", "Winskill", email = "p.winskill@imperial.ac.uk", role = c("aut")) - ) -Description: drjacoby is an R package for performing Bayesian inference via - Markov chain monte carlo (MCMC). In addition to being highly flexible it - implements some advanced techniques that can improve mixing in tricky - situations. -License: MIT + file LICENSE -Encoding: UTF-8 -LazyData: true -RoxygenNote: 7.1.2 -LinkingTo: - Rcpp -Imports: - Rcpp, - coda, - parallel, - ggplot2, - RcppXPtrUtils, - usethis, - tools, - rlang, - cowplot, - dplyr, - magrittr, - readr, - data.table -SystemRequirements: C++11 -Suggests: - testthat, - covr, - knitr, - rmarkdown, - microbenchmark, - gridExtra -BugReports: https://github.com/mrc-ide/drjacoby/issues -VignetteBuilder: knitr -Depends: R (>= 3.1.0) +Package: drjacoby +Type: Package +Title: Flexible Markov Chain Monte Carlo via Reparameterization +Version: 1.5.3 +Authors@R: c( + person("Bob", "Verity", email = "r.verity@imperial.ac.uk", role = c("aut", "cre")), + person("Pete", "Winskill", email = "p.winskill@imperial.ac.uk", role = c("aut")) + ) +Description: drjacoby is an R package for performing Bayesian inference via + Markov chain monte carlo (MCMC). In addition to being highly flexible it + implements some advanced techniques that can improve mixing in tricky + situations. +License: MIT + file LICENSE +Encoding: UTF-8 +LazyData: true +RoxygenNote: 7.1.2 +LinkingTo: + Rcpp +Imports: + Rcpp, + coda, + parallel, + ggplot2, + usethis, + tools, + rlang, + cowplot, + dplyr, + magrittr +Suggests: + testthat, + covr, + knitr, + rmarkdown, + microbenchmark, + gridExtra +BugReports: https://github.com/mrc-ide/drjacoby/issues +VignetteBuilder: knitr +Depends: R (>= 3.1.0) diff --git a/LICENSE b/LICENSE index 796e1f8..e70c1c1 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,2 @@ -MIT License - -Copyright (c) 2019 MRC Centre for Outbreak Analysis and Modelling - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +YEAR: 2024 +COPYRIGHT HOLDER: MRC Centre for Outbreak Analysis and Modelling diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..1c446fa --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 MRC Centre for Outbreak Analysis and Modelling + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/R/diagnostic.R b/R/diagnostic.R index 45b3c9b..2287fc1 100644 --- a/R/diagnostic.R +++ b/R/diagnostic.R @@ -1,94 +1,94 @@ -#------------------------------------------------ -#' Gelman-Rubin statistic -#' -#' Estimate sthe Gelman-Rubin (rhat) convergence statistic for a single parameter -#' across multiple chains. Basic method, assuming all chains are of equal length -#' -#' @references Gelman, A., and D. B. Rubin. 1992. -#' Inference from Iterative Simulation Using Multiple Sequences. -#' Statistical Science 7: 457–511. -#' @references \url{https://astrostatistics.psu.edu/RLectures/diagnosticsMCMC.pdf} -#' -#' @param par_matrix Matrix (interations x chains) -#' @param chains number of chains -#' @param samples number of samples -#' -#' @return Gelman-Rubin statistic -gelman_rubin <- function(par_matrix, chains, samples){ - - # Check that >1 chains and >1 samples - assert_gr(chains, 1) - assert_gr(samples, 1) - - # Coerce to matrix - par_matrix <- as.data.frame(par_matrix) - - # Mean over all samples - all_mean <- mean(par_matrix[,2]) - - # Mean of each chain - chain_mean <- tapply(par_matrix[,2], par_matrix[,1], mean) - - # Variance of each chain - chain_var <- tapply(par_matrix[,2], par_matrix[,1], stats::var) - W <- (1 / chains) * sum(chain_var) - B <- samples / (chains - 1) * sum((chain_mean - all_mean)^2) - V <- (1 - 1 / samples) * W + (1 / samples) * B - round(sqrt(V / W), 4) -} - -#------------------------------------------------ -#' @title Estimate autocorrelation -#' -#' @inheritParams plot_autocorrelation -#' @param x Single chain. -acf_data <- function(x, lag){ - stats::acf(x, plot = FALSE, lag.max = lag)$acf -} - -#------------------------------------------------ -# check that geweke p-value non-significant at alpha significance level on -# values x[1:n] -#' @importFrom coda mcmc -#' @noRd -test_convergence <- function(x, n, alpha = 0.01) { - - # check inputs - assert_vector_numeric(x) - assert_single_pos_int(n) - assert_single_bounded(alpha) - - # fail if n = 1 - if (n == 1) { - return(FALSE) - } - - # fail if ESS too small - ESS <- try(coda::effectiveSize(x[1:n]), silent = TRUE) - if (class(ESS) == "try-error") { - return(FALSE) - } - if (ESS < 10) { - return(FALSE) - } - - # fail if geweke p-value < threshold - g <- geweke_pvalue(mcmc(x[1:n])) - ret <- (g > alpha) - - # return - return(ret) -} - -#------------------------------------------------ -# geweke_pvalue -# return p-value of Geweke's diagnostic convergence statistic, estimated from -# package coda -#' @importFrom stats pnorm -#' @importFrom coda geweke.diag -#' @noRd -geweke_pvalue <- function(x) { - ret <- 2*pnorm(abs(coda::geweke.diag(x)$z), lower.tail = FALSE) - return(ret) -} - +#------------------------------------------------ +#' Gelman-Rubin statistic +#' +#' Estimate sthe Gelman-Rubin (rhat) convergence statistic for a single parameter +#' across multiple chains. Basic method, assuming all chains are of equal length +#' +#' @references Gelman, A., and D. B. Rubin. 1992. +#' Inference from Iterative Simulation Using Multiple Sequences. +#' Statistical Science 7: 457–511. +#' @references \url{https://astrostatistics.psu.edu/RLectures/diagnosticsMCMC.pdf} +#' +#' @param par_matrix Matrix (interations x chains) +#' @param chains number of chains +#' @param samples number of samples +#' +#' @return Gelman-Rubin statistic +gelman_rubin <- function(par_matrix, chains, samples){ + + # Check that >1 chains and >1 samples + assert_gr(chains, 1) + assert_gr(samples, 1) + + # Coerce to matrix + par_matrix <- as.data.frame(par_matrix) + + # Mean over all samples + all_mean <- mean(par_matrix[,2]) + + # Mean of each chain + chain_mean <- tapply(par_matrix[,2], par_matrix[,1], mean) + + # Variance of each chain + chain_var <- tapply(par_matrix[,2], par_matrix[,1], stats::var) + W <- (1 / chains) * sum(chain_var) + B <- samples / (chains - 1) * sum((chain_mean - all_mean)^2) + V <- (1 - 1 / samples) * W + (1 / samples) * B + round(sqrt(V / W), 4) +} + +#------------------------------------------------ +#' @title Estimate autocorrelation +#' +#' @inheritParams plot_autocorrelation +#' @param x Single chain. +acf_data <- function(x, lag){ + stats::acf(x, plot = FALSE, lag.max = lag)$acf +} + +#------------------------------------------------ +# check that geweke p-value non-significant at alpha significance level on +# values x[1:n] +#' @importFrom coda mcmc +#' @noRd +test_convergence <- function(x, n, alpha = 0.01) { + + # check inputs + assert_vector_numeric(x) + assert_single_pos_int(n) + assert_single_bounded(alpha) + + # fail if n = 1 + if (n == 1) { + return(FALSE) + } + + # fail if ESS too small + ESS <- try(coda::effectiveSize(x[1:n]), silent = TRUE) + if (inherits(ESS, "try-error")) { + return(FALSE) + } + if (ESS < 10) { + return(FALSE) + } + + # fail if geweke p-value < threshold + g <- geweke_pvalue(mcmc(x[1:n])) + ret <- (g > alpha) + + # return + return(ret) +} + +#------------------------------------------------ +# geweke_pvalue +# return p-value of Geweke's diagnostic convergence statistic, estimated from +# package coda +#' @importFrom stats pnorm +#' @importFrom coda geweke.diag +#' @noRd +geweke_pvalue <- function(x) { + ret <- 2*pnorm(abs(coda::geweke.diag(x)$z), lower.tail = FALSE) + return(ret) +} + diff --git a/R_ignore/.DS_Store b/R_ignore/.DS_Store index f7ed7b1..245f826 100644 Binary files a/R_ignore/.DS_Store and b/R_ignore/.DS_Store differ diff --git a/R_ignore/deploy.R b/R_ignore/deploy.R new file mode 100644 index 0000000..93e27b3 --- /dev/null +++ b/R_ignore/deploy.R @@ -0,0 +1,47 @@ + +# deploy.R +# +# Author: Bob Verity +# Date: 2024-06-19 +# +# Purpose: +# Test package functions. + +# ------------------------------------------------------------------ + +library(tidyverse) + +# define R loglike function +r_loglike <- function(params, data, misc) { + sum(dnorm(data$x, mean = params["mu"], sd = params["sigma"], log = TRUE)) +} + +# define R logprior function +r_logprior <- function(params, misc) { + dunif(params["mu"], -10, 10, log = TRUE) + dnorm(params["sigma"], 0, 1, log = TRUE) +} + +# source C++ likelihood and prior +#cpp11::cpp_source("ignore/source/normal_model.cpp") + + +# ------------------------------------------------------------------ + +set.seed(1) + +# define data +x <- rnorm(1e2, mean = 1, sd = 4) + +# define parameters dataframe +df_params <- define_params(name = "mu", min = -10, max = 10, + name = "sigma", min = 0, max = Inf) + + +mcmc <- run_mcmc(data = list(x = x), + df_params = df_params, + loglike = r_loglike, + logprior = r_logprior, + burnin = 1e3, + samples = 1e3) + +plot_par(mcmc, show = "mu", phase = "burnin")