Skip to content

Commit

Permalink
Merge pull request #33 from rapidsurveys:dev
Browse files Browse the repository at this point in the history
refactor boot BW; fix #31
  • Loading branch information
ernestguevarra authored Jan 5, 2025
2 parents 1588a79 + 17caa16 commit 8dd7d0e
Show file tree
Hide file tree
Showing 8 changed files with 253 additions and 17 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@
/revdep/library.noindex

README.html

docs
6 changes: 5 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: bbw
Type: Package
Title: Blocked Weighted Bootstrap
Version: 0.2.1.9000
Version: 0.2.2.9000
Authors@R: c(
person("Mark", "Myatt",
email = "[email protected]", role = c("aut", "cph")),
Expand All @@ -22,6 +22,10 @@ License: GPL-3
Depends: R (>= 3.0.1)
Imports:
car,
doParallel,
foreach,
parallel,
parallelly,
withr
Suggests:
knitr,
Expand Down
11 changes: 11 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,20 @@
export(bootBW)
export(bootClassic)
export(bootPROBIT)
export(boot_bw)
export(boot_bw_sample_cluster)
export(boot_bw_sample_clusters)
export(boot_bw_sample_within_cluster)
export(boot_bw_sample_within_clusters)
export(boot_bw_weight)
export(recode)
importFrom(car,bcPower)
importFrom(car,powerTransform)
importFrom(doParallel,registerDoParallel)
importFrom(foreach,"%dopar%")
importFrom(foreach,foreach)
importFrom(parallel,makeCluster)
importFrom(parallelly,availableCores)
importFrom(stats,na.omit)
importFrom(stats,pnorm)
importFrom(stats,runif)
Expand Down
4 changes: 4 additions & 0 deletions R/bbw.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@
#' @importFrom car powerTransform
#' @importFrom car bcPower
#' @importFrom withr with_options
#' @importFrom parallelly availableCores
#' @importFrom parallel makeCluster
#' @importFrom foreach foreach %dopar%
#' @importFrom doParallel registerDoParallel
#'
"_PACKAGE"

Expand Down
146 changes: 146 additions & 0 deletions R/boot_bw.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#'
#' Blocked weighted bootstrap - vectorised and parallel
#'
#' @inheritParams bootBW
#' @param index Logical. Should the selected cluster identifiers be returned
#' instead of a list of data.frames? Defaults to FALSE.
#' @param cluster_df A [data.frame()] or a list of [data.frame()]s for selected
#' clusters
#' @param p A random probability of selection
#' @param cores The number of computer cores to use, i.e. the number of child
#' processes that will be run simultaneously.
#'
#' @returns A [data.frame()] with:
#' * ncol = length(outputColumns)
#' * nrow = replicates
#' * names = outputColumns
#'
#' @examples
#' boot_bw(
#' x = indicatorsHH, w = villageData, statistic = bootClassic,
#' params = "anc1", replicates = 49
#' )
#'
#' @export
#' @rdname boot_bw
#'

boot_bw_weight <- function(w) {
  ## Adds two columns to w: `weight`, each cluster's share of the summed
  ## `pop` column, and `cumWeight`, the running total of those shares.

  ## Share of the total population held by each cluster ----
  pop_share <- w$pop / sum(w$pop)

  ## Attach the share and its cumulative sum ----
  w$weight <- pop_share
  w$cumWeight <- cumsum(pop_share)

  ## Hand back the augmented w ----
  w
}

#'
#' @export
#' @rdname boot_bw
#'

boot_bw_sample_cluster <- function(p, w) {
  ## Select one cluster for a single probability p: the first row of w whose
  ## `cumWeight` is at least p. Returns that row's `psu` value.

  ## Positions whose cumulative weight reaches p ----
  reached <- which(w$cumWeight >= p)

  ## Rounding in the cumulative sum can leave the final cumWeight a hair
  ## below 1, so a large p may match no row at all. which.max() on an
  ## all-FALSE vector returns 1 and would silently pick the *first* cluster;
  ## the last cluster is the one that owns the top of the range.
  if (length(reached) > 0) {
    psu <- w$psu[reached[1]]
  } else {
    psu <- w$psu[length(w$psu)]
  }

  ## Return psu ----
  psu
}

#'
#' @export
#' @rdname boot_bw
#'

boot_bw_sample_clusters <- function(x, w, index = FALSE) {
  ## Draw nrow(w) clusters by feeding uniform random numbers to
  ## boot_bw_sample_cluster(). Returns the drawn `psu` values when `index`
  ## is TRUE, otherwise a list with the rows of `x` for each drawn cluster
  ## (a cluster drawn twice appears twice).

  ## One random probability per cluster to draw ----
  p <- runif(n = nrow(w))

  ## Map each probability to a cluster identifier ----
  ## (nested calls rather than `|>`, which needs R >= 4.1)
  selected_clusters <- unlist(
    lapply(X = p, FUN = boot_bw_sample_cluster, w = w)
  )

  if (index) {
    return(selected_clusters)
  }

  ## Pull the rows of each drawn cluster by explicit column lookup.
  ## subset() evaluates its condition inside the data first, so a column of
  ## `x` that happened to be named `y` would shadow the cluster identifier
  ## being matched. which() drops rows with a missing psu, as subset() does.
  lapply(
    X = selected_clusters,
    FUN = function(id) x[which(x[["psu"]] == id), , drop = FALSE]
  )
}


#'
#' @export
#' @rdname boot_bw
#'

boot_bw_sample_within_cluster <- function(cluster_df) {
  ## Resample the rows of one cluster with replacement; the result has as
  ## many rows as cluster_df.

  ## Candidate row positions ----
  cluster_size <- seq_len(nrow(cluster_df))

  ## Draw as many positions as there are rows, with replacement ----
  index <- sample(cluster_size, replace = TRUE)

  ## drop = FALSE keeps the result a data.frame even when cluster_df has a
  ## single column; the default would collapse it to a bare vector.
  cluster_df[index, , drop = FALSE]
}


#'
#' @export
#' @rdname boot_bw
#'

boot_bw_sample_within_clusters <- function(cluster_df) {
  ## Resample within every selected cluster and stack the results into one
  ## object with rbind(). `cluster_df` is a list with one data.frame per
  ## selected cluster.

  ## Resample each cluster independently ----
  resampled <- lapply(
    X = cluster_df,
    FUN = boot_bw_sample_within_cluster
  )

  ## Plain do.call() rather than `|> do.call(rbind, args = _)`: the `_`
  ## placeholder needs R >= 4.2, while the package declares R (>= 3.0.1).
  do.call(rbind, resampled)
}


#'
#' @export
#' @rdname boot_bw
#'

boot_bw <- function(x, w, statistic,
                    params, outputColumns = params,
                    replicates = 399,
                    cores = parallelly::availableCores(omit = 1)) {
  ## Blocked weighted bootstrap with replicates spread over worker processes.
  ##
  ## x             data to resample; rows are matched to clusters through
  ##               its `psu` column
  ## w             cluster table with `psu` and `pop` columns
  ## statistic     function called as statistic(resampled_data, params)
  ## params        passed through unchanged to `statistic`
  ## outputColumns names given to the columns of the result
  ## replicates    number of bootstrap replicates
  ## cores         number of worker processes handed to makeCluster()
  ##
  ## Returns a data.frame with the per-replicate results of `statistic`
  ## combined by rbind() and named by `outputColumns`.

  ## Get cumulative weights for clusters ----
  w <- boot_bw_weight(w)

  ## Setup parallelism ----
  cl <- parallel::makeCluster(cores)
  ## Register the shutdown immediately so the workers are stopped even when
  ## `statistic` (or anything below) raises an error; otherwise the child
  ## processes would be left running.
  on.exit(parallel::stopCluster(cl), add = TRUE)
  doParallel::registerDoParallel(cl)

  ## Resample ----
  boot <- foreach::foreach(seq_len(replicates), .combine = rbind) %dopar% {
    ## Stage 1: draw clusters; stage 2: resample rows within each of them
    sampled_clusters <- boot_bw_sample_clusters(x = x, w = w)

    xBW <- boot_bw_sample_within_clusters(sampled_clusters)

    statistic(xBW, params)
  }

  ## Rename output data.frame ----
  boot <- as.data.frame(boot)
  row.names(boot) <- NULL
  names(boot) <- outputColumns

  ## Return boot (the cluster is stopped by on.exit) ----
  boot
}
20 changes: 8 additions & 12 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
@@ -1,26 +1,22 @@
Accion
Arimond
Centers
codecov
CMD
CodeFactor
DL
doi
Faim
FeFol
Gelbach
Handwashing
ICFI
IYCF
JB
LCL
lifecycle
Lifecycle
ORCID
org
outputColumns
PSU
PSUs
RapidSurveys
Ruel
th
CMD
nrow
doi
ncol
CodeFactor
nrow
outputColumns
th
75 changes: 75 additions & 0 deletions man/boot_bw.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions pkgdown/_pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,12 @@ reference:
contents:
- bbw

- title: Bootstrap estimator
- title: Bootstrap resampling
contents:
- bootBW
- boot_bw

- title: Bootstrap statistics
- title: Bootstrap estimator
contents:
- bootClassic
- bootPROBIT
Expand All @@ -61,4 +62,3 @@ reference:
- indicatorsCH2
- indicatorsHH
- villageData

0 comments on commit 8dd7d0e

Please sign in to comment.