Skip to content

Commit

Permalink
Update code for Revision 1.
Browse files Browse the repository at this point in the history
  • Loading branch information
ntthung committed May 7, 2021
1 parent ae99ec3 commit fc5f3c3
Show file tree
Hide file tree
Showing 140 changed files with 1,877 additions and 38,277 deletions.
53 changes: 19 additions & 34 deletions R/init.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,6 @@ theme_set(my_theme)
monthLab <- month.abb
names(monthLab) <- 1:12

lambdaLab <- function(x) sapply(x, function(v) paste0('\u03bb = ', substr(v, 1, 1)))

dark2 <- RColorBrewer::brewer.pal(3, 'Dark2')[c(2, 1, 3)]

pal <- wesanderson::wes_palette('Cavalcanti1')[c(2, 1)]
Expand Down Expand Up @@ -94,46 +92,33 @@ hinkley <- function(x, type = 2) {
(mean(x, na.rm = TRUE) - median(x, na.rm = TRUE)) / scale
}

roundDT <- function(DT, digits = 2, type = 'numeric') {
DT[, lapply(.SD,
function(x) {
if (is.numeric(x)) {
rd <- round(x, digits)
if (type == 'numeric') rd else sprintf(glue("%.{digits}f"), rd)
} else x
})]
}

power_set <- function(N, min.size = 1, max.size = N) {
if (max.size > N) max.size <- N
if (min.size > N) NULL else {
subsets <- foreach(k = 1:N) %dopar% combn(1:N, k, simplify = FALSE)
subsets <- unlist(subsets, recursive = FALSE)
lengths <- sapply(subsets, length)
subsets[lengths %between% c(min.size, max.size)]
roundDT <- function(DT, digits = 2, type = 'numeric') {
if (length(digits) == 1) {
DT[, lapply(.SD,
function(x) {
if (is.numeric(x)) {
rd <- round(x, digits)
if (type == 'numeric') rd else sprintf(glue("%.{digits}f"), rd)
} else x
})]
} else {
DT[, mapply(function(x, k) {
if (is.numeric(x)) {
rd <- round(x, k)
if (type == 'numeric') rd else sprintf(glue("%.{k}f"), rd)
} else x
},
x = .SD,
k = digits,
SIMPLIFY = FALSE)]
}
}

'%ni%' <- Negate('%in%')

lapplyrbind <- function(x, fun, ..., id = NULL) rbindlist(lapply(x, fun, ...), idcol = id)

# Megadroughts

mgd <- data.table(sta = c(1756, 1790, 1876),
fin = c(1768, 1796, 1878),
name = c('Strange Parallels Drought',
'East India Drought',
'Victorian Great Drought'))

standardize <- function(x, ...) (x - mean(x, ...)) / sd(x, ...)

boxcox_lambda <- function(x) {
bc <- MASS::boxcox(x ~ 1, plotit = FALSE)
bc$x[which.max(bc$y)]
}

boxcox <- function(x, lambda = NULL) {
if (is.null(lambda)) lambda <- boxcox_lambda(x)
if (abs(lambda) < 0.01) log(x) else (x^lambda - 1) / lambda
}
85 changes: 81 additions & 4 deletions R/input-selection-functions.R → R/input_selection_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,13 @@ wPCA <- function(X, rho = 1, p = 0, min.variance = 0.95, use.eigen = TRUE, retur
#' @param n.site.min Minimum number of sites to be kept for each season
#' @inheritParams cv_mb
#' @param Xpool The input matrix of all the sites in the pool
#' @param use.robust.mean Whether the arithmetic mean (if set to FALSE) or the Tukey's biweight robust mean is used to etermine the final value from cross-validation. Default is FALSE.
#' @param use.robust.mean Whether the arithmetic mean (if set to FALSE) or the Tukey's biweight robust mean is used to determine the final value from cross-validation. Default is FALSE.
#' @export
cv_site_selection <- function(choice, pool, n.site.min = 5,
Xpool, instQ, cv.folds, start.year,
lambda = 1, log.trans, force.standardize = FALSE,
use.robust.mean = FALSE) {

season <- N <- site <- NULL
seasons <- levels(instQ$season)
poolSubset <- pool[choice == 1L]
num.targets <- length(seasons)
Expand All @@ -108,16 +107,94 @@ cv_site_selection <- function(choice, pool, n.site.min = 5,

pcList <- lapply(seasons, function(s) {
Qa <- instQ[s]
X <- Xpool[, poolSubset[s, site]]
X <- Xpool[, poolSubset[s, site2]]
PC <- wPCA(X, use.eigen = FALSE, return.matrix = TRUE)
sv <- input_selection(
PC[instInd, , drop = FALSE],
Qa$Qa, 'leaps backward', nvmax = 8)
PC[, sv, drop = FALSE]
})

cvFval <- cv_mb(instQ, pcList, cv.folds, start.year, lambda, log.trans, force.standardize, return.type = 'mb')
cvFval <- cv_mb(instQ, pcList, cv.folds, start.year, lambda, log.trans, force.standardize, return.type = 'fval')

if (use.robust.mean) -tbrm(cvFval) else -mean(cvFval)
} else -1e12
}

# This function is a custom-made version of mbr::mb_reconstruction(), in order to extract the parameters
# In a near future this option to extract the parameters will be incorporated into mb_reconstruction()

mb_par <- function(instQ, pc.list, start.year, lambda = 1,
log.trans = NULL, force.standardize = FALSE) {

# Setup
years <- start.year:max(instQ$year)
instInd <- which(years %in% instQ$year)
seasons <- levels(instQ$season)
N <- length(seasons)
sInd <- 1:(N-1)
Y <- instQ$Qa

XList <- lapply(pc.list, mbr:::prepend_ones)
XListInst <- lapply(XList, function(x) x[instInd, , drop = FALSE])
X <- as.matrix(Matrix:::.bdiag(XList))
XTrain <- as.matrix(Matrix:::.bdiag(XListInst))

# NOTES:
# Working with matrices are much faster than with data.table
# Important to keep drop = FALSE; otherwise, when there is only one PC, a vector is returned.
# Use column form so that we can use c() later

# Calibration -------------------------------------

if (is.null(log.trans)) {

if (force.standardize) {

# Scale the observation
Y <- matrix(Y, ncol = N)
Y <- colScale(Y)
cm <- attributes(Y)[['scaled:center']] # column mean
csd <- attributes(Y)[['scaled:scale']] # column sd
Y <- c(Y)

# Multiply X by sigma_s / sigma_q before making A
ratio <- csd / csd[N]
Xscaled <- lapply(sInd, function(k) XListInst[[k]] * ratio[k])
A <- cbind(do.call(cbind, Xscaled), -XListInst[[N]])
} else {
cm <- NULL
csd <- NULL
A <- cbind(do.call(cbind, XListInst[sInd]), -XListInst[[N]])
}

# Analytical solution when there is no transformation
XTX <- crossprod(XTrain)
XTY <- crossprod(XTrain, Y)
ATA <- crossprod(A)
beta <- solve(XTX + lambda * ATA, XTY)

} else { # Numerical solution

Y <- matrix(Y, ncol = N)
Y[, log.trans] <- log(Y[, log.trans])
if (length(log.trans) < N || force.standardize) {
Y <- colScale(Y)
cm <- attributes(Y)[['scaled:center']]
csd <- attributes(Y)[['scaled:scale']]
} else {
cm <- NULL
csd <- NULL
}
Y <- c(Y)

log.seasons <- which(log.trans < N)
log.ann <- max(log.trans) == N

beta <- mbr:::mb_fit(XTrain, Y, lambda, cm, csd, log.seasons, log.ann, N, sInd)
}

# Prediction ------------------------------------------

beta
}
Loading

0 comments on commit fc5f3c3

Please sign in to comment.