From 08fd784eabf48230347287404f386ef685937fe8 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Sat, 9 Oct 2021 16:51:29 +0200 Subject: [PATCH 01/42] added PQN normalization and corresponding tests --- R/prep.R | 48 +++++++++++++++++++++++++++++++++----- man/prep.norm.Rd | 14 ++++++++++- tests/testthat/test-prep.R | 35 +++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 7 deletions(-) diff --git a/R/prep.R b/R/prep.R index ef43e85..0e80282 100755 --- a/R/prep.R +++ b/R/prep.R @@ -108,10 +108,12 @@ prep.snv <- function(data) { #' @param data #' a matrix with data values #' @param type -#' type of normalization \code{"area"}, \code{"length"}, \code{"sum"}, \code{"snv"}, or \code{"is"}. +#' type of normalization \code{"area"}, \code{"length"}, \code{"sum"}, \code{"snv"}, \code{"is"}, or \code("pqn"). #' @param col.ind #' indices of columns (can be either integer or logical valuws) for normalization to internal #' standard peak. +#' @param ref.spectrum +#' reference spectrum for PQN normalization, if not provided a mean spectrum for data is used #' #' @details #' The \code{"area"}, \code{"length"}, \code{"sum"} types do preprocessing to unit area (sum of @@ -122,11 +124,21 @@ prep.snv <- function(data) { #' of this peak. If `col.ind` points on several adjucent vales, the rows are normalized to the area #' under the peak - sum of the intensities. #' +#' The \code{"pqn"} is Probabilistic Quotient Normalization as described in [1]. In this case you also +#' need to provide a reference spectrum (e.g. mean or median of spectra for some reference samples). If +#' reference spectrum is not provided it will be computed as mean of the spectra to be +#' preprocessed (parameter \code{data}). +#' +#' @references +#' 1. F. Dieterle, A. Ross, H. Senn. Probabilistic Quotient Normalization as Robust Method to +#' Account for Dilution of Complex Biological Mixtures. Application in 1 H NMR Metabonomics. +#' Anal. Chem. 2006, 78, 4281–4290. +#' #' @return #' data matrix with normalized values #' #' @export -prep.norm <- function(data, type = "area", col.ind = NULL) { +prep.norm <- function(data, type = "area", col.ind = NULL, ref.spectrum = NULL) { if (type == "snv") return(prep.snv(data)) @@ -142,21 +154,45 @@ prep.norm <- function(data, type = "area", col.ind = NULL) { stop("Values for 'col.ind' seem to be wrong.") } - f <- function(data, type, col.ind) { + if (type == "pqn" && is.null(ref.spectrum)) { + ref.spectrum <- apply(data, 2, mean) + } + + pqn <- function(data, ref.spectrum) { + + if (length(ref.spectrum) != ncol(data)) { + stop("prep.norm: 'ref.spectrum' should have the same number of values as the number of columns in 'data'.") + } + + # 1. unit area normalization + ref.spectrum <- as.numeric(ref.spectrum) + ref.spectrum <- ref.spectrum / sum(abs(ref.spectrum)) + + # 2. 
compute and return median quotients for each spectrum + return(apply(sweep(data, 2, ref.spectrum, "/"), 1, median)) + } + + f <- function(data, type, col.ind, ref.spectrum) { + + # preliminary normalize the dataset to unit sum + if (type == "pqn") { + data <- prep.norm(data, type = "area") + } w <- switch( type, "area" = apply(abs(data), 1, sum), "length" = sqrt(apply(data^2, 1, sum)), "sum" = apply(data, 1, sum), - "is" = apply(data[, col.ind, drop = FALSE], 1, sum) + "is" = apply(data[, col.ind, drop = FALSE], 1, sum), + "pqn" = pqn(data, ref.spectrum) ) if (is.null(w)) stop("Wrong value for argument 'type'.") return(sweep(data, 1, w, "/")) } - return(prep.generic(data, f, type = type, col.ind = col.ind)) + return(prep.generic(data, f, type = type, col.ind = col.ind, ref.spectrum = ref.spectrum)) } #' Savytzky-Golay filter @@ -558,7 +594,7 @@ getImplementedPrepMethods <- function() { method = prep.norm, params = list(type = "area", col.ind = NULL), params.info = list( - type = "type of normalization ('area', 'sum', 'length', 'is', 'snv').", + type = "type of normalization ('area', 'sum', 'length', 'is', 'snv', 'pqn').", col.ind = "indices of columns (variables) for normalization to internal standard peak." ), info = "Normalization." diff --git a/man/prep.norm.Rd b/man/prep.norm.Rd index 5e93c5e..32670e4 100644 --- a/man/prep.norm.Rd +++ b/man/prep.norm.Rd @@ -4,7 +4,7 @@ \alias{prep.norm} \title{Normalization} \usage{ -prep.norm(data, type = "area", col.ind = NULL) +prep.norm(data, type = "area", col.ind = NULL, ref.spectrum = NULL) } \arguments{ \item{data}{a matrix with data values} @@ -13,6 +13,8 @@ prep.norm(data, type = "area", col.ind = NULL) \item{col.ind}{indices of columns (can be either integer or logical valuws) for normalization to internal standard peak.} + +\item{ref.spectrum}{reference spectrum for PQN normalization, if not provided a mean spectrum for data is used} } \value{ data matrix with normalized values @@ -28,4 +30,14 @@ does the Standard Normal Variate normalization, similar to \code{\link{prep.snv} parameter `col.ind`. If the position is a single value, the rows are normalized to the height of this peak. If `col.ind` points on several adjucent vales, the rows are normalized to the area under the peak - sum of the intensities. + +The \code{"pqn"} is Probabilistic Quotient Normalization as described in [1]. In this case you also +need to provide a reference spectrum (e.g. mean or median of spectra for some reference samples). If +reference spectrum is not provided it will be computed as mean of the spectra to be +preprocessed (parameter \code{data}). +} +\references{ +1. F. Dieterle, A. Ross, H. Senn. Probabilistic Quotient Normalization as Robust Method to +Account for Dilution of Complex Biological Mixtures. Application in 1 H NMR Metabonomics. +Anal. Chem. 2006, 78, 4281–4290. 
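To make the PQN recipe described above concrete, here is a minimal sketch in plain R. It is an illustration only, not code from the package; `spectra` stands for a hypothetical matrix with one spectrum per row.

    spectra <- matrix(abs(rnorm(5 * 50)), nrow = 5)

    # reference spectrum (mean spectrum by default), normalized to unit area
    ref <- colMeans(spectra)
    ref <- ref / sum(abs(ref))

    # normalize every spectrum to unit area, as prep.norm() with type = "area" does
    s <- sweep(spectra, 1, rowSums(abs(spectra)), "/")

    # median quotient per spectrum, then divide each spectrum by its quotient
    q <- apply(sweep(s, 2, ref, "/"), 1, median)
    pqn <- sweep(s, 1, q, "/")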
} diff --git a/tests/testthat/test-prep.R b/tests/testthat/test-prep.R index c0d7531..e933445 100644 --- a/tests/testthat/test-prep.R +++ b/tests/testthat/test-prep.R @@ -150,6 +150,41 @@ test_that("Normalization to IS peak works (one value)", { expect_equivalent(pspectra1, pspectra2) }) +test_that("PQN normalization works correctly", { + data(simdata) + spectra <- simdata$spectra.c + ref.spectrum1 <- apply(simdata$spectra.c[c(1, 20, 30, 80, 80, 100), ], 2, mean) + ref.spectrum2 <- apply(spectra, 2, mean) + + expect_error(prep.norm(spectra, "pqn", ref.spectrum = 1)) + expect_error(prep.norm(spectra, "pqn", ref.spectrum = ref.spectrum1[, 1:10, drop = FALSE])) + + # manual preprocessing of spectra + ref.spectrum1 <- ref.spectrum1 / sum(abs(ref.spectrum1)) + ref.spectrum2 <- ref.spectrum2 / sum(abs(ref.spectrum2)) + pspectra2 <- pspectra1 <- matrix(0, nrow(spectra), ncol(spectra)) + + for (i in seq_len(nrow(spectra))) { + s <- spectra[i, ] / sum(abs(spectra[i, ])) + + q1 <- s / ref.spectrum1 + pspectra1[i, ] <- s / median(q1) + + q2 <- s / ref.spectrum2 + pspectra2[i, ] <- s / median(q2) + } + + par(mfrow = c(2, 2)) + mdaplot(prep.norm(spectra, type = "sum"), type = "l") + mdaplot(pspectra1, type = "l") + mdaplot(pspectra2, type = "l") + mdaplot(prep.norm(spectra, type = "pqn"), type = "l") + + expect_equivalent(prep.norm(spectra, type = "pqn", ref.spectrum = ref.spectrum1), pspectra1) + expect_equivalent(prep.norm(spectra, type = "pqn", ref.spectrum = ref.spectrum2), pspectra2) + expect_equivalent(prep.norm(spectra, type = "pqn"), pspectra2) +}) + test_that("Kubelka-Munk works correctly", { spectra <- simdata$spectra.c spectra <- spectra - min(spectra) + 0.01 * max(spectra) From f7c497342694eaca52613559bb863e827fcfc0e1 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Sat, 9 Oct 2021 16:51:47 +0200 Subject: [PATCH 02/42] roxygen version is updated --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9013bcb..a6dd65a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,7 +10,7 @@ Description: Projection based methods for preprocessing, Encoding: UTF-8 License: MIT + file LICENSE Imports: methods, graphics, grDevices, stats, Matrix -RoxygenNote: 7.1.1 +RoxygenNote: 7.1.2 Suggests: testthat NeedsCompilation: no Packaged: 2019-05-24 11:03:33 UTC; svkucheryavski From 9e78c6e2202f8c1fb1c9a93a237dd153b6f8fc0b Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Sat, 9 Oct 2021 16:55:14 +0200 Subject: [PATCH 03/42] updates to news --- NEWS.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/NEWS.md b/NEWS.md index bc35708..f014227 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +v. Coming +======== + +* added [PQN](https://doi.org/10.1021/ac051632c) normalization method to `prep.norm()` function. + + v.0.12.0 ======== This release is mostly about preprocessing - added some new methods, improved the existent once and implemented a possibility to combine preprocessing methods together (including parameter values) and apply them all together in a correct sequence. 
See [preprocessing section](https://mda.tools/docs/preprocessing.html) in the tutorials for details From d20062c77918467351e21c24a780c7c5a9b95d09 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Sat, 16 Oct 2021 14:24:15 +0200 Subject: [PATCH 04/42] added tests for computing vipscores for PLS2 model --- inst/testdata/pls-vipscores.csv | 1 + inst/testdata/pls2-vipscores.csv | 20 ++++++++++++++++++++ tests/testthat/test-pls.R | 20 +++++++++++++------- 3 files changed, 34 insertions(+), 7 deletions(-) create mode 100644 inst/testdata/pls2-vipscores.csv diff --git a/inst/testdata/pls-vipscores.csv b/inst/testdata/pls-vipscores.csv index 27d3bc3..90606c7 100644 --- a/inst/testdata/pls-vipscores.csv +++ b/inst/testdata/pls-vipscores.csv @@ -9,3 +9,4 @@ 1.3532 0.41075 0.24069 + diff --git a/inst/testdata/pls2-vipscores.csv b/inst/testdata/pls2-vipscores.csv new file mode 100644 index 0000000..ae69f50 --- /dev/null +++ b/inst/testdata/pls2-vipscores.csv @@ -0,0 +1,20 @@ +1.2671 +1.2949 +1.0898 +1.0064 +1.0368 +0.3689 +1.2343 +1.2086 +0.4838 +0.2964 +0.7180 +0.6928 +0.6460 +2.3307 +0.6128 +0.7728 +0.6622 +0.7537 +1.0538 +0.2556 \ No newline at end of file diff --git a/tests/testthat/test-pls.R b/tests/testthat/test-pls.R index f9c3dd9..3a5ba34 100644 --- a/tests/testthat/test-pls.R +++ b/tests/testthat/test-pls.R @@ -479,6 +479,19 @@ test_that("vipscores for people data (A = 4) identical to once computed in MATLA expect_equivalent(vipscores(m, ncomp = 4), as.matrix(vip), tolerance = 10^-4) }) +test_that("vipscores for people data (A = 4) identical to once computed in MATLAB for PLS2", { + data(people) + X <- people[, -c(4, 6)] + Y <- people[, c(4, 6)] + m <- pls(X, Y, 8, center = TRUE, scale = TRUE, cv = 1) + + f <- system.file("testdata", "pls2-vipscores.csv", package = "mdatools") + vip <- read.csv(f, header = FALSE)[[1]] + #dim(vip) <- c(ncol(X), 2) + + expect_equivalent(vipscores(m, ncomp = 4), matrix(vip, ncol = 2), tolerance = 10^-4) +}) + ###################################### # Block 6. Tests selratio() method # @@ -523,13 +536,6 @@ for (i in seq_along(datasets)) { print(v) } -#test_that("vipscores for people data (A = 4) identical to once computed in MATLAB", { -# d <- datasets[[1]] -# m <- pls(d$xc, d$yc, d$ncomp, center = d$center, scale = d$scale, cv = 10) -# -# vip <- as.matrix(read.csv("../matlab/pls-vipscores.csv", header = FALSE)) -# expect_equivalent(vipscores(m, ncomp = 4), vip, tolerance = 10^-4) -#}) ######################################### # Block 7. 
Tests for outlier detection #

From f02550eaa8019bbda1dc8d7859f8b33a0e55d334 Mon Sep 17 00:00:00 2001
From: Sergey Kucheryavskiy
Date: Sat, 16 Oct 2021 14:24:39 +0200
Subject: [PATCH 05/42] fixed a bug in vipscores()

---
 R/pls.R | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/R/pls.R b/R/pls.R
index e750caf..503edcf 100755
--- a/R/pls.R
+++ b/R/pls.R
@@ -1792,10 +1792,10 @@ vipscores <- function(obj, ncomp = obj$ncomp.selected) {
   weights <- obj$weights[, comp, drop = FALSE]
   yloads <- obj$yloadings[, comp, drop = FALSE];

-  # get eigenvalues and multiply them to degrees of freedom
+  # get eigenvalues
   xeigenvals <- obj$xeigenvals[comp]

-  # get number and indices of variables and adjust dimension for regcoeffs
+  # get number and indices of variables
   nvar <- nrow(weights)
   var_ind <- seq_len(nvar)

@@ -1808,12 +1808,12 @@ vipscores <- function(obj, ncomp = obj$ncomp.selected) {
   # prepare matrix for vipscores
   vipscores <- matrix(0, nrow = nvar, ncol = nrow(yloads))

-  # normalize scores
-  wnorm <- weights %*% diag(1 / sqrt(colSums(weights^2)), nrow = ncomp, ncol = ncomp)
+  # normalize weights
+  wnorm <- sweep(weights, 2, sqrt(colSums(weights^2)), "/")

   # compute sum of squares for explained y variance and normalize it
   ssq <- yloads^2 %*% diag(xeigenvals, nrow = ncomp, ncol = ncomp)
-  ssq <- ssq %*% diag(1 / rowSums(ssq), nrow = ncomp, ncol = ncomp)
+  ssq <- sweep(ssq, 1, rowSums(ssq), "/")

   # compute VIP scores
   vipscores[var_ind, ] <- sqrt(nvar * wnorm^2 %*% t(ssq))

From 26b6d46c646f07375239c125a12735751971fc1f Mon Sep 17 00:00:00 2001
From: Sergey Kucheryavskiy
Date: Sat, 16 Oct 2021 14:24:48 +0200
Subject: [PATCH 06/42] added info about last changes

---
 NEWS.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index f014227..954ba32 100755
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,7 @@
-v. Coming
-========
+v. 0.12.1
+=========
+
+* fixed a bug in `vipscores()` which could lead to a bit higher values for PLS2 models.

 * added [PQN](https://doi.org/10.1021/ac051632c) normalization method to `prep.norm()` function.

From 7202c40cd973bc5659bdb7859d3f41cd079f470b Mon Sep 17 00:00:00 2001
From: Sergey Kucheryavskiy
Date: Sat, 16 Oct 2021 14:25:37 +0200
Subject: [PATCH 07/42] updated DESCRIPTION with number of the coming version

---
 DESCRIPTION | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index a6dd65a..cbcee84 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mdatools
 Title: Multivariate Data Analysis for Chemometrics
-Version: 0.12.0
-Date: 2021-09-12
+Version: 0.12.1
+Date: 2021-10-16
 Author: Sergey Kucheryavskiy ()
 Maintainer: Sergey Kucheryavskiy
 Description: Projection based methods for preprocessing,

From 44bddd5345fc3097ac1711927ca7c09a550147f8 Mon Sep 17 00:00:00 2001
From: Sergey Kucheryavskiy
Date: Sat, 16 Oct 2021 14:36:08 +0200
Subject: [PATCH 08/42] fixed several typos in help text

---
 R/prep.R                   |  2 +-
 man/prep.norm.Rd           |  2 +-
 tests/testthat/test-prep.R | 10 +++++-----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/R/prep.R b/R/prep.R
index 0e80282..f91a2dc 100755
--- a/R/prep.R
+++ b/R/prep.R
@@ -108,7 +108,7 @@ prep.snv <- function(data) {
 #' @param data
 #' a matrix with data values
 #' @param type
-#' type of normalization \code{"area"}, \code{"length"}, \code{"sum"}, \code{"snv"}, \code{"is"}, or \code("pqn").
+#' type of normalization \code{"area"}, \code{"length"}, \code{"sum"}, \code{"snv"}, \code{"is"}, or \code{"pqn"}.
 #' @param col.ind
 #' indices of columns (can be either integer or logical valuws) for normalization to internal
 #' standard peak.
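A short usage sketch of the interface documented above (illustrative only; it assumes the package's own `simdata` example set, the same data the tests use):

    library(mdatools)
    data(simdata)
    spectra <- simdata$spectra.c

    s1 <- prep.norm(spectra, type = "area")
    s2 <- prep.norm(spectra, type = "pqn")   # mean spectrum is used as reference
    s3 <- prep.norm(spectra, type = "pqn", ref.spectrum = colMeans(spectra[1:10, ]))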
diff --git a/man/prep.norm.Rd b/man/prep.norm.Rd
index 32670e4..7990453 100644
--- a/man/prep.norm.Rd
+++ b/man/prep.norm.Rd
@@ -9,7 +9,7 @@ prep.norm(data, type = "area", col.ind = NULL, ref.spectrum = NULL)
 \arguments{
 \item{data}{a matrix with data values}

-\item{type}{type of normalization \code{"area"}, \code{"length"}, \code{"sum"}, \code{"snv"}, or \code{"is"}.}
+\item{type}{type of normalization \code{"area"}, \code{"length"}, \code{"sum"}, \code{"snv"}, \code{"is"}, or \code{"pqn"}.}

 \item{col.ind}{indices of columns (can be either integer or logical valuws) for normalization to internal
 standard peak.}

diff --git a/tests/testthat/test-prep.R b/tests/testthat/test-prep.R
index e933445..2d1540b 100644
--- a/tests/testthat/test-prep.R
+++ b/tests/testthat/test-prep.R
@@ -174,11 +174,11 @@ test_that("PQN normalization works correctly", {
     pspectra2[i, ] <- s / median(q2)
   }

-  par(mfrow = c(2, 2))
-  mdaplot(prep.norm(spectra, type = "sum"), type = "l")
-  mdaplot(pspectra1, type = "l")
-  mdaplot(pspectra2, type = "l")
-  mdaplot(prep.norm(spectra, type = "pqn"), type = "l")
+  # par(mfrow = c(2, 2))
+  # mdaplot(prep.norm(spectra, type = "sum"), type = "l")
+  # mdaplot(pspectra1, type = "l")
+  # mdaplot(pspectra2, type = "l")
+  # mdaplot(prep.norm(spectra, type = "pqn"), type = "l")

   expect_equivalent(prep.norm(spectra, type = "pqn", ref.spectrum = ref.spectrum1), pspectra1)
   expect_equivalent(prep.norm(spectra, type = "pqn", ref.spectrum = ref.spectrum2), pspectra2)
   expect_equivalent(prep.norm(spectra, type = "pqn"), pspectra2)
 })

From efb8169cf0e21fd8757c6ebaa78581552052c525 Mon Sep 17 00:00:00 2001
From: Sergey Kucheryavskiy
Date: Fri, 8 Jul 2022 13:53:35 +0200
Subject: [PATCH 09/42] added better implementation of SIMPLS algorithm and
 renamed the old one

---
 R/pls.R | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/R/pls.R b/R/pls.R
index 503edcf..916e787 100755
--- a/R/pls.R
+++ b/R/pls.R
@@ -1477,6 +1477,78 @@ pls.simpls <- function(x, y, ncomp, cv = FALSE) {
   x <- as.matrix(x)
   y <- as.matrix(y)

   npred <- ncol(x)
   nresp <- ncol(y)

+  # initial estimation
+  S <- crossprod(x, y)
+  M <- crossprod(x)
+  C <- diag(1, npred, npred)
+
+  # prepare space for results
+  B <- array(0, dim = c(npred, ncomp, nresp))
+  R <- matrix(0, nrow = npred, ncol = ncomp)
+  P <- matrix(0, nrow = npred, ncol = ncomp)
+  Q <- matrix(0, nrow = nresp, ncol = ncomp)
+
+
+  # loop for each components
+  for (a in seq_len(ncomp)) {
+
+    r <- svd(S, nu = 1, nv = 0)$u
+    t <- x %*% r
+
+    tnorm <- sqrt(sum(t * t))
+    t <- t / tnorm
+    r <- r / tnorm
+
+    q <- crossprod(S, r)
+    p <- M %*% r
+    v <- C %*% p
+    v <- v / sqrt(sum(v * v))
+
+    R[, a] <- r
+    Q[, a] <- q
+    P[, a] <- p
+
+    # coefficients are computed for each a from 1 to A
+    B[, a, ] <- tcrossprod(R[, seq_len(a), drop = FALSE], Q[, seq_len(a), drop = FALSE])
+
+    C <- C - tcrossprod(v)
+    M <- M - tcrossprod(p)
+    S <- C %*% S
+  }
+
+  return(list(coeffs = B, weights = R, xloadings = P, yloadings = Q, ncomp = a))
+}
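A quick sanity sketch of the new function's interface (illustrative only, random data; at this stage the returned list holds coefficients, weights and loadings):

    X <- matrix(rnorm(20 * 5), 20, 5)
    Y <- matrix(rnorm(20 * 2), 20, 2)
    m <- pls.simpls(X, Y, ncomp = 3)
    dim(m$coeffs)     # 5 3 2 - one slab of coefficients per response
    dim(m$weights)    # 5 3
    dim(m$yloadings)  # 2 3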
+
+#' SIMPLS algorithm (old implementation)
+#'
+#' @description
+#' SIMPLS algorithm for calibration of PLS model (old version)
+#'
+#' @param x
+#' a matrix with x values (predictors)
+#' @param y
+#' a matrix with y values (responses)
+#' @param ncomp
+#' number of components to calculate
+#' @param cv
+#' logical, is model calibrated during cross-validation or not
+#'
+#' @return
+#' a list with computed regression coefficients, loadings and scores for x and y matrices,
+#' and weights.
+#'
+#' @references
+#' [1]. S. de Jong. SIMPLS: An Alternative approach to partial least squares regression.
+#' Chemometrics and Intelligent Laboratory Systems, 18, 1993 (251-263).
+#'
+pls.simplsold <- function(x, y, ncomp, cv = FALSE) {
+
+  x <- as.matrix(x)
+  y <- as.matrix(y)
+
+  npred <- ncol(x)
+  nresp <- ncol(y)
+
   # initial estimation
   A <- crossprod(x, y)
   M <- crossprod(x, x)

From ba13624bf38c56c1269e209b1513812f4a9e0f1a Mon Sep 17 00:00:00 2001
From: Sergey Kucheryavskiy
Date: Sat, 9 Jul 2022 20:53:39 +0200
Subject: [PATCH 10/42] new implementation of simpls algorithm and PLS code
 refactoring

---
 R/pls.R | 288 ++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 196 insertions(+), 92 deletions(-)

diff --git a/R/pls.R b/R/pls.R
index 916e787..e819c71 100755
--- a/R/pls.R
+++ b/R/pls.R
@@ -505,6 +505,161 @@ setDistanceLimits.pls <- function(obj, lim.type = obj$lim.type, alpha = obj$alph
   return(obj)
 }

+#' Compute predictions for response values
+#'
+#' @param x
+#' matrix with predictors, already preprocessed (e.g. mean centered) and cleaned
+#' @param coeffs
+#' array with regression coefficients
+#' @param ycenter
+#' `ycenter` property of PLS model
+#' @param yscale
+#' `yscale` property of PLS model
+#' @param ynames
+#' vector with names of the responses
+#' @param y.attrs
+#' list with response attributes (e.g. from reference values if any)
+#' @param objnames
+#' vector with names of objects (rows of x)
+#' @param compnames
+#' vector with names used for components
+#'
+#' @return array with predicted y-values
+pls.getpredictions <- function(x, coeffs, ycenter, yscale, ynames = NULL, y.attrs = NULL, objnames = NULL, compnames = NULL) {
+
+  yp <- apply(coeffs, 3, function(x, y)(y %*% x), x)
+  dim(yp) <- c(nrow(x), dim(coeffs)[2], dim(coeffs)[3])
+
+  # unscale predicted y values
+  yp <- if (is.numeric(yscale)) sweep(yp, 3, yscale, "*") else yp
+
+  # uncenter predicted y values
+  yp <- if (is.numeric(ycenter)) sweep(yp, 3, ycenter, "+") else yp
+
+  # set up all attributes and names
+  yp <- mda.setattr(yp, y.attrs, "row")
+  attr(yp, "name") <- "Response values, predicted"
+  dimnames(yp) <- list(objnames, compnames, ynames)
+
+  return (yp)
+}
+
+#' Compute object with decomposition of y-values
+#'
+#' @param y
+#' matrix with responses, already preprocessed (e.g. mean centered) and cleaned
+#' @param yscores
+#' matrix with Y-scores
+#' @param xscores
+#' matrix with X-scores
+#' @param yloadings
+#' matrix with Y-loadings
+#' @param yeigenvals
+#' matrix with eigenvalues for Y
+#' @param ynames
+#' vector with names of the responses
+#' @param y.attrs
+#' list with response attributes (e.g.
from reference values if any) +#' @param x.attrs +#' list with preditors attributes +#' @param objnames +#' vector with names of objects (rows of x) +#' @param compnames +#' vector with names used for components +#' +#' @return array `ldecomp` object for y-values (or NULL if y is not provided) +pls.getydecomp <- function(y, yscores, xscores, yloadings, yeigenvals, ynames = NULL, y.attrs = NULL, x.attrs = NULL, objnames = NULL, compnames = NULL) { + + # if reference y-values are not provided, no ydecomp can be computed + if (is.null(y)) return (NULL) + + # compute resuduals + yresiduals <- y - tcrossprod(xscores, yloadings) + + # set names + rownames(yscores) <- rownames(yresiduals) <- objnames + colnames(yscores) <- compnames + colnames(yresiduals) <- ynames + + # set attributes + yscores <- mda.setattr(yscores, y.attrs, "row") + yresiduals <- mda.setattr(yresiduals, y.attrs) + + # set attributes + yscores <- mda.setattr(yscores, x.attrs, "row") + yresiduals <- mda.setattr(yresiduals, y.attrs) + + + # attr(yscores, "exclrows") <- attr(yresiduals, "exclrows") <- y.attrs$exclrows + attr(yscores, "name") <- "Y-scores" + attr(yscores, "xaxis.name") <- "Components" + attr(yresiduals, "name") <- "Residuals" + + # create ydecomp object (we use xscores as residuals for different components are computed + # as xscores %*% t(yloadings)), but then we assign correct residuals + ydecomp <- ldecomp(scores = xscores, loadings = yloadings, residuals = yresiduals, eigenvals = yeigenvals) + ydecomp$scores <- yscores + + return (ydecomp) +} + +#' Compute object with decomposition of x-values +#' +#' @param x +#' matrix with predictors, already preprocessed (e.g. mean centered) and cleaned +#' @param xscores +#' matrix with X-scores +#' @param xloadings +#' matrix with X-loadings +#' @param xeigenvals +#' matrix with eigenvalues for X +#' @param xnames +#' vector with names of the predictors +#' @param x.attrs +#' list with preditors attributes +#' @param objnames +#' vector with names of objects (rows of x) +#' @param compnames +#' vector with names used for components +#' +#' @return array `ldecomp` object for x-values +pls.getxdecomp <- function(x, xscores, xloadings, xeigenvals, xnames = NULL, x.attrs = NULL, objnames = NULL, compnames = NULL) { + + # compute x-residuals + xresiduals <- x - tcrossprod(xscores, xloadings) + + # set attributes + xscores <- mda.setattr(xscores, x.attrs, "row") + xresiduals <- mda.setattr(xresiduals, x.attrs) + attr(xscores, "name") <- "X-scores" + attr(xscores, "xaxis.name") <- "Components" + attr(xresiduals, "name") <- "Residuals" + + # set names + rownames(xscores) <- rownames(xresiduals) <- objnames + colnames(xscores) <- compnames + colnames(xresiduals) <- xnames + + # create and return xdecomp object + xdecomp <- ldecomp(scores = xscores, residuals = xresiduals, loadings = xloadings, eigenvals = xeigenvals) + return (xdecomp) +} + +#' Compute matrix with X-scores +#' +#' @param x +#' matrix with predictors, already preprocessed and cleaned +#' @param weights +#' matrix with PLS weights +#' @param xloadings +#' matrix with X-loadings +#' +#' @return matrix with X-scores +pls.getxscores <- function(x, weights, xloadings) { + xscores <- x %*% (weights %*% solve(crossprod(xloadings, weights))) +} + + #' PLS predictions #' #' @description @@ -533,11 +688,11 @@ predict.pls <- function(object, x, y = NULL, cv = FALSE, ...) 
{ # get names prednames <- rownames(object$xloadings) respnames <- rownames(object$yloadings) + compnames <- colnames(object$xloadings) objnames <- rownames(x) # preprocess x and calculate scores, total and full variance x.attrs <- mda.getattr(x) - y.attrs <- mda.getattr(y) # set names for y-axis (rows if it is empty) if (is.null(x.attrs$yaxis.name)) { @@ -559,29 +714,30 @@ predict.pls <- function(object, x, y = NULL, cv = FALSE, ...) { # autoscale x x <- prep.autoscale(x, center = object$xcenter, scale = object$xscale) - # compute x scores and residuals - xscores <- x %*% (object$weights %*% solve(crossprod(object$xloadings, object$weights))) - xresiduals <- x - tcrossprod(xscores, object$xloadings) + # get predicted y-values + yp <- pls.getpredictions(x, object$coeffs$values, object$ycenter, object$yscale, respnames, x.attrs, objnames, compnames) - # set attributes - xscores <- mda.setattr(xscores, x.attrs, "row") - xresiduals <- mda.setattr(xresiduals, x.attrs) - attr(xscores, "name") <- "X-scores" - attr(xscores, "xaxis.name") <- "Components" - attr(xresiduals, "name") <- "Residuals" + # if predictions for cross-validation - return + if (cv) { + return(list(y.pred = yp)) + } - # set names - rownames(xscores) <- rownames(xresiduals) <- objnames - colnames(xscores) <- colnames(object$xloadings) - colnames(xresiduals) <- prednames + # compute xdecomp + xscores <- pls.getxscores(x, object$weights, object$xloadings) + xdecomp <- pls.getxdecomp(x, xscores, object$xloadings, object$xeigenvals, prednames, x.attrs, objnames, compnames) + xdecomp$ncomp.selected <- object$ncomp.selected - # make predictions - yp <- apply(object$coeffs$values, 3, function(x, y)(y %*% x), x) - dim(yp) <- c(nrow(x), ncomp, dim(object$coeffs$values)[3]) + # add u0 and Nu parameters as arguments, so the orthogonal distance values can be normalized + attr(xdecomp$Q, "u0") <- object$Qlim[3, ] + attr(xdecomp$Q, "Nu") <- object$Qlim[4, ] - # if reference values are provided calculate and set up names for ydecomp - y.ref <- NULL + # add u0 and Nu parameters as arguments, so the score distance values can be normalized + attr(xdecomp$T2, "u0") <- object$T2lim[3, ] + attr(xdecomp$T2, "Nu") <- object$T2lim[4, ] + + # compute ydecomp if y-values are provided ydecomp <- NULL + y.ref <- y if (!is.null(y)) { if (is.null(dim(y))) dim(y) <- c(length(y), 1) @@ -594,80 +750,24 @@ predict.pls <- function(object, x, y = NULL, cv = FALSE, ...) 
{ stop("Wrong number of columns in matrix with response values (y).") } - # keep the original y values as reference - y.ref <- y + y.attrs <- mda.getattr(y) + y.attrs$exclrows <- x.attrs$exclrows # autoscale y-values y <- prep.autoscale(y, center = object$ycenter, scale = object$yscale) - - # compute and orthogonalize y-scores yscores <- as.matrix(y) %*% object$yloadings - for (a in seq_len(ncomp)) { - for (n in 1:2) { - for (j in seq_len(a - 1)) { - yscores[, a] <- yscores[, a] - - tcrossprod(xscores[, j], yscores[, a]) %*% xscores[, j] - } - } - } - # compute y-residuals - yresiduals <- y - yp[, ncomp, ] - - # set names - rownames(yscores) <- rownames(yresiduals) <- objnames - colnames(yscores) <- colnames(object$yloadings) - colnames(yresiduals) <- respnames - - # set attributes - yscores <- mda.setattr(yscores, x.attrs, "row") - yresiduals <- mda.setattr(yresiduals, y.attrs) - attr(yscores, "exclrows") <- attr(yresiduals, "exclrows") <- x.attrs$exclrows - attr(yscores, "name") <- "Y-scores" - attr(yscores, "xaxis.name") <- "Components" - attr(yresiduals, "name") <- "Residuals" - - # create ydecomp object (we use xscores as residuals for different components are computed - # as xscores %*% t(yloadings)), but then we assign correct residuals - ydecomp <- ldecomp(scores = xscores, loadings = object$yloadings, residuals = yresiduals, - eigenvals = object$yeigenvals, ncomp.selected = object$ncomp.selected) - ydecomp$scores <- yscores + # below we use xdecomp$scores instead of xscores to provide all names and attributes + ydecomp <- pls.getydecomp(y, yscores, xdecomp$scores, object$yloadings, object$yeigenvals, + respnames, y.attrs, x.attrs, objnames, compnames) + ydecomp$ncomp.selected <- object$ncomp.selected + + # add u0 and Nu parameters as arguments, so the z-distance values can be normalized attr(ydecomp$Q, "u0") <- object$Zlim[3, ] attr(ydecomp$Q, "Nu") <- object$Zlim[4, ] } - # unscale predicted y values - yp <- if (is.numeric(object$yscale)) sweep(yp, 3, object$yscale, "*") else yp - - # uncenter predicted y values - yp <- if (is.numeric(object$ycenter)) sweep(yp, 3, object$ycenter, "+") else yp - - # if predictions for cross-validation - return - if (cv) { - return(list(y.pred = yp)) - } - - # set up all attributes and names - yp <- mda.setattr(yp, x.attrs, "row") - attr(yp, "exclrows") <- x.attrs$exclrows - attr(yp, "name") <- "Response values, predicted" - dimnames(yp) <- c(list(rownames(x)), dimnames(object$coeffs$values)[2:3]) - - # create xdecomp object - xdecomp <- ldecomp(scores = xscores, residuals = xresiduals, loadings = object$xloadings, - eigenvals = object$xeigenvals, ncomp.selected = object$ncomp.selected) - - # add u0 and Nu parameters as arguments, so the residuals can be normalized - attr(xdecomp$Q, "u0") <- object$Qlim[3, ] - attr(xdecomp$Q, "Nu") <- object$Qlim[4, ] - - attr(xdecomp$T2, "u0") <- object$T2lim[3, ] - attr(xdecomp$T2, "Nu") <- object$T2lim[4, ] - - return( - plsres(yp, y.ref = y.ref, ncomp.selected = object$ncomp.selected, - xdecomp = xdecomp, ydecomp = ydecomp) - ) + return (plsres(yp, y.ref = y.ref, ncomp.selected = object$ncomp.selected, xdecomp = xdecomp, ydecomp = ydecomp)) } #' Categorize data rows based on PLS results and critical limits for total distance. 
@@ -1474,6 +1574,7 @@ pls.simpls <- function(x, y, ncomp, cv = FALSE) { x <- as.matrix(x) y <- as.matrix(y) + nobj <- nrow(x) npred <- ncol(x) nresp <- ncol(y) @@ -1487,6 +1588,8 @@ pls.simpls <- function(x, y, ncomp, cv = FALSE) { R <- matrix(0, nrow = npred, ncol = ncomp) P <- matrix(0, nrow = npred, ncol = ncomp) Q <- matrix(0, nrow = nresp, ncol = ncomp) + T <- matrix(0, nrow = nobj, ncol = ncomp) + U <- matrix(0, nrow = nobj, ncol = ncomp) # loop for each components @@ -1499,14 +1602,18 @@ pls.simpls <- function(x, y, ncomp, cv = FALSE) { t <- t / tnorm r <- r / tnorm - q <- crossprod(S, r) p <- M %*% r v <- C %*% p v <- v / sqrt(sum(v * v)) + q <- crossprod(S, r) + u <- y %*% q + R[, a] <- r Q[, a] <- q P[, a] <- p + T[, a] <- t + U[, a] <- u # coefficients are computed for each a from 1 to A B[, a, ] <- tcrossprod(R[, seq_len(a), drop = FALSE], Q[, seq_len(a), drop = FALSE]) @@ -1516,7 +1623,7 @@ pls.simpls <- function(x, y, ncomp, cv = FALSE) { S <- C %*% S } - return(list(coeffs = B, weights = R, xloadings = P, yloadings = Q, ncomp = a)) + return(list(coeffs = B, weights = R, xloadings = P, xscores = T, yloadings = Q, yscores = U, ncomp = a)) } #' SIMPLS algorithm (old implementation) @@ -1738,7 +1845,7 @@ pls.cal <- function(x, y, ncomp, center, scale, method = "simpls", cv = FALSE) { # correct maximum number of components ncomp <- min(xc.ncols, xc.nrows - 1 - nobj.cv, ncomp) - # fit model + # fit the model fit <- pls.run(x, y, method = method, ncomp = ncomp, cv = cv) model$ncomp <- ncomp <- fit$ncomp @@ -1750,14 +1857,11 @@ pls.cal <- function(x, y, ncomp, center, scale, method = "simpls", cv = FALSE) { return(model) } - # compute x-scores and residuals - xscores <- x %*% (fit$weights %*% solve(crossprod(fit$xloadings, fit$weights))) - yscores <- as.matrix(y) %*% fit$yloadings # compute eigenvalues - xeigenvals <- colSums(xscores^2) / (xc.nrows - 1) + xeigenvals <- colSums(fit$xscores^2) / (xc.nrows - 1) attr(xeigenvals, "DoF") <- (xc.nrows - 1) - yeigenvals <- colSums(yscores^2) / (xc.nrows - 1) + yeigenvals <- colSums(fit$yscores^2) / (xc.nrows - 1) attr(yeigenvals, "DoF") <- (xc.nrows - 1) # correct results related to predictors for missing columns in x From 203d7a954f9cda42571f1f829b7ad186b83da5dd Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Sat, 9 Jul 2022 21:56:19 +0200 Subject: [PATCH 11/42] improvements to ldecomp.getDistances method --- R/ldecomp.R | 40 +++++++++++++--------------------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/R/ldecomp.R b/R/ldecomp.R index 3492ebe..d92a949 100755 --- a/R/ldecomp.R +++ b/R/ldecomp.R @@ -431,35 +431,21 @@ ldecomp.getDistances <- function(scores, loadings, residuals, eigenvals) { residuals <- residuals[, -cols_excluded, drop = FALSE] } - # get rid of hidden scores and residuals (needed for some calculations) - scores_visible <- scores - residuals_visible <- residuals - if (length(rows_excluded) > 0) { - scores_visible <- scores_visible[-rows_excluded, , drop = FALSE] - residuals_visible <- residuals_visible[-rows_excluded, , drop = FALSE] + # helper function to compute orthogonal distances for given number of components in a model + getResiduals <- function(scores, loadings, residuals, a) { + ncomp <- ncol(scores) + if (a == ncomp) return(residuals) + + residuals + tcrossprod( + scores[, (a + 1):ncomp, drop = FALSE], + loadings[, (a + 1):ncomp, drop = FALSE] + ) } - # normalize the scores - scoresn <- scale(scores, center = FALSE, scale = sqrt(eigenvals)) - - # prepare zero matrices for the and 
model power - T2 <- matrix(0, nrow = nobj, ncol = ncomp) - Q <- matrix(0, nrow = nobj, ncol = ncomp) - - # calculate distances and model power for each possible number of components in model - for (i in seq_len(ncomp)) { - res <- residuals - if (i < ncomp) { - res <- res + - tcrossprod( - scores[, (i + 1):ncomp, drop = F], - loadings[, (i + 1):ncomp, drop = F] - ) - } - - Q[, i] <- rowSums(res^2) - T2[, i] <- rowSums(scoresn[, seq_len(i), drop = F]^2) - } + # compute matrices with orthogonal and score distances + Q <- sapply(seq_len(ncomp), function(a) rowSums(getResiduals(scores, loadings, residuals, a)^2)) + T2 <- t(apply(scale(scores^2, center = FALSE, scale = eigenvals), 1, cumsum)) + dim(T2) <- c(nobj, ncomp) # set attributes for Q Q <- mda.setattr(Q, mda.getattr(scores), type = "row") From f589453155137ccc83c2a8c7f32ee862b1aa3cb5 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Sat, 9 Jul 2022 21:56:36 +0200 Subject: [PATCH 12/42] added tests for SIMPLS algorithm --- tests/testthat/test-simpls.R | 87 ++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 tests/testthat/test-simpls.R diff --git a/tests/testthat/test-simpls.R b/tests/testthat/test-simpls.R new file mode 100644 index 0000000..e19ec95 --- /dev/null +++ b/tests/testthat/test-simpls.R @@ -0,0 +1,87 @@ +###################################### +# Tests for SIMPLS algorithms # +###################################### + +# setup({ +# pdf(file = tempfile("mdatools-test-simpls-", fileext = ".pdf")) +# sink(tempfile("mdatools-test-simpls-", fileext = ".txt"), append = FALSE, split = FALSE) +# }) + +# teardown({ +# dev.off() +# sink() +# }) + +################################################# +# Block 1. Tests the old algorithm # +################################################# + +context("simpls: PLS2 example from the paper") + +# convert results produced by SIMPLS algorithm to form +# suitable for testing +simpls2res <- function (m, X, Y, A) { + R <- m$weights + P <- m$xloadings + Q <- m$yloadings + + T <- X %*% R + Xexp <- rep(0, A) + Yexp <- rep(0, A) + + for (a in 1:A) { + B <- tcrossprod(R[, a, drop = FALSE], Q[, a, drop = FALSE]) + Xhat <- tcrossprod(T[, a, drop = FALSE], P[, a, drop = FALSE]) + Yhat <- X %*% B + Xexp[a] <- sum(Xhat^2)/sum(X^2) * 100 + Yexp[a] <- sum(Yhat^2)/sum(Y^2) * 100 + } + + rnorm = sqrt(colSums(R^2)) + R = R %*% diag(1 / rnorm) + + return(list(R = R, T = T, P = P, Q = Q, Xexp = Xexp, Yexp = Yexp)) +} + +# deterministic small data +A <- 3 +X <- matrix(c(-4, -4, 4, 4, 2, -2, 2, -2, 1, -1, -1, 1), nrow = 4) +Y <- matrix(c(430, -436, -361, 367, -94, 12, -22, 104), nrow = 4) + +# expected values +expected = list( + R = matrix(c(0.0164, 0.1798, 0.9836, 0.4562, -0.7719, 0.4428, 0.3392, 0.7141, -0.6124), ncol = A), + T = matrix(c(0.6088, -0.6712, -0.2662, 0.3286, -0.6018, -0.1489, -0.0333, 0.7840, -0.1311, -0.5266, 0.8234, -0.1657), ncol = A), + Xexp = c(6.72, 50.77, 42.51), + Yexp = c(90.15, 4.54, 5.31) +) + +test_that("new algorithm works correctly", { + + res <- simpls2res(pls.simpls(X, Y, A), X, Y, A) + + expect_equivalent(abs(res$R), abs(expected$R), tolerance = 10^-4) + expect_equivalent(abs(res$T), abs(expected$T), tolerance = 10^-4) + expect_equivalent(res$Xexp, expected$Xexp, tolerance = 10^-4) + expect_equivalent(res$Yexp, expected$Yexp, tolerance = 10^-4) +}) + +test_that("old algorithm works correctly", { + + res <- simpls2res(pls.simplsold(X, Y, A), X, Y, A) + + expect_equivalent(abs(res$R), abs(expected$R), tolerance = 10^-4) + 
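  # for reference (illustrative note, not an assertion): SIMPLS builds the
  # cumulative regression coefficients as B_a = R[, 1:a] %*% t(Q[, 1:a]),
  # which is what simpls2res() relies on; e.g. for the new implementation
  #   m <- pls.simpls(X, Y, A)
  #   all.equal(tcrossprod(m$weights[, 1:2], m$yloadings[, 1:2]), m$coeffs[, 2, ])
  # should return TRUE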
expect_equivalent(abs(res$T), abs(expected$T), tolerance = 10^-4) + expect_equivalent(res$Xexp, expected$Xexp, tolerance = 10^-4) + expect_equivalent(res$Yexp, expected$Yexp, tolerance = 10^-4) +}) + + +test_that("new algorithm is more numerically stable", { + Xr <- matrix(rnorm(30000 * 1000), 30000, 1000) + Yr <- matrix(rnorm(30000 * 2), 30000, 2) + + expect_warning(pls.simplsold(Xr, Yr / 100000000, 50)) + expect_silent(pls.simpls(Xr, Yr / 100000000, 50)) +}) + From 5c642cf9544f39c1e00abd80b6c39124ffdebf7b Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Sun, 10 Jul 2022 11:15:41 +0200 Subject: [PATCH 13/42] added orthogonalization of yscores plus improvements to SIMPLS --- R/pls.R | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/R/pls.R b/R/pls.R index e819c71..8dcb45d 100755 --- a/R/pls.R +++ b/R/pls.R @@ -659,7 +659,19 @@ pls.getxscores <- function(x, weights, xloadings) { xscores <- x %*% (weights %*% solve(crossprod(xloadings, weights))) } +pls.getyscores <- function(y, yloadings, xscores) { + ncomp <- ncol(yloadings) + yscores <- as.matrix(y) %*% yloadings + if (ncomp < 2) return(yscores) + + # orthogonalize + for (a in 2:ncomp) { + yscores[, a] <- yscores[, a] - xscores[, 1:(a-1), drop = FALSE] %*% crossprod(xscores[, 1:(a-1), drop = FALSE], yscores[, a]) + } + + return (yscores) +} #' PLS predictions #' #' @description @@ -755,7 +767,7 @@ predict.pls <- function(object, x, y = NULL, cv = FALSE, ...) { # autoscale y-values y <- prep.autoscale(y, center = object$ycenter, scale = object$yscale) - yscores <- as.matrix(y) %*% object$yloadings + yscores <- pls.getyscores(as.matrix(y), object$yloadings, xscores) # below we use xdecomp$scores instead of xscores to provide all names and attributes ydecomp <- pls.getydecomp(y, yscores, xdecomp$scores, object$yloadings, object$yeigenvals, @@ -1581,15 +1593,12 @@ pls.simpls <- function(x, y, ncomp, cv = FALSE) { # initial estimation S <- crossprod(x, y) M <- crossprod(x) - C <- diag(1, npred, npred) # prepare space for results B <- array(0, dim = c(npred, ncomp, nresp)) - R <- matrix(0, nrow = npred, ncol = ncomp) - P <- matrix(0, nrow = npred, ncol = ncomp) Q <- matrix(0, nrow = nresp, ncol = ncomp) - T <- matrix(0, nrow = nobj, ncol = ncomp) - U <- matrix(0, nrow = nobj, ncol = ncomp) + R <- V <- P <- matrix(0, nrow = npred, ncol = ncomp) + T <- U <- matrix(0, nrow = nobj, ncol = ncomp) # loop for each components @@ -1602,25 +1611,30 @@ pls.simpls <- function(x, y, ncomp, cv = FALSE) { t <- t / tnorm r <- r / tnorm - p <- M %*% r - v <- C %*% p - v <- v / sqrt(sum(v * v)) - - q <- crossprod(S, r) + p <- crossprod(x, t) + q <- crossprod(y, t) u <- y %*% q + v <- p + + if (a > 1) { + v <- v - V %*% crossprod(V, p) + u <- u - T %*% crossprod(T, u) + } + + v <- v / sqrt(sum(v * v)) R[, a] <- r - Q[, a] <- q + V[, a] <- v P[, a] <- p T[, a] <- t U[, a] <- u + Q[, a] <- q # coefficients are computed for each a from 1 to A B[, a, ] <- tcrossprod(R[, seq_len(a), drop = FALSE], Q[, seq_len(a), drop = FALSE]) - C <- C - tcrossprod(v) M <- M - tcrossprod(p) - S <- C %*% S + S <- S - v %*% crossprod(v, S) } return(list(coeffs = B, weights = R, xloadings = P, xscores = T, yloadings = Q, yscores = U, ncomp = a)) From 6cd6cc78253f0825095b6350dd77b346f2b9d141 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Sun, 10 Jul 2022 12:07:48 +0200 Subject: [PATCH 14/42] added new tests for PLS and SIMPLS --- inst/testdata/plstlbx-people-weights.csv | 11 +++++ 
inst/testdata/plstlbx-people-xhdist.csv | 32 +++++++++++++ inst/testdata/plstlbx-people-xloadings.csv | 11 +++++ inst/testdata/plstlbx-people-xqdist.csv | 32 +++++++++++++ inst/testdata/plstlbx-people-xscores.csv | 32 +++++++++++++ inst/testdata/plstlbx-people-ypred.csv | 32 +++++++++++++ inst/testdata/plstlbx-people-yqdist.csv | 32 +++++++++++++ inst/testdata/plstlbx-people-yscores.csv | 32 +++++++++++++ tests/testthat/test-pls.R | 54 +++++++++++++++++++++- tests/testthat/test-simpls.R | 29 +++++++++++- 10 files changed, 294 insertions(+), 3 deletions(-) create mode 100644 inst/testdata/plstlbx-people-weights.csv create mode 100644 inst/testdata/plstlbx-people-xhdist.csv create mode 100644 inst/testdata/plstlbx-people-xloadings.csv create mode 100644 inst/testdata/plstlbx-people-xqdist.csv create mode 100644 inst/testdata/plstlbx-people-xscores.csv create mode 100644 inst/testdata/plstlbx-people-ypred.csv create mode 100644 inst/testdata/plstlbx-people-yqdist.csv create mode 100644 inst/testdata/plstlbx-people-yscores.csv diff --git a/inst/testdata/plstlbx-people-weights.csv b/inst/testdata/plstlbx-people-weights.csv new file mode 100644 index 0000000..626e85e --- /dev/null +++ b/inst/testdata/plstlbx-people-weights.csv @@ -0,0 +1,11 @@ +0.4319 0.1854 0.1149 -0.0445 +0.4356 0.1764 0.2654 0.4317 +-0.3699 0.0237 0.8413 0.8718 +0.1452 -0.0604 0.0389 0.1838 +0.1593 -0.3207 -0.1333 -0.1604 +0.3133 -0.2264 0.3859 0.5192 +-0.0399 0.6602 0.4212 0.4030 +-0.4139 -0.2195 0.0848 -0.0953 +0.4193 0.1815 0.1986 0.0160 +-0.0696 0.5662 -0.1668 -0.1324 +-0.0540 -0.0880 -0.5830 0.5314 diff --git a/inst/testdata/plstlbx-people-xhdist.csv b/inst/testdata/plstlbx-people-xhdist.csv new file mode 100644 index 0000000..5dbaf8d --- /dev/null +++ b/inst/testdata/plstlbx-people-xhdist.csv @@ -0,0 +1,32 @@ +7.4875 +3.2397 +2.5583 +10.8514 +3.2828 +4.0721 +7.1071 +3.9232 +3.8897 +3.3473 +3.8491 +4.6580 +3.1512 +2.5963 +2.0801 +2.2521 +4.2200 +4.5492 +3.5500 +5.5432 +3.1816 +3.5329 +2.3602 +3.5737 +4.0456 +3.4025 +5.0792 +3.6392 +1.8355 +1.5958 +2.7383 +2.807 \ No newline at end of file diff --git a/inst/testdata/plstlbx-people-xloadings.csv b/inst/testdata/plstlbx-people-xloadings.csv new file mode 100644 index 0000000..6972cd3 --- /dev/null +++ b/inst/testdata/plstlbx-people-xloadings.csv @@ -0,0 +1,11 @@ +0.4106 0.1141 0.1036 0.0457 +0.4153 0.0766 0.0066 0.1391 +-0.3726 -0.0907 0.3806 0.3537 +0.1521 -0.0844 -0.0460 0.0869 +0.1963 -0.3142 0.0105 0.0939 +0.3394 -0.3346 0.1565 0.1682 +-0.1160 0.5861 0.0871 0.1951 +-0.3886 -0.1876 0.1756 0.0410 +0.3983 0.0960 0.1564 0.0921 +-0.1349 0.6053 -0.2032 -0.0492 +-0.0439 0.0270 -0.8447 0.8707 diff --git a/inst/testdata/plstlbx-people-xqdist.csv b/inst/testdata/plstlbx-people-xqdist.csv new file mode 100644 index 0000000..971cce6 --- /dev/null +++ b/inst/testdata/plstlbx-people-xqdist.csv @@ -0,0 +1,32 @@ +1.9041 +0.8735 +0.4510 +1.1024 +2.7907 +2.2340 +1.2173 +0.9854 +1.2716 +0.7961 +0.4860 +0.3754 +1.6481 +1.3018 +0.3733 +1.2522 +2.3064 +2.6794 +4.7591 +1.7569 +0.6085 +1.4004 +0.4096 +1.4456 +2.6432 +2.4861 +5.3929 +9.5875 +1.2365 +3.1687 +0.8946 +2.6442 \ No newline at end of file diff --git a/inst/testdata/plstlbx-people-xscores.csv b/inst/testdata/plstlbx-people-xscores.csv new file mode 100644 index 0000000..59763c8 --- /dev/null +++ b/inst/testdata/plstlbx-people-xscores.csv @@ -0,0 +1,32 @@ +4.8335 -0.4321 1.5689 0.2021 +2.8513 -0.6212 -0.6223 0.7530 +2.7141 -0.6940 -0.6950 0.4506 +-1.0533 -2.0401 -1.3808 -1.8132 +-1.0359 -1.0859 1.2470 0.5618 +-0.6989 -1.0948 1.6394 
0.2147 +2.3781 -1.4688 -1.4418 1.1361 +2.2660 -1.4589 -1.0201 0.6079 +-1.5931 -1.2889 1.1821 -0.6916 +-1.5246 -1.4122 1.1142 -0.4895 +2.8442 -1.3881 -0.0623 -0.8758 +-2.5820 -2.2613 -0.7718 0.4286 +-1.5279 -1.3792 1.2190 -0.0454 +-1.7034 -1.6552 0.7695 0.1952 +2.7436 -1.1517 0.0271 -0.2252 +2.7353 -1.3513 -0.2008 -0.0098 +2.1300 2.4733 0.6869 0.0039 +2.4403 2.4744 0.3435 0.5122 +-1.8386 0.0980 -0.7841 1.0571 +-2.8772 2.1646 0.5288 0.8908 +-3.3698 0.0943 -0.7736 -0.4325 +0.6607 1.7764 -0.9579 -0.6680 +1.1611 1.9766 -0.3325 -0.3089 +1.4512 1.4728 -0.2671 -1.0428 +-2.9197 0.9634 0.5934 -0.9167 +-3.1564 0.8307 -0.8396 0.4499 +1.2063 1.3302 0.0391 -1.4371 +0.3213 1.2869 1.2137 0.7285 +-2.3826 0.1716 -0.7929 0.0487 +-2.5680 0.7585 -0.1434 0.2315 +0.8214 2.2157 -0.5020 -0.1606 +-2.7271 0.6961 -0.5844 0.6443 \ No newline at end of file diff --git a/inst/testdata/plstlbx-people-ypred.csv b/inst/testdata/plstlbx-people-ypred.csv new file mode 100644 index 0000000..826d4ce --- /dev/null +++ b/inst/testdata/plstlbx-people-ypred.csv @@ -0,0 +1,32 @@ +48.0862 +44.2076 +43.8202 +36.0841 +38.3637 +38.9222 +42.8676 +42.6631 +36.9118 +37.0067 +43.4588 +34.5260 +37.2101 +36.7130 +43.6720 +43.5525 +44.7849 +45.3343 +37.0879 +36.8568 +34.1175 +41.2542 +42.5075 +42.4945 +35.5957 +35.1031 +42.0069 +41.7480 +35.8982 +36.1891 +42.0698 +35.8860 \ No newline at end of file diff --git a/inst/testdata/plstlbx-people-yqdist.csv b/inst/testdata/plstlbx-people-yqdist.csv new file mode 100644 index 0000000..1185db6 --- /dev/null +++ b/inst/testdata/plstlbx-people-yqdist.csv @@ -0,0 +1,32 @@ +0.0005 +0.0028 +0.0021 +0.0005 +0.0087 +0.0004 +0.0496 +0.0075 +0.0548 +0.0000 +0.1401 +0.1431 +0.0411 +0.0054 +0.0071 +0.0132 +0.0030 +0.0292 +0.0005 +0.0483 +0.0009 +0.0043 +0.0160 +0.0168 +0.0108 +0.0530 +0.0000 +0.0042 +0.0007 +0.0931 +0.0003 +0.0517 \ No newline at end of file diff --git a/inst/testdata/plstlbx-people-yscores.csv b/inst/testdata/plstlbx-people-yscores.csv new file mode 100644 index 0000000..7fb603e --- /dev/null +++ b/inst/testdata/plstlbx-people-yscores.csv @@ -0,0 +1,32 @@ +11.1420 0.0915 0.0668 -0.0015 +5.6355 -0.1266 -0.0208 0.0050 +5.6355 -0.0683 0.0099 0.0308 +-5.3774 -0.5884 -0.1461 -0.0654 +-2.6242 -0.0651 0.0344 -0.0154 +-1.2476 0.0568 0.0901 0.0140 +2.8823 -0.4560 -0.1198 -0.0433 +4.2589 -0.1430 0.0211 0.0502 +-5.3774 -0.3587 -0.0865 -0.1055 +-4.0008 -0.1226 0.0276 -0.0162 +2.8823 -0.6542 -0.2142 -0.1614 +-5.3774 0.0619 0.1609 0.1486 +-2.6242 0.1441 0.1462 0.0709 +-4.0008 -0.0465 0.0763 0.0325 +5.6355 -0.0808 0.0311 0.0229 +5.6355 -0.0773 0.0445 0.0407 +7.0122 0.4454 0.0560 0.0198 +8.3888 0.5787 0.1163 0.0772 +-4.0008 0.0109 -0.0008 0.0256 +-5.3774 0.1874 -0.0425 -0.0501 +-8.1307 -0.1337 -0.0660 -0.0245 +1.5057 0.0092 -0.1003 -0.0445 +4.2589 0.3269 0.0316 0.0353 +4.2589 0.2036 0.0055 0.0131 +-5.3774 0.2055 0.0363 0.0079 +-5.3774 0.3061 0.0896 0.0964 +2.8823 0.0424 -0.0590 -0.0463 +2.8823 0.4188 0.1137 0.0462 +-5.3774 -0.0230 -0.0205 0.0109 +-6.7540 -0.2094 -0.1393 -0.1015 +2.8823 0.2061 -0.0371 -0.0115 +-6.7540 -0.1417 -0.1050 -0.0606 diff --git a/tests/testthat/test-pls.R b/tests/testthat/test-pls.R index 3a5ba34..6bb7f1b 100644 --- a/tests/testthat/test-pls.R +++ b/tests/testthat/test-pls.R @@ -590,4 +590,56 @@ test_that("XY-residual limits are computed correctly", { expect_equal(sum(c2 == "extreme"), 2) expect_equal(sum(c2 == "outlier"), 2) -}) \ No newline at end of file +}) + +test_that("PLS gives results comparable to other software", { + + # read model parameters and calibration scores made in 
PLS_Toolbox + xscores <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-xscores.csv", sep = " ", header = FALSE)) + yscores <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-yscores.csv", sep = " ", header = FALSE)) + weights <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-weights.csv", sep = " ", header = FALSE)) + xloadings <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-xloadings.csv", sep = " ", header = FALSE)) + yloadings <- c(5.3643, 1.0338, 0.4675, 0.3567) + coeffs <- c(0.2078, 0.2647, 0.0073, 0.0722, -0.0016, 0.1829, 0.1420, -0.1984, 0.2153, 0.0151, -0.0405) + + # make a model + data(people) + X <- people[, -4] + y <- people[, 4, drop = FALSE] + m <- pls(X, y, 4, scale = TRUE, cv = list("ven", 10)) + + # compare main model parameters + # here we re-normalize results from PLS_Toolbox + xnorm <- sqrt(colSums(xscores^2)) + expect_equivalent(m$xloadings, xloadings %*% diag(xnorm), tolerance = 10^-3) + expect_equivalent(m$res$cal$xdecomp$scores, xscores %*% diag(1/xnorm), tolerance = 10^-3) + expect_equivalent(m$weights, weights %*% diag(1/xnorm), tolerance = 10^-3) + + expect_equivalent(m$yloadings, yloadings, tolerance = 10^-4) + expect_equivalent(m$coeffs$values[, 4, 1], coeffs, tolerance = 10^-4) + + # check selectivity ratio + # here we change the result from PLS toolbox a bit for large values as they add + # given portion of x-variance when compute SR + sr <- c(24.8, 21.7, 2.2359, 0.1179, 0.1552, 0.9740, 0.0076, 5.9018, 10.0, 0.0256, 0.0138) + expect_equivalent(selratio(m), sr, tolerance = 10^-1) + + # compare calibration results + ypred <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-ypred.csv", sep = " ", header = FALSE)) + xqdist <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-xqdist.csv", sep = " ", header = FALSE)) + xhdist <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-xhdist.csv", sep = " ", header = FALSE)) + yqdist <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-yqdist.csv", sep = " ", header = FALSE)) + rmsec <- c(1.0273, 0.7404, 0.6668, 0.6198) + r2c <- c(0.9283, 0.9627, 0.9698, 0.9739) + + expect_equivalent(m$res$cal$xdecomp$T2[, 4], xhdist, tolerance = 10^-3) + expect_equivalent(m$res$cal$xdecomp$Q[, 4], xqdist, tolerance = 10^-3) + + expect_equivalent(m$res$cal$ydecomp$Q[, 4], yqdist, tolerance = 10^-3) + expect_equivalent(m$res$cal$ydecomp$scores, yscores, tolerance = 10^-4) + expect_equivalent(m$res$cal$y.pred[, 4, 1], ypred, tolerance = 10^-4) + + expect_equivalent(m$res$cal$rmse, rmsec, tolerance = 10^-3) + expect_equivalent(m$res$cal$r2, r2c, tolerance = 10^-3) + +}) diff --git a/tests/testthat/test-simpls.R b/tests/testthat/test-simpls.R index e19ec95..b38db8a 100644 --- a/tests/testthat/test-simpls.R +++ b/tests/testthat/test-simpls.R @@ -57,7 +57,6 @@ expected = list( ) test_that("new algorithm works correctly", { - res <- simpls2res(pls.simpls(X, Y, A), X, Y, A) expect_equivalent(abs(res$R), abs(expected$R), tolerance = 10^-4) @@ -67,7 +66,6 @@ test_that("new algorithm works correctly", { }) test_that("old algorithm works correctly", { - res <- simpls2res(pls.simplsold(X, Y, A), X, Y, A) expect_equivalent(abs(res$R), abs(expected$R), tolerance = 10^-4) @@ -85,3 +83,30 @@ test_that("new algorithm is more numerically stable", { expect_silent(pls.simpls(Xr, Yr / 100000000, 50)) }) + +test_that("new algorithm gives results comparable to other software", { + + # read model parameters made in PLS_Toolbox + weights <- 
as.matrix(read.delim("../../inst/testdata/plstlbx-people-weights.csv", sep = " ", header = FALSE)) + xloadings <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-xloadings.csv", sep = " ", header = FALSE)) + xscores <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-xscores.csv", sep = " ", header = FALSE)) + yscores <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-yscores.csv", sep = " ", header = FALSE)) + yloadings <- c(5.3643, 1.0338, 0.4675, 0.3567) + coeffs <- c(0.2078, 0.2647, 0.0073, 0.0722, -0.0016, 0.1829, 0.1420, -0.1984, 0.2153, 0.0151, -0.0405) + + # make a model + data(people) + X <- scale(people[, -4], center = TRUE, scale = TRUE) + y <- scale(people[, 4, drop = FALSE], center = TRUE, scale = TRUE) + m <- pls.simpls(X, y, 4) + + # here we re-normalize results from PLS_Toolbox + xnorm <- sqrt(colSums(xscores^2)) + expect_equivalent(m$xloadings, xloadings %*% diag(xnorm), tolerance = 10^-3) + expect_equivalent(m$xscores, xscores %*% diag(1/xnorm), tolerance = 10^-3) + expect_equivalent(m$weights, weights %*% diag(1/xnorm), tolerance = 10^-3) + + expect_equivalent(m$yscores, yscores, tolerance = 10^-4) + expect_equivalent(m$yloadings, yloadings, tolerance = 10^-4) + expect_equivalent(m$coeffs[, 4, 1], coeffs, tolerance = 10^-4) +}) From 9556506d03568393d06cb7877575157c7f479b2a Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Sun, 10 Jul 2022 12:08:18 +0200 Subject: [PATCH 15/42] small improvements to selratio() method --- R/pls.R | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/R/pls.R b/R/pls.R index 8dcb45d..ed8e856 100755 --- a/R/pls.R +++ b/R/pls.R @@ -2098,17 +2098,18 @@ selratio <- function(obj, ncomp = obj$ncomp.selected) { # prepare matrix for vipscores selratio <- matrix(0, nrow = nvar, ncol = nresp) - # get norm value for regression coefficients - bnorm <- sqrt(colSums(coeffs^2)) - - # compute target projections - ttp <- x %*% (coeffs %*% diag(1 / bnorm, nrow = nresp, ncol = nresp)) - ptp <- t(crossprod(x, ttp) %*% diag(1 / colSums(ttp^2), nrow = nresp, ncol = nresp)) - # compute selectivity ratio for (y in seq_len(nresp)) { - expvar <- ttp[, y, drop = FALSE] %*% ptp[y, , drop = FALSE] - selratio[var_ind, y] <- colSums(expvar^2) / colSums((x - expvar)^2) + b <- coeffs[, y, drop = FALSE] / sqrt(sum(coeffs[, y]^2)) + t <- x %*% b + p <- crossprod(t, x) / sum(t * t) + + exp <- t %*% p + res <- x - exp + expvar <- colSums(exp^2) + resvar <- colSums(res^2) + resvar[resvar < .Machine$double.eps] <- 1 + selratio[var_ind, y] <- expvar / resvar } rownames(selratio) <- rownames(obj$xloadings) From d8456f4ffdc9858f27537554d976a7bb165030d3 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Sun, 10 Jul 2022 14:24:30 +0200 Subject: [PATCH 16/42] implementation of new cross-validation approach and adding corresponding tests --- R/crossval.R | 24 +++++++-------- R/pls.R | 2 +- R/regmodel.R | 6 ++-- R/simca.R | 40 +++++++++++-------------- tests/testthat/test-crossval.R | 55 ++++++++++++++++++---------------- tests/testthat/test-pls.R | 13 ++++++-- 6 files changed, 73 insertions(+), 67 deletions(-) diff --git a/R/crossval.R b/R/crossval.R index 976828d..d238cec 100755 --- a/R/crossval.R +++ b/R/crossval.R @@ -25,8 +25,8 @@ crossval.getParams <- function(cv, nobj) { if (type == "loo") { return( list( - type = "rand", - nrep = nrep, + type = "loo", + nrep = 1, nseg = nobj ) ) @@ -80,6 +80,9 @@ crossval <- function(cv = 1, nobj = NULL, resp = NULL) { # get cross-validation parameters if (is.null(nobj)) 
nobj <- length(resp) + # if user already provided matrix with values - return it + if (is.numeric(cv) && length(cv) == nobj) return (as.matrix(cv)) + p <- crossval.getParams(cv = cv, nobj = nobj) if (!(p$type %in% c("rand", "ven", "loo"))) { stop("Wrong name for cross-validation method.") @@ -95,22 +98,17 @@ crossval <- function(cv = 1, nobj = NULL, resp = NULL) { stop("Wrong value for number of segments (should be between 2 and number of objects).") } - seglen <- ceiling(nobj / p$nseg) - fulllen <- seglen * p$nseg - ind <- array(0, dim = c(p$nseg, seglen, p$nrep)) + if (p$type == "loo") { + return (matrix(seq_len(nobj), ncol = 1)) + } if (p$type == "rand") { - for (i in seq_len(p$nrep)) { - v <- c(sample(nobj), rep(NA, fulllen - nobj)) - ind[, , i] <- matrix(v, nrow = p$nseg, byrow = TRUE) - } - return(ind) + return (sapply(seq_len(p$nrep), function(i) rep(seq_len(p$nseg), length.out = nobj)[sample(nobj)])) } if (p$type == "ven") { - v <- c(order(resp), rep(NA, fulllen - nobj)) - ind[, , 1] <- matrix(v, nrow = p$nseg, byrow = FALSE) - return(ind) + ind <- if (is.null(resp)) seq_len(nobj) else order(resp) + return (matrix(rep(seq_len(p$nseg), length.out = nobj)[ind], ncol = 1)) } stop("Something went wrong.") diff --git a/R/pls.R b/R/pls.R index ed8e856..040f3ce 100755 --- a/R/pls.R +++ b/R/pls.R @@ -1846,7 +1846,7 @@ pls.cal <- function(x, y, ncomp, center, scale, method = "simpls", cv = FALSE) { # find maximum number of objects in a segment nobj.cv <- 0 if (!is.logical(cv) && !is.null(cv)) { - nseg <- if (is.numeric(cv)) cv else cv[[2]] + nseg <- max(crossval(cv, xc.nrows)) nobj.cv <- if (nseg == 1) 1 else ceiling(xc.nrows / nseg) # we set cv to FALSE so fitting knows that it is not a part of cross-validation diff --git a/R/regmodel.R b/R/regmodel.R index cc33657..a23c417 100755 --- a/R/regmodel.R +++ b/R/regmodel.R @@ -56,8 +56,8 @@ crossval.regmodel <- function(obj, x, y, cv, cal.fun) { # get matrix with indices for cv segments cv_ind <- crossval(cv, nobj = nobj, resp = y[, 1]) - nseg <- nrow(cv_ind); - nrep <- dim(cv_ind)[3] + nseg <- max(cv_ind) + nrep <- ncol(cv_ind) # prepare arrays for results yp.cv <- array(0, dim = c(nobj, ncomp, nresp)) @@ -66,7 +66,7 @@ crossval.regmodel <- function(obj, x, y, cv, cal.fun) { # loop over segments and repetitions for (ir in seq_len(nrep)) { for (is in seq_len(nseg)) { - ind <- na.exclude(cv_ind[is, , ir]) + ind <- which(cv_ind[, ir] == is) if (length(ind) == 0) next xc <- x[-ind, , drop = FALSE] diff --git a/R/simca.R b/R/simca.R index 44812d5..1bbf798 100755 --- a/R/simca.R +++ b/R/simca.R @@ -310,33 +310,29 @@ crossval.simca <- function(obj, x, cv) { # get matrix with indices for cv segments nobj <- nrow(x) idx <- crossval(cv, nobj) - nseg <- nrow(idx); - nrep <- dim(idx)[3] + nseg <- max(idx) + nrep <- ncol(idx) p.pred <- array(0, dim = c(nobj, ncomp, 1)) - # loop over segments for (iRep in seq_len(nrep)) { for (iSeg in seq_len(nseg)) { - ind <- na.exclude(idx[iSeg, , iRep]) - - if (length(ind) > 0) { - x.cal <- x[-ind, , drop = F] - x.val <- x[ind, , drop = F] - - # calibrate PCA model and set distance limits - m.loc <- pca(x.cal, obj$ncomp, center = obj$center, scale = obj$scale, - method = obj$method, rand = obj$rand, lim.type = obj$lim.type, alpha = obj$alpha, - gamma = obj$gamma) - - # make prediction for validation subset - res.loc <- predict.pca(m.loc, x.val) - - # compute and save probabilities - for (i in seq_len(obj$ncomp)) { - p.pred[ind, i, 1] <- p.pred[ind, i, 1] + - getProbabilities.simca(m.loc, i, res.loc$Q[, i], res.loc$T2[, 
i]) - } + ind <- which(idx[, iRep] == iSeg) + x.cal <- x[-ind, , drop = FALSE] + x.val <- x[ind, , drop = FALSE] + + # calibrate PCA model and set distance limits + m.loc <- pca(x.cal, obj$ncomp, center = obj$center, scale = obj$scale, + method = obj$method, rand = obj$rand, lim.type = obj$lim.type, alpha = obj$alpha, + gamma = obj$gamma) + + # make prediction for validation subset + res.loc <- predict.pca(m.loc, x.val) + + # compute and save probabilities + for (i in seq_len(obj$ncomp)) { + p.pred[ind, i, 1] <- p.pred[ind, i, 1] + + getProbabilities.simca(m.loc, i, res.loc$Q[, i], res.loc$T2[, i]) } } } } diff --git a/tests/testthat/test-crossval.R b/tests/testthat/test-crossval.R index 343cabd..e797a4f 100644 --- a/tests/testthat/test-crossval.R +++ b/tests/testthat/test-crossval.R @@ -8,25 +8,36 @@ nobj <- 24 context(sprintf("crossval: cross-validation")) test_that("leave one out works correctly", { + cv.ind <- matrix(seq_len(nobj), ncol = 1) + expect_equivalent(cv.ind, crossval(cv = "loo", nobj)) +}) + + +test_that("random cross-validation with one repetition works correctly", { + + # random full cross-validation + nseg = nobj set.seed(42) - cv.ind <- array(sample(nobj), dim = c(nobj, 1, 1)) + cv.ind <- matrix(rep(seq_len(nseg), length.out = nobj)[sample(nobj)], ncol = 1) set.seed(42) expect_equivalent(cv.ind, crossval(cv = 1, nobj)) -}) + set.seed(42) + expect_equivalent(cv.ind, crossval(cv = list("rand", 24), nobj)) -test_that("random cross-validation with one repetition works correctly", { # number of objects is a multiple of the number of segments + nseg = 4 set.seed(42) - cv.ind <- array(matrix(sample(nobj), nrow = 4, byrow = TRUE), dim = c(4, nobj/4, 1)) + cv.ind <- matrix(rep(seq_len(nseg), length.out = nobj)[sample(nobj)], ncol = 1) set.seed(42) expect_equivalent(cv.ind, crossval(cv = 4, nobj)) set.seed(42) expect_equivalent(cv.ind, crossval(cv = list("rand", 4), nobj)) # number of objects is not a multiple of the number of segments + nseg = 5 set.seed(42) - cv.ind <- array(matrix(c(sample(nobj), NA), nrow = 5, byrow = TRUE), dim = c(5, ceiling(nobj/5), 1)) + cv.ind <- matrix(rep(seq_len(nseg), length.out = nobj)[sample(nobj)], ncol = 1) set.seed(42) expect_equivalent(cv.ind, crossval(cv = 5, nobj)) set.seed(42) @@ -37,47 +48,41 @@ test_that("random cross-validation with several repetition works correctly", { # number of objects is a multiple of the number of segments set.seed(42) - cv.ind <- array( - cbind( - matrix(sample(nobj), nrow = 4, byrow = TRUE), - matrix(sample(nobj), nrow = 4, byrow = TRUE), - matrix(sample(nobj), nrow = 4, byrow = TRUE) - ), dim = c(4, nobj/4, 3) - ) + nseg <- 4 + nrep <- 3 + cv.ind <- sapply(seq_len(nrep), function(i) rep(seq_len(nseg), length.out = nobj)[sample(nobj)]) set.seed(42) expect_equivalent(cv.ind, crossval(cv = list("rand", 4, 3), nobj)) - expect_equivalent(as.numeric(table(crossval(cv = list("rand", 4, 3), nobj))), rep(3, nobj)) # number of objects is not a multiple of the number of segments set.seed(42) - cv.ind <- array( - cbind( - matrix(c(sample(nobj), NA), nrow = 5, byrow = TRUE), - matrix(c(sample(nobj), NA), nrow = 5, byrow = TRUE), - matrix(c(sample(nobj), NA), nrow = 5, byrow = TRUE), - matrix(c(sample(nobj), NA), nrow = 5, byrow = TRUE) - ), dim = c(5, ceiling(nobj/5), 4) - ) + nseg <- 5 + nrep <- 4 + cv.ind <- sapply(seq_len(nrep), function(i) rep(seq_len(nseg), length.out = nobj)[sample(nobj)]) set.seed(42) expect_equivalent(cv.ind, crossval(cv = list("rand", 5, 4), nobj)) -
expect_equivalent(as.numeric(table(crossval(cv = list("rand", 5, 4), nobj), useNA = "ifany")), rep(4, nobj + 1)) }) -test_that("random cross-validation with several repetition works correctly", { +test_that("systematic cross-validation works correctly", { # number of objects is a multiple of the number of segments + nseg <- 4 set.seed(42) resp <- rnorm(nobj) - cv.ind <- array(matrix(order(resp), nrow = 4, byrow = FALSE), dim = c(4, nobj/4, 1)) + cv.ind <- matrix(rep(seq_len(nseg), length.out = nobj)[order(resp)], ncol = 1) set.seed(42) expect_equivalent(cv.ind, crossval(cv = list("ven", 4), resp = resp)) # number of objects is not a multiple of the number of segments + nseg <- 5 set.seed(42) resp <- rnorm(nobj) - cv.ind <- array(matrix(c(order(resp), NA), nrow = 5, byrow = FALSE), dim = c(5, ceiling(nobj/5), 1)) + cv.ind <- matrix(rep(seq_len(nseg), length.out = nobj)[order(resp)], ncol = 1) set.seed(42) expect_equivalent(cv.ind, crossval(cv = list("ven", 5), resp = resp)) + + # it also works well without any response + expect_equivalent(crossval(list("ven", 4), 10), matrix(c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2), ncol = 1)) }) diff --git a/tests/testthat/test-pls.R b/tests/testthat/test-pls.R index 6bb7f1b..2fbb18c 100644 --- a/tests/testthat/test-pls.R +++ b/tests/testthat/test-pls.R @@ -602,11 +602,11 @@ test_that("PLS gives results comparable to other software", { yloadings <- c(5.3643, 1.0338, 0.4675, 0.3567) coeffs <- c(0.2078, 0.2647, 0.0073, 0.0722, -0.0016, 0.1829, 0.1420, -0.1984, 0.2153, 0.0151, -0.0405) - # make a model + # make a model with manual cv splits data(people) X <- people[, -4] y <- people[, 4, drop = FALSE] - m <- pls(X, y, 4, scale = TRUE, cv = list("ven", 10)) + m <- pls(X, y, 4, scale = TRUE, cv = rep(1:10, length.out = nrow(X))) # compare main model parameters # here we re-normalize results from PLS_Toolbox @@ -622,7 +622,7 @@ test_that("PLS gives results comparable to other software", { # here we change the result from PLS toolbox a bit for large values as they add # given portion of x-variance when compute SR sr <- c(24.8, 21.7, 2.2359, 0.1179, 0.1552, 0.9740, 0.0076, 5.9018, 10.0, 0.0256, 0.0138) - expect_equivalent(selratio(m), sr, tolerance = 10^-1) + expect_equivalent(selratio(m, 4), sr, tolerance = 10^-1) # compare calibration results ypred <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-ypred.csv", sep = " ", header = FALSE)) @@ -642,4 +642,11 @@ test_that("PLS gives results comparable to other software", { expect_equivalent(m$res$cal$rmse, rmsec, tolerance = 10^-3) expect_equivalent(m$res$cal$r2, r2c, tolerance = 10^-3) + + # compare cross-validation results + rmsecv <- c(1.1044, 0.8673, 0.8627, 0.8186) + r2cv <- c(0.9173, 0.9489, 0.9498, 0.9545) + + expect_equivalent(m$res$cv$rmse, rmsecv, tolerance = 10^-3) + expect_equivalent(m$res$cv$r2, r2cv, tolerance = 10^-3) }) From ed5b161370486c9315043472f3f549157a04f699 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Sun, 10 Jul 2022 14:25:29 +0200 Subject: [PATCH 17/42] fixed a small bug in ldecomp.getDistances() method --- R/ldecomp.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/ldecomp.R b/R/ldecomp.R index d92a949..027e8cd 100755 --- a/R/ldecomp.R +++ b/R/ldecomp.R @@ -444,6 +444,8 @@ ldecomp.getDistances <- function(scores, loadings, residuals, eigenvals) { # compute matrices with orthogonal and score distances Q <- sapply(seq_len(ncomp), function(a) rowSums(getResiduals(scores, loadings, residuals, a)^2)) + dim(Q) <- c(nobj, ncomp) + T2 <- t(apply(scale(scores^2, center = FALSE, scale =
eigenvals), 1, cumsum)) dim(T2) <- c(nobj, ncomp) From 9dbf4b3a0eb721bbd7b93e7e34a83640b02c98ad Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Sun, 10 Jul 2022 15:26:30 +0200 Subject: [PATCH 18/42] fixes bug #107 --- R/ldecomp.R | 6 +++- R/pca.R | 12 ++++++-- R/pls.R | 21 +++++++++++-- tests/testthat/test-bugs-220710.R | 49 +++++++++++++++++++++++++++++++ 4 files changed, 83 insertions(+), 5 deletions(-) create mode 100644 tests/testthat/test-bugs-220710.R diff --git a/R/ldecomp.R b/R/ldecomp.R index 027e8cd..441a39c 100755 --- a/R/ldecomp.R +++ b/R/ldecomp.R @@ -142,9 +142,13 @@ plotVariance.ldecomp <- function(obj, type = "b", variance = "expvar", labels = #' most of graphical parameters from \code{\link{mdaplot}} function can be used. #' #' @export -plotScores.ldecomp <- function(obj, comp = c(1, 2), type = "p", show.axes = TRUE, +plotScores.ldecomp <- function(obj, comp = if (obj$ncomp > 1) c(1, 2) else 1, type = "p", show.axes = TRUE, show.plot = TRUE, ...) { + if (min(comp) < 1 || max(comp) > ncol(obj$scores)) { + stop("Wrong values for 'comp' parameter.") + } + # get scores for given components and generate column names with explained variance plot_data <- mda.subset(obj$scores, select = comp) colnames(plot_data) <- paste0("Comp ", comp, " (", round(obj$expvar[comp], 2), "%)") diff --git a/R/pca.R b/R/pca.R index 555335d..9c20fd9 100755 --- a/R/pca.R +++ b/R/pca.R @@ -1091,9 +1091,13 @@ plotCumVariance.pca <- function(obj, legend.position = "bottomright", ...) { #' See examples in help for \code{\link{pca}} function. #' #' @export -plotScores.pca <- function(obj, comp = c(1, 2), type = "p", show.axes = TRUE, +plotScores.pca <- function(obj, comp = if (obj$ncomp > 1) c(1, 2) else 1, type = "p", show.axes = TRUE, show.legend = TRUE, res = obj$res, ...) { + if (min(comp) < 1 || max(comp) > ncol(obj$loadings)) { + stop("Wrong values for 'comp' parameter.") + } + res <- getRes(res, "ldecomp") if (length(res) == 1) { return(plotScores(res[[1]], comp = comp, type = type, ...)) @@ -1207,9 +1211,13 @@ plotResiduals.pca <- function(obj, ncomp = obj$ncomp.selected, log = FALSE, #' See examples in help for \code{\link{pca}} function. #' #' @export -plotLoadings.pca <- function(obj, comp = c(1, 2), type = (if (length(comp == 2)) "p" else "l"), +plotLoadings.pca <- function(obj, comp = if (obj$ncomp > 1) c(1, 2) else 1, type = (if (length(comp == 2)) "p" else "l"), show.legend = TRUE, show.axes = TRUE, ...) { + if (min(comp) < 1 || max(comp) > ncol(obj$loadings)) { + stop("Wrong values for 'comp' parameter.") + } + plot_data <- mda.subset(obj$loadings, select = comp) colnames(plot_data) <- paste0("Comp ", comp, " (", round(obj$res[["cal"]]$expvar[comp], 2), "%)") attr(plot_data, "name") <- "Loadings" diff --git a/R/pls.R b/R/pls.R index 040f3ce..473e795 100755 --- a/R/pls.R +++ b/R/pls.R @@ -659,6 +659,16 @@ pls.getxscores <- function(x, weights, xloadings) { xscores <- x %*% (weights %*% solve(crossprod(xloadings, weights))) } +#' Compute and orthogonalize matrix with Y-scores +#' +#' @param y +#' matrix with response values, already preprocessed and cleaned +#' @param yloadings +#' matrix with Y-loadings +#' @param xscores +#' matrix with X-scores (needed for orthogonalization) +#' +#' @return matrix with Y-scores pls.getyscores <- function(y, yloadings, xscores) { ncomp <- ncol(yloadings) @@ -1096,9 +1106,12 @@ plotVariance.pls <- function(obj, decomp = "xdecomp", variance = "expvar", type #' See examples in help for \code{\link{pls}} function. 
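#'
#' A quick sketch of the fixed behavior (hypothetical calls, mirroring the new
#' tests added in test-bugs-220710.R below): for a single-component model, e.g.
#' \code{m <- pls(people[, -4], people[, 4], 1)}, the call \code{plotXScores(m)}
#' now defaults to \code{comp = 1} instead of failing, while
#' \code{plotXScores(m, c(1, 3))} stops with "Wrong values for 'comp' parameter."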
#' #' @export -plotXScores.pls <- function(obj, comp = c(1, 2), show.axes = T, main = "Scores (X)", +plotXScores.pls <- function(obj, comp = if (obj$ncomp > 1) c(1, 2) else 1, show.axes = T, main = "Scores (X)", res = obj$res, ...) { + if (min(comp) < 1 || max(comp) > ncol(obj$weights)) { + stop("Wrong values for 'comp' parameter.") + } # set up values for showing axes lines show.lines <- FALSE @@ -1332,9 +1345,13 @@ plotXYResiduals.pls <- function(obj, ncomp = obj$ncomp.selected, norm = TRUE, lo #' See examples in help for \code{\link{pls}} function. #' #' @export -plotXLoadings.pls <- function(obj, comp = c(1, 2), type = "p", show.axes = TRUE, +plotXLoadings.pls <- function(obj, comp = if (obj$ncomp > 1) c(1, 2) else 1, type = "p", show.axes = TRUE, show.legend = TRUE, ...) { + if (min(comp) < 1 || max(comp) > ncol(obj$weights)) { + stop("Wrong values for 'comp' parameter.") + } + plot_data <- mda.subset(obj$xloadings, select = comp) colnames(plot_data) <- sprintf("Comp %d (%.2f%%)", comp, obj$res[["cal"]]$xdecomp$expvar[comp]) attr(plot_data, "name") <- "Loadings (X)" diff --git a/tests/testthat/test-bugs-220710.R b/tests/testthat/test-bugs-220710.R new file mode 100644 index 0000000..6f144de --- /dev/null +++ b/tests/testthat/test-bugs-220710.R @@ -0,0 +1,49 @@ + +setup({ + pdf(file = tempfile("mdatools-test-classmodel-", fileext = ".pdf")) + sink(tempfile("mdatools-test-test-classmodel-", fileext = ".txt"), append = FALSE, split = FALSE) +}) + +teardown({ + dev.off() + sink() +}) + +context("test for bug #107:") +data(people) + +test_that("bug is fixed in PCA", { + m <- pca(people, 1) + expect_silent(plotScores(m)) + expect_silent(plotLoadings(m)) + expect_silent(plotScores(m, 1)) + expect_silent(plotLoadings(m, 1)) + + expect_error(plotScores(m, 0)) + expect_error(plotScores(m, c(1, 3))) + expect_error(plotScores(m$res$cal, 0)) + expect_error(plotScores(m$res$cal, c(1, 3))) + + expect_error(plotLoadings(m, 0)) + expect_error(plotLoadings(m, c(1, 3))) + expect_error(plotLoadings(m$res$cal, 0)) + expect_error(plotLoadings(m$res$cal, c(1, 3))) + +}) + +test_that("bug is fixed in PLS", { + m <- pls(people[, -4], people[, 4], 1) + expect_silent(plotXScores(m)) + expect_silent(plotXLoadings(m)) + + expect_error(plotXScores(m, 0)) + expect_error(plotXScores(m, c(1, 3))) + expect_error(plotXScores(m$res$cal, 0)) + expect_error(plotXScores(m$res$cal, c(1, 3))) + + expect_error(plotXLoadings(m, 0)) + expect_error(plotXLoadings(m, c(1, 3))) + expect_error(plotXLoadings(m$res$cal, 0)) + expect_error(plotXLoadings(m$res$cal, c(1, 3))) + +}) \ No newline at end of file From a3bcf863a20658b70bf45379e50f65d2dffe31a4 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Sun, 10 Jul 2022 15:30:39 +0200 Subject: [PATCH 19/42] fixed an issue with path to data files in some tests --- tests/testthat/test-pls.R | 18 ++++++++++-------- tests/testthat/test-simpls.R | 25 +++++++++++++------------ 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/tests/testthat/test-pls.R b/tests/testthat/test-pls.R index 2fbb18c..73481d1 100644 --- a/tests/testthat/test-pls.R +++ b/tests/testthat/test-pls.R @@ -593,12 +593,14 @@ test_that("XY-residual limits are computed correctly", { }) test_that("PLS gives results comparable to other software", { + dataFolder = file.path(system.file("/inst/testdata/", package="mdatools")) + weights <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-weights.csv"), sep = " ", header = FALSE)) # read model parameters and calibration scores made in PLS_Toolbox - 
xscores <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-xscores.csv", sep = " ", header = FALSE)) - yscores <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-yscores.csv", sep = " ", header = FALSE)) - weights <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-weights.csv", sep = " ", header = FALSE)) - xloadings <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-xloadings.csv", sep = " ", header = FALSE)) + xscores <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-xscores.csv"), sep = " ", header = FALSE)) + yscores <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-yscores.csv"), sep = " ", header = FALSE)) + weights <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-weights.csv"), sep = " ", header = FALSE)) + xloadings <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-xloadings.csv"), sep = " ", header = FALSE)) yloadings <- c(5.3643, 1.0338, 0.4675, 0.3567) coeffs <- c(0.2078, 0.2647, 0.0073, 0.0722, -0.0016, 0.1829, 0.1420, -0.1984, 0.2153, 0.0151, -0.0405) @@ -625,10 +627,10 @@ test_that("PLS gives results comparable to other software", { expect_equivalent(selratio(m, 4), sr, tolerance = 10^-1) # compare calibration results - ypred <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-ypred.csv", sep = " ", header = FALSE)) - xqdist <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-xqdist.csv", sep = " ", header = FALSE)) - xhdist <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-xhdist.csv", sep = " ", header = FALSE)) - yqdist <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-yqdist.csv", sep = " ", header = FALSE)) + ypred <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-ypred.csv"), sep = " ", header = FALSE)) + xqdist <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-xqdist.csv"), sep = " ", header = FALSE)) + xhdist <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-xhdist.csv"), sep = " ", header = FALSE)) + yqdist <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-yqdist.csv"), sep = " ", header = FALSE)) rmsec <- c(1.0273, 0.7404, 0.6668, 0.6198) r2c <- c(0.9283, 0.9627, 0.9698, 0.9739) diff --git a/tests/testthat/test-simpls.R b/tests/testthat/test-simpls.R index b38db8a..d88b910 100644 --- a/tests/testthat/test-simpls.R +++ b/tests/testthat/test-simpls.R @@ -2,15 +2,15 @@ # Tests for SIMPLS algorithms # ###################################### -# setup({ -# pdf(file = tempfile("mdatools-test-simpls-", fileext = ".pdf")) -# sink(tempfile("mdatools-test-simpls-", fileext = ".txt"), append = FALSE, split = FALSE) -# }) +setup({ + pdf(file = tempfile("mdatools-test-simpls-", fileext = ".pdf")) + sink(tempfile("mdatools-test-simpls-", fileext = ".txt"), append = FALSE, split = FALSE) +}) -# teardown({ -# dev.off() -# sink() -# }) +teardown({ + dev.off() + sink() +}) ################################################# # Block 1. 
Tests the old algorithm # @@ -87,10 +87,11 @@ test_that("new algorithm is more numerically stable", { test_that("new algorithm gives results comparable to other software", { # read model parameters made in PLS_Toolbox - weights <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-weights.csv", sep = " ", header = FALSE)) - xloadings <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-xloadings.csv", sep = " ", header = FALSE)) - xscores <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-xscores.csv", sep = " ", header = FALSE)) - yscores <- as.matrix(read.delim("../../inst/testdata/plstlbx-people-yscores.csv", sep = " ", header = FALSE)) + dataFolder = file.path(system.file("/inst/testdata/", package="mdatools")) + weights <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-weights.csv"), sep = " ", header = FALSE)) + xloadings <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-xloadings.csv"), sep = " ", header = FALSE)) + xscores <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-xscores.csv"), sep = " ", header = FALSE)) + yscores <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-yscores.csv"), sep = " ", header = FALSE)) yloadings <- c(5.3643, 1.0338, 0.4675, 0.3567) coeffs <- c(0.2078, 0.2647, 0.0073, 0.0722, -0.0016, 0.1829, 0.1420, -0.1984, 0.2153, 0.0151, -0.0405) From cc0ff4ffc5578b23df7e1ef3af1e07613c6dc1cd Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Sun, 10 Jul 2022 17:05:20 +0200 Subject: [PATCH 20/42] updates to help files --- man/plotLoadings.pca.Rd | 2 +- man/plotScores.ldecomp.Rd | 2 +- man/plotScores.pca.Rd | 2 +- man/plotXLoadings.pls.Rd | 2 +- man/plotXScores.pls.Rd | 2 +- man/pls.getpredictions.Rd | 40 ++++++++++++++++++++++++++++++++++ man/pls.getxdecomp.Rd | 40 ++++++++++++++++++++++++++++++++++ man/pls.getxscores.Rd | 21 ++++++++++++++++++ man/pls.getydecomp.Rd | 46 +++++++++++++++++++++++++++++++++++++++ man/pls.getyscores.Rd | 21 ++++++++++++++++++ man/pls.simplsold.Rd | 28 ++++++++++++++++++++++++ 11 files changed, 201 insertions(+), 5 deletions(-) create mode 100644 man/pls.getpredictions.Rd create mode 100644 man/pls.getxdecomp.Rd create mode 100644 man/pls.getxscores.Rd create mode 100644 man/pls.getydecomp.Rd create mode 100644 man/pls.getyscores.Rd create mode 100644 man/pls.simplsold.Rd diff --git a/man/plotLoadings.pca.Rd b/man/plotLoadings.pca.Rd index 923056b..c4a306f 100644 --- a/man/plotLoadings.pca.Rd +++ b/man/plotLoadings.pca.Rd @@ -6,7 +6,7 @@ \usage{ \method{plotLoadings}{pca}( obj, - comp = c(1, 2), + comp = if (obj$ncomp > 1) c(1, 2) else 1, type = (if (length(comp == 2)) "p" else "l"), show.legend = TRUE, show.axes = TRUE, diff --git a/man/plotScores.ldecomp.Rd b/man/plotScores.ldecomp.Rd index 8a31d26..60f6fed 100644 --- a/man/plotScores.ldecomp.Rd +++ b/man/plotScores.ldecomp.Rd @@ -6,7 +6,7 @@ \usage{ \method{plotScores}{ldecomp}( obj, - comp = c(1, 2), + comp = if (obj$ncomp > 1) c(1, 2) else 1, type = "p", show.axes = TRUE, show.plot = TRUE, diff --git a/man/plotScores.pca.Rd b/man/plotScores.pca.Rd index 194a3b6..a23193d 100644 --- a/man/plotScores.pca.Rd +++ b/man/plotScores.pca.Rd @@ -6,7 +6,7 @@ \usage{ \method{plotScores}{pca}( obj, - comp = c(1, 2), + comp = if (obj$ncomp > 1) c(1, 2) else 1, type = "p", show.axes = TRUE, show.legend = TRUE, diff --git a/man/plotXLoadings.pls.Rd b/man/plotXLoadings.pls.Rd index a9ab0ae..d5288d6 100644 --- a/man/plotXLoadings.pls.Rd +++ b/man/plotXLoadings.pls.Rd @@ -6,7 +6,7 @@ \usage{ \method{plotXLoadings}{pls}( obj, - comp = 
c(1, 2), + comp = if (obj$ncomp > 1) c(1, 2) else 1, type = "p", show.axes = TRUE, show.legend = TRUE, diff --git a/man/plotXScores.pls.Rd b/man/plotXScores.pls.Rd index 98d3905..ed79a3f 100644 --- a/man/plotXScores.pls.Rd +++ b/man/plotXScores.pls.Rd @@ -6,7 +6,7 @@ \usage{ \method{plotXScores}{pls}( obj, - comp = c(1, 2), + comp = if (obj$ncomp > 1) c(1, 2) else 1, show.axes = T, main = "Scores (X)", res = obj$res, diff --git a/man/pls.getpredictions.Rd b/man/pls.getpredictions.Rd new file mode 100644 index 0000000..31f07bf --- /dev/null +++ b/man/pls.getpredictions.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pls.R +\name{pls.getpredictions} +\alias{pls.getpredictions} +\title{Compute predictions for response values} +\usage{ +pls.getpredictions( + x, + coeffs, + ycenter, + yscale, + ynames = NULL, + y.attrs = NULL, + objnames = NULL, + compnames = NULL +) +} +\arguments{ +\item{x}{matrix with predictors, already preprocessed (e.g. mean centered) and cleaned} + +\item{coeffs}{array with regression coefficients} + +\item{ycenter}{`ycenter` property of PLS model} + +\item{yscale}{`yscale` property of PLS model} + +\item{ynames}{vector with names of the responses} + +\item{y.attrs}{list with response attributes (e.g. from reference values if any)} + +\item{objnames}{vector with names of objects (rows of x)} + +\item{compnames}{vector with names used for components} +} +\value{ +array with predicted y-values +} +\description{ +Compute predictions for response values +} diff --git a/man/pls.getxdecomp.Rd b/man/pls.getxdecomp.Rd new file mode 100644 index 0000000..d330cb2 --- /dev/null +++ b/man/pls.getxdecomp.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pls.R +\name{pls.getxdecomp} +\alias{pls.getxdecomp} +\title{Compute object with decomposition of x-values} +\usage{ +pls.getxdecomp( + x, + xscores, + xloadings, + xeigenvals, + xnames = NULL, + x.attrs = NULL, + objnames = NULL, + compnames = NULL +) +} +\arguments{ +\item{x}{matrix with predictors, already preprocessed (e.g. 
mean centered) and cleaned} + +\item{xscores}{matrix with X-scores} + +\item{xloadings}{matrix with X-loadings} + +\item{xeigenvals}{matrix with eigenvalues for X} + +\item{xnames}{vector with names of the predictors} + +\item{x.attrs}{list with predictors attributes} + +\item{objnames}{vector with names of objects (rows of x)} + +\item{compnames}{vector with names used for components} +} +\value{ +array `ldecomp` object for x-values +} +\description{ +Compute object with decomposition of x-values +} diff --git a/man/pls.getxscores.Rd b/man/pls.getxscores.Rd new file mode 100644 index 0000000..0fe457f --- /dev/null +++ b/man/pls.getxscores.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pls.R +\name{pls.getxscores} +\alias{pls.getxscores} +\title{Compute matrix with X-scores} +\usage{ +pls.getxscores(x, weights, xloadings) +} +\arguments{ +\item{x}{matrix with predictors, already preprocessed and cleaned} + +\item{weights}{matrix with PLS weights} + +\item{xloadings}{matrix with X-loadings} +} +\value{ +matrix with X-scores +} +\description{ +Compute matrix with X-scores +} diff --git a/man/pls.getydecomp.Rd b/man/pls.getydecomp.Rd new file mode 100644 index 0000000..e629db9 --- /dev/null +++ b/man/pls.getydecomp.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pls.R +\name{pls.getydecomp} +\alias{pls.getydecomp} +\title{Compute object with decomposition of y-values} +\usage{ +pls.getydecomp( + y, + yscores, + xscores, + yloadings, + yeigenvals, + ynames = NULL, + y.attrs = NULL, + x.attrs = NULL, + objnames = NULL, + compnames = NULL +) +} +\arguments{ +\item{y}{matrix with responses, already preprocessed (e.g. mean centered) and cleaned} + +\item{yscores}{matrix with Y-scores} + +\item{xscores}{matrix with X-scores} + +\item{yloadings}{matrix with Y-loadings} + +\item{yeigenvals}{matrix with eigenvalues for Y} + +\item{ynames}{vector with names of the responses} + +\item{y.attrs}{list with response attributes (e.g.
from reference values if any)} + +\item{x.attrs}{list with predictors attributes} + +\item{objnames}{vector with names of objects (rows of x)} + +\item{compnames}{vector with names used for components} +} +\value{ +array `ldecomp` object for y-values (or NULL if y is not provided) +} +\description{ +Compute object with decomposition of y-values +} diff --git a/man/pls.getyscores.Rd b/man/pls.getyscores.Rd new file mode 100644 index 0000000..7075ab4 --- /dev/null +++ b/man/pls.getyscores.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pls.R +\name{pls.getyscores} +\alias{pls.getyscores} +\title{Compute and orthogonalize matrix with Y-scores} +\usage{ +pls.getyscores(y, yloadings, xscores) +} +\arguments{ +\item{y}{matrix with response values, already preprocessed and cleaned} + +\item{yloadings}{matrix with Y-loadings} + +\item{xscores}{matrix with X-scores (needed for orthogonalization)} +} +\value{ +matrix with Y-scores +} +\description{ +Compute and orthogonalize matrix with Y-scores +} diff --git a/man/pls.simplsold.Rd b/man/pls.simplsold.Rd new file mode 100644 index 0000000..214ab38 --- /dev/null +++ b/man/pls.simplsold.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pls.R +\name{pls.simplsold} +\alias{pls.simplsold} +\title{SIMPLS algorithm (old implementation)} +\usage{ +pls.simplsold(x, y, ncomp, cv = FALSE) +} +\arguments{ +\item{x}{a matrix with x values (predictors)} + +\item{y}{a matrix with y values (responses)} + +\item{ncomp}{number of components to calculate} + +\item{cv}{logical, whether the model is calibrated during cross-validation or not} +} +\value{ +a list with computed regression coefficients, loadings and scores for x and y matrices, +and weights. +} +\description{ +SIMPLS algorithm for calibration of PLS model (old version) +} +\references{ +[1]. S. de Jong. SIMPLS: An Alternative approach to partial least squares regression. +Chemometrics and Intelligent Laboratory Systems, 18, 1993 (251-263).
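+% A minimal call sketch (hypothetical, matching the usage section above):
+% res <- pls.simplsold(x, y, ncomp = 4)
+% 'res' is then a list with regression coefficients, loadings, scores and weights.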
+} From f27e933f2f4bac4504a95ceb4c5c8000c5c99443 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Sun, 10 Jul 2022 17:05:36 +0200 Subject: [PATCH 21/42] test data was moved to testthat directory --- tests/testthat/plstlbx-people-weights.csv | 11 +++++++ tests/testthat/plstlbx-people-xhdist.csv | 32 +++++++++++++++++++++ tests/testthat/plstlbx-people-xloadings.csv | 11 +++++++ tests/testthat/plstlbx-people-xqdist.csv | 32 +++++++++++++++++++++ tests/testthat/plstlbx-people-xscores.csv | 32 +++++++++++++++++++++ tests/testthat/plstlbx-people-ypred.csv | 32 +++++++++++++++++++++ tests/testthat/plstlbx-people-yqdist.csv | 32 +++++++++++++++++++++ tests/testthat/plstlbx-people-yscores.csv | 32 +++++++++++++++++++++ 8 files changed, 214 insertions(+) create mode 100644 tests/testthat/plstlbx-people-weights.csv create mode 100644 tests/testthat/plstlbx-people-xhdist.csv create mode 100644 tests/testthat/plstlbx-people-xloadings.csv create mode 100644 tests/testthat/plstlbx-people-xqdist.csv create mode 100644 tests/testthat/plstlbx-people-xscores.csv create mode 100644 tests/testthat/plstlbx-people-ypred.csv create mode 100644 tests/testthat/plstlbx-people-yqdist.csv create mode 100644 tests/testthat/plstlbx-people-yscores.csv diff --git a/tests/testthat/plstlbx-people-weights.csv b/tests/testthat/plstlbx-people-weights.csv new file mode 100644 index 0000000..626e85e --- /dev/null +++ b/tests/testthat/plstlbx-people-weights.csv @@ -0,0 +1,11 @@ +0.4319 0.1854 0.1149 -0.0445 +0.4356 0.1764 0.2654 0.4317 +-0.3699 0.0237 0.8413 0.8718 +0.1452 -0.0604 0.0389 0.1838 +0.1593 -0.3207 -0.1333 -0.1604 +0.3133 -0.2264 0.3859 0.5192 +-0.0399 0.6602 0.4212 0.4030 +-0.4139 -0.2195 0.0848 -0.0953 +0.4193 0.1815 0.1986 0.0160 +-0.0696 0.5662 -0.1668 -0.1324 +-0.0540 -0.0880 -0.5830 0.5314 diff --git a/tests/testthat/plstlbx-people-xhdist.csv b/tests/testthat/plstlbx-people-xhdist.csv new file mode 100644 index 0000000..5dbaf8d --- /dev/null +++ b/tests/testthat/plstlbx-people-xhdist.csv @@ -0,0 +1,32 @@ +7.4875 +3.2397 +2.5583 +10.8514 +3.2828 +4.0721 +7.1071 +3.9232 +3.8897 +3.3473 +3.8491 +4.6580 +3.1512 +2.5963 +2.0801 +2.2521 +4.2200 +4.5492 +3.5500 +5.5432 +3.1816 +3.5329 +2.3602 +3.5737 +4.0456 +3.4025 +5.0792 +3.6392 +1.8355 +1.5958 +2.7383 +2.807 \ No newline at end of file diff --git a/tests/testthat/plstlbx-people-xloadings.csv b/tests/testthat/plstlbx-people-xloadings.csv new file mode 100644 index 0000000..6972cd3 --- /dev/null +++ b/tests/testthat/plstlbx-people-xloadings.csv @@ -0,0 +1,11 @@ +0.4106 0.1141 0.1036 0.0457 +0.4153 0.0766 0.0066 0.1391 +-0.3726 -0.0907 0.3806 0.3537 +0.1521 -0.0844 -0.0460 0.0869 +0.1963 -0.3142 0.0105 0.0939 +0.3394 -0.3346 0.1565 0.1682 +-0.1160 0.5861 0.0871 0.1951 +-0.3886 -0.1876 0.1756 0.0410 +0.3983 0.0960 0.1564 0.0921 +-0.1349 0.6053 -0.2032 -0.0492 +-0.0439 0.0270 -0.8447 0.8707 diff --git a/tests/testthat/plstlbx-people-xqdist.csv b/tests/testthat/plstlbx-people-xqdist.csv new file mode 100644 index 0000000..971cce6 --- /dev/null +++ b/tests/testthat/plstlbx-people-xqdist.csv @@ -0,0 +1,32 @@ +1.9041 +0.8735 +0.4510 +1.1024 +2.7907 +2.2340 +1.2173 +0.9854 +1.2716 +0.7961 +0.4860 +0.3754 +1.6481 +1.3018 +0.3733 +1.2522 +2.3064 +2.6794 +4.7591 +1.7569 +0.6085 +1.4004 +0.4096 +1.4456 +2.6432 +2.4861 +5.3929 +9.5875 +1.2365 +3.1687 +0.8946 +2.6442 \ No newline at end of file diff --git a/tests/testthat/plstlbx-people-xscores.csv b/tests/testthat/plstlbx-people-xscores.csv new file mode 100644 index 0000000..59763c8 --- /dev/null +++ 
b/tests/testthat/plstlbx-people-xscores.csv @@ -0,0 +1,32 @@ +4.8335 -0.4321 1.5689 0.2021 +2.8513 -0.6212 -0.6223 0.7530 +2.7141 -0.6940 -0.6950 0.4506 +-1.0533 -2.0401 -1.3808 -1.8132 +-1.0359 -1.0859 1.2470 0.5618 +-0.6989 -1.0948 1.6394 0.2147 +2.3781 -1.4688 -1.4418 1.1361 +2.2660 -1.4589 -1.0201 0.6079 +-1.5931 -1.2889 1.1821 -0.6916 +-1.5246 -1.4122 1.1142 -0.4895 +2.8442 -1.3881 -0.0623 -0.8758 +-2.5820 -2.2613 -0.7718 0.4286 +-1.5279 -1.3792 1.2190 -0.0454 +-1.7034 -1.6552 0.7695 0.1952 +2.7436 -1.1517 0.0271 -0.2252 +2.7353 -1.3513 -0.2008 -0.0098 +2.1300 2.4733 0.6869 0.0039 +2.4403 2.4744 0.3435 0.5122 +-1.8386 0.0980 -0.7841 1.0571 +-2.8772 2.1646 0.5288 0.8908 +-3.3698 0.0943 -0.7736 -0.4325 +0.6607 1.7764 -0.9579 -0.6680 +1.1611 1.9766 -0.3325 -0.3089 +1.4512 1.4728 -0.2671 -1.0428 +-2.9197 0.9634 0.5934 -0.9167 +-3.1564 0.8307 -0.8396 0.4499 +1.2063 1.3302 0.0391 -1.4371 +0.3213 1.2869 1.2137 0.7285 +-2.3826 0.1716 -0.7929 0.0487 +-2.5680 0.7585 -0.1434 0.2315 +0.8214 2.2157 -0.5020 -0.1606 +-2.7271 0.6961 -0.5844 0.6443 \ No newline at end of file diff --git a/tests/testthat/plstlbx-people-ypred.csv b/tests/testthat/plstlbx-people-ypred.csv new file mode 100644 index 0000000..826d4ce --- /dev/null +++ b/tests/testthat/plstlbx-people-ypred.csv @@ -0,0 +1,32 @@ +48.0862 +44.2076 +43.8202 +36.0841 +38.3637 +38.9222 +42.8676 +42.6631 +36.9118 +37.0067 +43.4588 +34.5260 +37.2101 +36.7130 +43.6720 +43.5525 +44.7849 +45.3343 +37.0879 +36.8568 +34.1175 +41.2542 +42.5075 +42.4945 +35.5957 +35.1031 +42.0069 +41.7480 +35.8982 +36.1891 +42.0698 +35.8860 \ No newline at end of file diff --git a/tests/testthat/plstlbx-people-yqdist.csv b/tests/testthat/plstlbx-people-yqdist.csv new file mode 100644 index 0000000..1185db6 --- /dev/null +++ b/tests/testthat/plstlbx-people-yqdist.csv @@ -0,0 +1,32 @@ +0.0005 +0.0028 +0.0021 +0.0005 +0.0087 +0.0004 +0.0496 +0.0075 +0.0548 +0.0000 +0.1401 +0.1431 +0.0411 +0.0054 +0.0071 +0.0132 +0.0030 +0.0292 +0.0005 +0.0483 +0.0009 +0.0043 +0.0160 +0.0168 +0.0108 +0.0530 +0.0000 +0.0042 +0.0007 +0.0931 +0.0003 +0.0517 \ No newline at end of file diff --git a/tests/testthat/plstlbx-people-yscores.csv b/tests/testthat/plstlbx-people-yscores.csv new file mode 100644 index 0000000..7fb603e --- /dev/null +++ b/tests/testthat/plstlbx-people-yscores.csv @@ -0,0 +1,32 @@ +11.1420 0.0915 0.0668 -0.0015 +5.6355 -0.1266 -0.0208 0.0050 +5.6355 -0.0683 0.0099 0.0308 +-5.3774 -0.5884 -0.1461 -0.0654 +-2.6242 -0.0651 0.0344 -0.0154 +-1.2476 0.0568 0.0901 0.0140 +2.8823 -0.4560 -0.1198 -0.0433 +4.2589 -0.1430 0.0211 0.0502 +-5.3774 -0.3587 -0.0865 -0.1055 +-4.0008 -0.1226 0.0276 -0.0162 +2.8823 -0.6542 -0.2142 -0.1614 +-5.3774 0.0619 0.1609 0.1486 +-2.6242 0.1441 0.1462 0.0709 +-4.0008 -0.0465 0.0763 0.0325 +5.6355 -0.0808 0.0311 0.0229 +5.6355 -0.0773 0.0445 0.0407 +7.0122 0.4454 0.0560 0.0198 +8.3888 0.5787 0.1163 0.0772 +-4.0008 0.0109 -0.0008 0.0256 +-5.3774 0.1874 -0.0425 -0.0501 +-8.1307 -0.1337 -0.0660 -0.0245 +1.5057 0.0092 -0.1003 -0.0445 +4.2589 0.3269 0.0316 0.0353 +4.2589 0.2036 0.0055 0.0131 +-5.3774 0.2055 0.0363 0.0079 +-5.3774 0.3061 0.0896 0.0964 +2.8823 0.0424 -0.0590 -0.0463 +2.8823 0.4188 0.1137 0.0462 +-5.3774 -0.0230 -0.0205 0.0109 +-6.7540 -0.2094 -0.1393 -0.1015 +2.8823 0.2061 -0.0371 -0.0115 +-6.7540 -0.1417 -0.1050 -0.0606 From 7f6d5064e611a98770d0dcfc505ebacb427837c4 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Sun, 10 Jul 2022 18:22:54 +0200 Subject: [PATCH 22/42] moved rest of testdata files to testthat directory --- 
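After this move the reference files resolve relative to the tests/testthat working directory, so the updated tests read them directly. A sketch of the pattern (the same call is used in the updated test files):

    weights <- as.matrix(read.delim("plstlbx-people-weights.csv", sep = " ", header = FALSE))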
inst/testdata/plstlbx-people-weights.csv | 11 ------ inst/testdata/plstlbx-people-xhdist.csv | 32 ------------------ inst/testdata/plstlbx-people-xloadings.csv | 11 ------ inst/testdata/plstlbx-people-xqdist.csv | 32 ------------------ inst/testdata/plstlbx-people-xscores.csv | 32 ------------------ inst/testdata/plstlbx-people-ypred.csv | 32 ------------------ inst/testdata/plstlbx-people-yqdist.csv | 32 ------------------ inst/testdata/plstlbx-people-yscores.csv | 32 ------------------ {inst/testdata => tests/testthat}/People.xlsx | Bin .../testthat}/pls-coeffs.csv | 0 .../testthat}/pls-expvar.csv | 0 .../testthat}/pls-vipscores.csv | 0 .../testthat}/pls-weight.csv | 0 .../testthat}/pls-weights.csv | 0 .../testthat}/pls-xloadings.csv | 0 .../testdata => tests/testthat}/pls-xres.csv | 0 .../testthat}/pls-xscores.csv | 0 .../testthat}/pls-yloadings.csv | 0 .../testdata => tests/testthat}/pls-yres.csv | 0 .../testthat}/pls-yscores.csv | 0 .../testthat}/pls2-vipscores.csv | 0 .../testthat}/test_ldecomp.m | 0 {inst/testdata => tests/testthat}/test_pls.m | 0 23 files changed, 214 deletions(-) delete mode 100644 inst/testdata/plstlbx-people-weights.csv delete mode 100644 inst/testdata/plstlbx-people-xhdist.csv delete mode 100644 inst/testdata/plstlbx-people-xloadings.csv delete mode 100644 inst/testdata/plstlbx-people-xqdist.csv delete mode 100644 inst/testdata/plstlbx-people-xscores.csv delete mode 100644 inst/testdata/plstlbx-people-ypred.csv delete mode 100644 inst/testdata/plstlbx-people-yqdist.csv delete mode 100644 inst/testdata/plstlbx-people-yscores.csv rename {inst/testdata => tests/testthat}/People.xlsx (100%) rename {inst/testdata => tests/testthat}/pls-coeffs.csv (100%) rename {inst/testdata => tests/testthat}/pls-expvar.csv (100%) rename {inst/testdata => tests/testthat}/pls-vipscores.csv (100%) rename {inst/testdata => tests/testthat}/pls-weight.csv (100%) rename {inst/testdata => tests/testthat}/pls-weights.csv (100%) rename {inst/testdata => tests/testthat}/pls-xloadings.csv (100%) rename {inst/testdata => tests/testthat}/pls-xres.csv (100%) rename {inst/testdata => tests/testthat}/pls-xscores.csv (100%) rename {inst/testdata => tests/testthat}/pls-yloadings.csv (100%) rename {inst/testdata => tests/testthat}/pls-yres.csv (100%) rename {inst/testdata => tests/testthat}/pls-yscores.csv (100%) rename {inst/testdata => tests/testthat}/pls2-vipscores.csv (100%) rename {inst/testdata => tests/testthat}/test_ldecomp.m (100%) rename {inst/testdata => tests/testthat}/test_pls.m (100%) diff --git a/inst/testdata/plstlbx-people-weights.csv b/inst/testdata/plstlbx-people-weights.csv deleted file mode 100644 index 626e85e..0000000 --- a/inst/testdata/plstlbx-people-weights.csv +++ /dev/null @@ -1,11 +0,0 @@ -0.4319 0.1854 0.1149 -0.0445 -0.4356 0.1764 0.2654 0.4317 --0.3699 0.0237 0.8413 0.8718 -0.1452 -0.0604 0.0389 0.1838 -0.1593 -0.3207 -0.1333 -0.1604 -0.3133 -0.2264 0.3859 0.5192 --0.0399 0.6602 0.4212 0.4030 --0.4139 -0.2195 0.0848 -0.0953 -0.4193 0.1815 0.1986 0.0160 --0.0696 0.5662 -0.1668 -0.1324 --0.0540 -0.0880 -0.5830 0.5314 diff --git a/inst/testdata/plstlbx-people-xhdist.csv b/inst/testdata/plstlbx-people-xhdist.csv deleted file mode 100644 index 5dbaf8d..0000000 --- a/inst/testdata/plstlbx-people-xhdist.csv +++ /dev/null @@ -1,32 +0,0 @@ -7.4875 -3.2397 -2.5583 -10.8514 -3.2828 -4.0721 -7.1071 -3.9232 -3.8897 -3.3473 -3.8491 -4.6580 -3.1512 -2.5963 -2.0801 -2.2521 -4.2200 -4.5492 -3.5500 -5.5432 -3.1816 -3.5329 -2.3602 -3.5737 -4.0456 -3.4025 -5.0792 -3.6392 -1.8355 -1.5958 
-2.7383 -2.807 \ No newline at end of file diff --git a/inst/testdata/plstlbx-people-xloadings.csv b/inst/testdata/plstlbx-people-xloadings.csv deleted file mode 100644 index 6972cd3..0000000 --- a/inst/testdata/plstlbx-people-xloadings.csv +++ /dev/null @@ -1,11 +0,0 @@ -0.4106 0.1141 0.1036 0.0457 -0.4153 0.0766 0.0066 0.1391 --0.3726 -0.0907 0.3806 0.3537 -0.1521 -0.0844 -0.0460 0.0869 -0.1963 -0.3142 0.0105 0.0939 -0.3394 -0.3346 0.1565 0.1682 --0.1160 0.5861 0.0871 0.1951 --0.3886 -0.1876 0.1756 0.0410 -0.3983 0.0960 0.1564 0.0921 --0.1349 0.6053 -0.2032 -0.0492 --0.0439 0.0270 -0.8447 0.8707 diff --git a/inst/testdata/plstlbx-people-xqdist.csv b/inst/testdata/plstlbx-people-xqdist.csv deleted file mode 100644 index 971cce6..0000000 --- a/inst/testdata/plstlbx-people-xqdist.csv +++ /dev/null @@ -1,32 +0,0 @@ -1.9041 -0.8735 -0.4510 -1.1024 -2.7907 -2.2340 -1.2173 -0.9854 -1.2716 -0.7961 -0.4860 -0.3754 -1.6481 -1.3018 -0.3733 -1.2522 -2.3064 -2.6794 -4.7591 -1.7569 -0.6085 -1.4004 -0.4096 -1.4456 -2.6432 -2.4861 -5.3929 -9.5875 -1.2365 -3.1687 -0.8946 -2.6442 \ No newline at end of file diff --git a/inst/testdata/plstlbx-people-xscores.csv b/inst/testdata/plstlbx-people-xscores.csv deleted file mode 100644 index 59763c8..0000000 --- a/inst/testdata/plstlbx-people-xscores.csv +++ /dev/null @@ -1,32 +0,0 @@ -4.8335 -0.4321 1.5689 0.2021 -2.8513 -0.6212 -0.6223 0.7530 -2.7141 -0.6940 -0.6950 0.4506 --1.0533 -2.0401 -1.3808 -1.8132 --1.0359 -1.0859 1.2470 0.5618 --0.6989 -1.0948 1.6394 0.2147 -2.3781 -1.4688 -1.4418 1.1361 -2.2660 -1.4589 -1.0201 0.6079 --1.5931 -1.2889 1.1821 -0.6916 --1.5246 -1.4122 1.1142 -0.4895 -2.8442 -1.3881 -0.0623 -0.8758 --2.5820 -2.2613 -0.7718 0.4286 --1.5279 -1.3792 1.2190 -0.0454 --1.7034 -1.6552 0.7695 0.1952 -2.7436 -1.1517 0.0271 -0.2252 -2.7353 -1.3513 -0.2008 -0.0098 -2.1300 2.4733 0.6869 0.0039 -2.4403 2.4744 0.3435 0.5122 --1.8386 0.0980 -0.7841 1.0571 --2.8772 2.1646 0.5288 0.8908 --3.3698 0.0943 -0.7736 -0.4325 -0.6607 1.7764 -0.9579 -0.6680 -1.1611 1.9766 -0.3325 -0.3089 -1.4512 1.4728 -0.2671 -1.0428 --2.9197 0.9634 0.5934 -0.9167 --3.1564 0.8307 -0.8396 0.4499 -1.2063 1.3302 0.0391 -1.4371 -0.3213 1.2869 1.2137 0.7285 --2.3826 0.1716 -0.7929 0.0487 --2.5680 0.7585 -0.1434 0.2315 -0.8214 2.2157 -0.5020 -0.1606 --2.7271 0.6961 -0.5844 0.6443 \ No newline at end of file diff --git a/inst/testdata/plstlbx-people-ypred.csv b/inst/testdata/plstlbx-people-ypred.csv deleted file mode 100644 index 826d4ce..0000000 --- a/inst/testdata/plstlbx-people-ypred.csv +++ /dev/null @@ -1,32 +0,0 @@ -48.0862 -44.2076 -43.8202 -36.0841 -38.3637 -38.9222 -42.8676 -42.6631 -36.9118 -37.0067 -43.4588 -34.5260 -37.2101 -36.7130 -43.6720 -43.5525 -44.7849 -45.3343 -37.0879 -36.8568 -34.1175 -41.2542 -42.5075 -42.4945 -35.5957 -35.1031 -42.0069 -41.7480 -35.8982 -36.1891 -42.0698 -35.8860 \ No newline at end of file diff --git a/inst/testdata/plstlbx-people-yqdist.csv b/inst/testdata/plstlbx-people-yqdist.csv deleted file mode 100644 index 1185db6..0000000 --- a/inst/testdata/plstlbx-people-yqdist.csv +++ /dev/null @@ -1,32 +0,0 @@ -0.0005 -0.0028 -0.0021 -0.0005 -0.0087 -0.0004 -0.0496 -0.0075 -0.0548 -0.0000 -0.1401 -0.1431 -0.0411 -0.0054 -0.0071 -0.0132 -0.0030 -0.0292 -0.0005 -0.0483 -0.0009 -0.0043 -0.0160 -0.0168 -0.0108 -0.0530 -0.0000 -0.0042 -0.0007 -0.0931 -0.0003 -0.0517 \ No newline at end of file diff --git a/inst/testdata/plstlbx-people-yscores.csv b/inst/testdata/plstlbx-people-yscores.csv deleted file mode 100644 index 7fb603e..0000000 --- 
a/inst/testdata/plstlbx-people-yscores.csv +++ /dev/null @@ -1,32 +0,0 @@ -11.1420 0.0915 0.0668 -0.0015 -5.6355 -0.1266 -0.0208 0.0050 -5.6355 -0.0683 0.0099 0.0308 --5.3774 -0.5884 -0.1461 -0.0654 --2.6242 -0.0651 0.0344 -0.0154 --1.2476 0.0568 0.0901 0.0140 -2.8823 -0.4560 -0.1198 -0.0433 -4.2589 -0.1430 0.0211 0.0502 --5.3774 -0.3587 -0.0865 -0.1055 --4.0008 -0.1226 0.0276 -0.0162 -2.8823 -0.6542 -0.2142 -0.1614 --5.3774 0.0619 0.1609 0.1486 --2.6242 0.1441 0.1462 0.0709 --4.0008 -0.0465 0.0763 0.0325 -5.6355 -0.0808 0.0311 0.0229 -5.6355 -0.0773 0.0445 0.0407 -7.0122 0.4454 0.0560 0.0198 -8.3888 0.5787 0.1163 0.0772 --4.0008 0.0109 -0.0008 0.0256 --5.3774 0.1874 -0.0425 -0.0501 --8.1307 -0.1337 -0.0660 -0.0245 -1.5057 0.0092 -0.1003 -0.0445 -4.2589 0.3269 0.0316 0.0353 -4.2589 0.2036 0.0055 0.0131 --5.3774 0.2055 0.0363 0.0079 --5.3774 0.3061 0.0896 0.0964 -2.8823 0.0424 -0.0590 -0.0463 -2.8823 0.4188 0.1137 0.0462 --5.3774 -0.0230 -0.0205 0.0109 --6.7540 -0.2094 -0.1393 -0.1015 -2.8823 0.2061 -0.0371 -0.0115 --6.7540 -0.1417 -0.1050 -0.0606 diff --git a/inst/testdata/People.xlsx b/tests/testthat/People.xlsx similarity index 100% rename from inst/testdata/People.xlsx rename to tests/testthat/People.xlsx diff --git a/inst/testdata/pls-coeffs.csv b/tests/testthat/pls-coeffs.csv similarity index 100% rename from inst/testdata/pls-coeffs.csv rename to tests/testthat/pls-coeffs.csv diff --git a/inst/testdata/pls-expvar.csv b/tests/testthat/pls-expvar.csv similarity index 100% rename from inst/testdata/pls-expvar.csv rename to tests/testthat/pls-expvar.csv diff --git a/inst/testdata/pls-vipscores.csv b/tests/testthat/pls-vipscores.csv similarity index 100% rename from inst/testdata/pls-vipscores.csv rename to tests/testthat/pls-vipscores.csv diff --git a/inst/testdata/pls-weight.csv b/tests/testthat/pls-weight.csv similarity index 100% rename from inst/testdata/pls-weight.csv rename to tests/testthat/pls-weight.csv diff --git a/inst/testdata/pls-weights.csv b/tests/testthat/pls-weights.csv similarity index 100% rename from inst/testdata/pls-weights.csv rename to tests/testthat/pls-weights.csv diff --git a/inst/testdata/pls-xloadings.csv b/tests/testthat/pls-xloadings.csv similarity index 100% rename from inst/testdata/pls-xloadings.csv rename to tests/testthat/pls-xloadings.csv diff --git a/inst/testdata/pls-xres.csv b/tests/testthat/pls-xres.csv similarity index 100% rename from inst/testdata/pls-xres.csv rename to tests/testthat/pls-xres.csv diff --git a/inst/testdata/pls-xscores.csv b/tests/testthat/pls-xscores.csv similarity index 100% rename from inst/testdata/pls-xscores.csv rename to tests/testthat/pls-xscores.csv diff --git a/inst/testdata/pls-yloadings.csv b/tests/testthat/pls-yloadings.csv similarity index 100% rename from inst/testdata/pls-yloadings.csv rename to tests/testthat/pls-yloadings.csv diff --git a/inst/testdata/pls-yres.csv b/tests/testthat/pls-yres.csv similarity index 100% rename from inst/testdata/pls-yres.csv rename to tests/testthat/pls-yres.csv diff --git a/inst/testdata/pls-yscores.csv b/tests/testthat/pls-yscores.csv similarity index 100% rename from inst/testdata/pls-yscores.csv rename to tests/testthat/pls-yscores.csv diff --git a/inst/testdata/pls2-vipscores.csv b/tests/testthat/pls2-vipscores.csv similarity index 100% rename from inst/testdata/pls2-vipscores.csv rename to tests/testthat/pls2-vipscores.csv diff --git a/inst/testdata/test_ldecomp.m b/tests/testthat/test_ldecomp.m similarity index 100% rename from inst/testdata/test_ldecomp.m rename to 
tests/testthat/test_ldecomp.m diff --git a/inst/testdata/test_pls.m b/tests/testthat/test_pls.m similarity index 100% rename from inst/testdata/test_pls.m rename to tests/testthat/test_pls.m From 7244e2c16cbc4e418bdc0e58b628fd48ff1e8dab Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Sun, 10 Jul 2022 18:23:06 +0200 Subject: [PATCH 23/42] small improvements to PLS tests --- tests/testthat/test-pls.R | 64 +++++++++++------------------------- tests/testthat/test-simpls.R | 9 +++-- 2 files changed, 24 insertions(+), 49 deletions(-) diff --git a/tests/testthat/test-pls.R b/tests/testthat/test-pls.R index 73481d1..0788661 100644 --- a/tests/testthat/test-pls.R +++ b/tests/testthat/test-pls.R @@ -253,17 +253,10 @@ context("pls: compare pedict.pls() outcome with known values") test_that("predictions for people data are correct", { # model - f <- system.file("testdata", "pls-xloadings.csv", package = "mdatools") - xloadings <- read.csv(f, header = FALSE) - - f <- system.file("testdata", "pls-yloadings.csv", package = "mdatools") - yloadings <- read.csv(f, header = FALSE) - - f <- system.file("testdata", "pls-weights.csv", package = "mdatools") - weights <- read.csv(f, header = FALSE) - - f <- system.file("testdata", "pls-coeffs.csv", package = "mdatools") - coeffs <- read.csv(f, header = FALSE) + xloadings <- read.csv("pls-xloadings.csv", header = FALSE) + yloadings <- read.csv("pls-yloadings.csv", header = FALSE) + weights <- read.csv("pls-weights.csv", header = FALSE) + coeffs <- read.csv("pls-coeffs.csv", header = FALSE) xc <- people[, -4] yc <- people[, 4, drop = F] @@ -274,25 +267,11 @@ test_that("predictions for people data are correct", { expect_equivalent(obj$coeffs$values[, 4, ], as.matrix(coeffs), tolerance = 10^-5) # predictions - f <- system.file("testdata", "pls-xscores.csv", package = "mdatools") - xscores <- read.csv(f, header = FALSE) - print(f) - - f <- system.file("testdata", "pls-yscores.csv", package = "mdatools") - yscores <- read.csv(f, header = FALSE) - print(f) - - f <- system.file("testdata", "pls-xres.csv", package = "mdatools") - xresid <- read.csv(f, header = FALSE) - print(f) - - f <- system.file("testdata", "pls-yres.csv", package = "mdatools") - yresid <- read.csv(f, header = FALSE) - print(f) - - f <- system.file("testdata", "pls-expvar.csv", package = "mdatools") - expvar <- read.csv(f, header = FALSE) - print(f) + xscores <- read.csv("pls-xscores.csv", header = FALSE) + yscores <- read.csv("pls-yscores.csv", header = FALSE) + xresid <- read.csv("pls-xres.csv", header = FALSE) + yresid <- read.csv("pls-yres.csv", header = FALSE) + expvar <- read.csv("pls-expvar.csv", header = FALSE) res <- predict(obj, xc, yc) expect_equivalent(res$xdecomp$scores, as.matrix(xscores), tolerance = 10^-4) @@ -474,8 +453,7 @@ test_that("vipscores for people data (A = 4) identical to once computed in MATLA d <- datasets[[1]] m <- pls(d$xc, d$yc, d$ncomp, center = d$center, scale = d$scale, cv = 10) - f <- system.file("testdata", "pls-vipscores.csv", package = "mdatools") - vip <- read.csv(f, header = FALSE) + vip <- read.csv("pls-vipscores.csv", header = FALSE) expect_equivalent(vipscores(m, ncomp = 4), as.matrix(vip), tolerance = 10^-4) }) @@ -485,8 +463,7 @@ test_that("vipscores for people data (A = 4) identical to once computed in MATLA Y <- people[, c(4, 6)] m <- pls(X, Y, 8, center = TRUE, scale = TRUE, cv = 1) - f <- system.file("testdata", "pls2-vipscores.csv", package = "mdatools") - vip <- read.csv(f, header = FALSE)[[1]] + vip <- read.csv("pls2-vipscores.csv", header 
= FALSE)[[1]] #dim(vip) <- c(ncol(X), 2) expect_equivalent(vipscores(m, ncomp = 4), matrix(vip, ncol = 2), tolerance = 10^-4) @@ -593,14 +570,13 @@ test_that("XY-residual limits are computed correctly", { }) test_that("PLS gives results comparable to other software", { - dataFolder = file.path(system.file("/inst/testdata/", package="mdatools")) - weights <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-weights.csv"), sep = " ", header = FALSE)) + # read model parameters and calibration scores made in PLS_Toolbox - xscores <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-xscores.csv"), sep = " ", header = FALSE)) - yscores <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-yscores.csv"), sep = " ", header = FALSE)) - weights <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-weights.csv"), sep = " ", header = FALSE)) - xloadings <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-xloadings.csv"), sep = " ", header = FALSE)) + xscores <- as.matrix(read.delim("plstlbx-people-xscores.csv", sep = " ", header = FALSE)) + yscores <- as.matrix(read.delim("plstlbx-people-yscores.csv", sep = " ", header = FALSE)) + weights <- as.matrix(read.delim("plstlbx-people-weights.csv", sep = " ", header = FALSE)) + xloadings <- as.matrix(read.delim("plstlbx-people-xloadings.csv", sep = " ", header = FALSE)) yloadings <- c(5.3643, 1.0338, 0.4675, 0.3567) coeffs <- c(0.2078, 0.2647, 0.0073, 0.0722, -0.0016, 0.1829, 0.1420, -0.1984, 0.2153, 0.0151, -0.0405) @@ -627,10 +603,10 @@ test_that("PLS gives results comparable to other software", { expect_equivalent(selratio(m, 4), sr, tolerance = 10^-1) # compare calibration results - ypred <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-ypred.csv"), sep = " ", header = FALSE)) - xqdist <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-xqdist.csv"), sep = " ", header = FALSE)) - xhdist <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-xhdist.csv"), sep = " ", header = FALSE)) - yqdist <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-yqdist.csv"), sep = " ", header = FALSE)) + ypred <- as.matrix(read.delim("plstlbx-people-ypred.csv", sep = " ", header = FALSE)) + xqdist <- as.matrix(read.delim("plstlbx-people-xqdist.csv", sep = " ", header = FALSE)) + xhdist <- as.matrix(read.delim("plstlbx-people-xhdist.csv", sep = " ", header = FALSE)) + yqdist <- as.matrix(read.delim("plstlbx-people-yqdist.csv", sep = " ", header = FALSE)) rmsec <- c(1.0273, 0.7404, 0.6668, 0.6198) r2c <- c(0.9283, 0.9627, 0.9698, 0.9739) diff --git a/tests/testthat/test-simpls.R b/tests/testthat/test-simpls.R index d88b910..888a8d8 100644 --- a/tests/testthat/test-simpls.R +++ b/tests/testthat/test-simpls.R @@ -87,11 +87,10 @@ test_that("new algorithm is more numerically stable", { test_that("new algorithm gives results comparable to other software", { # read model parameters made in PLS_Toolbox - dataFolder = file.path(system.file("/inst/testdata/", package="mdatools")) - weights <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-weights.csv"), sep = " ", header = FALSE)) - xloadings <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-xloadings.csv"), sep = " ", header = FALSE)) - xscores <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-xscores.csv"), sep = " ", header = FALSE)) - yscores <- as.matrix(read.delim(paste0(dataFolder, "/plstlbx-people-yscores.csv"), sep = " ", header = FALSE)) + weights <- as.matrix(read.delim("plstlbx-people-weights.csv", sep = " ", header = FALSE)) + 
xloadings <- as.matrix(read.delim("plstlbx-people-xloadings.csv", sep = " ", header = FALSE)) + xscores <- as.matrix(read.delim("plstlbx-people-xscores.csv", sep = " ", header = FALSE)) + yscores <- as.matrix(read.delim("plstlbx-people-yscores.csv", sep = " ", header = FALSE)) yloadings <- c(5.3643, 1.0338, 0.4675, 0.3567) coeffs <- c(0.2078, 0.2647, 0.0073, 0.0722, -0.0016, 0.1829, 0.1420, -0.1984, 0.2153, 0.0151, -0.0405) From ed803dde3dd711fc8c189f6d6f8ec6376a7ee6ab Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Sun, 10 Jul 2022 20:44:40 +0200 Subject: [PATCH 24/42] refactoring of prep.savgol() code to solve #104 --- R/prep.R | 31 ++++++++++++------------------- tests/testthat/test-prep.R | 20 ++++++++++++++++++++ 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/R/prep.R b/R/prep.R index f91a2dc..1f52c67 100755 --- a/R/prep.R +++ b/R/prep.R @@ -238,11 +238,11 @@ prep.savgol <- function(data, width = 3, porder = 1, dorder = 0) { # compute generalized factorial genfact <- function(a, b) { - f <- 1; + f <- 1 if ((a - b + 1) > a) return(f) for (i in (a - b + 1):a) { - f <- f * i; + f <- f * i } return(f) @@ -257,28 +257,21 @@ prep.savgol <- function(data, width = 3, porder = 1, dorder = 0) { return(sum) } - f <- function(data, width, porder, dorder) { + f <- function(x, width, porder, dorder) { - nobj <- nrow(data) - nvar <- ncol(data) - pdata <- matrix(0, ncol = nvar, nrow = nobj) - - m <- (width - 1) / 2 + nvar <- ncol(x) + px <- matrix(0.0, nrow(x), ncol(x)) + m <- round((width - 1) / 2) w <- outer(-m:m, -m:m, function(x, y) weight(x, y, m, porder, dorder)) - n <- ncol(data) - - for (j in seq_len(nobj)) { - for (i in 1:m) { - pdata[j, i] <- convolve(data[j, 1:(2 * m + 1)], w[, i], type = "filter") - pdata[j, n - i + 1] <- convolve(data[j, (n - 2 * m):n], w[, width - i + 1], type = "filter") - } - for (i in (m+1):(n-m)) { - pdata[j, i] <- convolve(data[j, (i - m):(i + m)], w[, m + 1], type = "filter") - } + for (i in seq_len(m)) { + px[, i] = apply(x[, seq_len(2 * m + 1), drop = FALSE], 1, function(xx) convolve(xx, w[, i], type = "filter")[1]) + px[, nvar - i + 1] = apply(x[, (nvar - 2 * m):nvar, drop = FALSE], 1, function(xx) convolve(xx, w[, width - i + 1], type = "filter")[1]) } - return(pdata) + px[, (m+1):(nvar-m)] = apply(x, 1, function(xx) convolve(xx, w[, m + 1], type = "filter")) + + return (px) } return(prep.generic(data, f, width = width, porder = porder, dorder = dorder)) diff --git a/tests/testthat/test-prep.R b/tests/testthat/test-prep.R index 2d1540b..3a9210a 100644 --- a/tests/testthat/test-prep.R +++ b/tests/testthat/test-prep.R @@ -240,6 +240,26 @@ test_that("SavGol smoothing works correctly", { expect_equal(dim(dy3), dim(y)) expect_equivalent(which((abs(dy3) < 10^-12)), zeros) + # check for small numeric sets + x = matrix(c(1, 1, 1, 3, 4, 7, 4, 3, 1, 1, 1.0), nrow = 1) + y = matrix(c(5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5.0), nrow = 1) + z = matrix(c(4, 6, 4, 6, 4, 6, 4, 6, 4, 6.0), nrow = 1) + + # no derivative + expect_equivalent(prep.savgol(x, width = 5, porder = 1, dorder = 0), c(0.4, 1.2, 2.0, 3.2, 3.8, 4.2, 3.8, 3.2, 2.0, 1.2, 0.4)) + expect_equivalent(prep.savgol(y, width = 3, porder = 1, dorder = 0), c(5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0)) + expect_equivalent(prep.savgol(z, width = 5, porder = 1, dorder = 0), c(4.8, 4.8, 4.8, 5.2, 4.8, 5.2, 4.8, 5.2, 5.2, 5.2)) + + # first derivative + expect_equivalent(prep.savgol(x, width = 3, porder = 1, dorder = 1), c(0.0, 0.0, 1.0, 1.5, 2.0, 0.0, -2.0, -1.5, -1.0, 0.0, 0.0)) +
expect_equivalent(prep.savgol(y, width = 3, porder = 1, dorder = 1), c(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)) + expect_equivalent(prep.savgol(z, width = 3, porder = 1, dorder = 1), c(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)) + + # second derivative + expect_equivalent(prep.savgol(x, width = 3, porder = 2, dorder = 2), c(0.0, 0.0, 2.0, -1.0, 2.0, -6.0, 2.0, -1.0, 2.0, 0.0, 0.0)) + expect_equivalent(prep.savgol(y, width = 3, porder = 2, dorder = 2), c(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)) + expect_equivalent(prep.savgol(z, width = 3, porder = 2, dorder = 2), c(-4.0, -4.0, 4.0, -4.0, 4.0, -4.0, 4.0, -4.0, 4.0, 4.0)) + }) context("prep: alsbasecorr") From ba313f86ae284ac92d7fa476674e459142343a8d Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Sun, 10 Jul 2022 20:54:07 +0200 Subject: [PATCH 25/42] changed r-lib actions version to 2.0 --- .github/workflows/test-package.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-package.yml b/.github/workflows/test-package.yml index a65e9af..b0542b2 100644 --- a/.github/workflows/test-package.yml +++ b/.github/workflows/test-package.yml @@ -33,11 +33,11 @@ jobs: steps: - uses: actions/checkout@v2 - - uses: r-lib/actions/setup-r@v1 + - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} - - uses: r-lib/actions/setup-pandoc@v1 + - uses: r-lib/actions/setup-pandoc@v2 - name: Query dependencies run: | From c4904bff856524b22fb45b3a27325f6783f6bc40 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Mon, 11 Jul 2022 15:49:35 +0200 Subject: [PATCH 26/42] further improvements to prep.alsbasecorr(), solves #105 --- R/prep.R | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/R/prep.R b/R/prep.R index 1f52c67..8ef9a68 100755 --- a/R/prep.R +++ b/R/prep.R @@ -407,28 +407,27 @@ prep.alsbasecorr <- function(data, plambda = 5, p = 0.1, max.niter = 10) { attrs <- mda.getattr(data) dimnames <- dimnames(data) - baseline <- function(y) { - - m <- length(y) - D <- Matrix::Matrix(diff(diag(m), difference = 2), sparse = TRUE) - LDD <- (10^plambda) * Matrix::crossprod(D) - w <- Matrix::Matrix(1, nrow = m, ncol = 1, sparse = TRUE) - - for (i in seq_len(max.niter)) { + m <- ncol(data) + baseline <- matrix(0, nrow(data), ncol(data)) + LDD <- Matrix::Matrix((10^plambda) * crossprod(diff(diag(m), difference = 2)), sparse = TRUE) + w.ini <- matrix(rep(1, m)) + + for (i in seq_len(nrow(data))) { + y <- data[i, ] + w <- w.ini + for (j in seq_len(max.niter)) { W <- Matrix::Diagonal(x = as.numeric(w)) - z <- Matrix::solve(W + LDD, w * y) + z <- Matrix::solve(as(W + LDD, 'dgCMatrix'), w * y, sparse = TRUE) w.old <- w w <- p * (y > z) + (1 - p) * (y < z) - if (sum(abs(w - w.old)) < 10^-12) { - break - } + if (sum(abs(w - w.old)) < 10^-10) break } - return(as.numeric(z)) + baseline[i, ] <- as.numeric(z) } - pspectra <- t(apply(data, 1, function(x) x - baseline(x))) + pspectra <- data - baseline pspectra <- mda.setattr(pspectra, attrs) dimnames(pspectra) <- dimnames From 534a1fe29489734a5bd4e067f85978a4a10fe58d Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Mon, 11 Jul 2022 16:12:21 +0200 Subject: [PATCH 27/42] updated date --- LICENSE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE.md b/LICENSE.md index 7cf9f2d..1dfac4a 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,4 +1,4 @@ -Copyright (c) 2016-2021 Sergey Kucheryavskiy +Copyright (c) 2016-2022 Sergey Kucheryavskiy Permission is
From 534a1fe29489734a5bd4e067f85978a4a10fe58d Mon Sep 17 00:00:00 2001
From: Sergey Kucheryavskiy
Date: Mon, 11 Jul 2022 16:12:21 +0200
Subject: [PATCH 27/42] updated date

---
 LICENSE.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/LICENSE.md b/LICENSE.md
index 7cf9f2d..1dfac4a 100644
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -1,4 +1,4 @@
-Copyright (c) 2016-2021 Sergey Kucheryavskiy
+Copyright (c) 2016-2022 Sergey Kucheryavskiy

 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the

From 7afbff5e54e42fd800e0b20f33d39d4bbb1a8585 Mon Sep 17 00:00:00 2001
From: Sergey Kucheryavskiy
Date: Mon, 11 Jul 2022 16:12:55 +0200
Subject: [PATCH 28/42] updated imports

---
 NAMESPACE | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NAMESPACE b/NAMESPACE
index 0215c11..9b48d18 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -351,6 +351,7 @@ importFrom(graphics,rasterImage)
 importFrom(graphics,rect)
 importFrom(graphics,segments)
 importFrom(graphics,text)
+importFrom(methods,as)
 importFrom(methods,show)
 importFrom(stats,convolve)
 importFrom(stats,cor)

From df2a8cc31fbf6bb36ccf773611bc79bdde817063 Mon Sep 17 00:00:00 2001
From: Sergey Kucheryavskiy
Date: Mon, 11 Jul 2022 16:13:23 +0200
Subject: [PATCH 29/42] updated roxygen version and version is bumped to 0.13.0

---
 DESCRIPTION | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index cbcee84..2af3c12 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mdatools
 Title: Multivariate Data Analysis for Chemometrics
-Version: 0.12.1
-Date: 2021-10-16
+Version: 0.13.0
+Date: 2022-07-09
 Author: Sergey Kucheryavskiy ()
 Maintainer: Sergey Kucheryavskiy
 Description: Projection based methods for preprocessing,
@@ -10,7 +10,7 @@ Description: Projection based methods for preprocessing,
 Encoding: UTF-8
 License: MIT + file LICENSE
 Imports: methods, graphics, grDevices, stats, Matrix
-RoxygenNote: 7.1.2
+RoxygenNote: 7.2.0
 Suggests: testthat
 NeedsCompilation: no
 Packaged: 2019-05-24 11:03:33 UTC; svkucheryavski

From 726a53f075be1b29682c33615efee14ff04982f0 Mon Sep 17 00:00:00 2001
From: Sergey Kucheryavskiy
Date: Mon, 11 Jul 2022 16:13:35 +0200
Subject: [PATCH 30/42] added news about new release

---
 NEWS.md | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 954ba32..d1c5c32 100755
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,10 +1,21 @@
-v. 0.12.1
+v. 0.13.0
 =========
+This release brings an updated implementation of the PLS algorithm (SIMPLS), which is more numerically stable and produces significantly fewer warnings about using too many components when you work with small y-values. The speed of the `pls()` method in general has also been improved.

-* fixed a bug in `vipscores()` which could lead to a bit higher values for PLS2 models.
+Another important change is that cross-validation of regression and classification models has been rewritten as a simpler solution and now you can also use your own custom splits by providing a vector with segment indices associated with each measurement. For example, if you run PLS with parameter `cv = c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2)`, it is assumed that your dataset has 10 measurements and you want to use a venetian blinds split with four segments. See more details in the tutorial, where the description of the cross-validation procedure has been moved to a separate section.
+
+Other changes and improvements:
+
+* Refactoring and improvements of `prep.savgol()` code made the method much faster (up to 50-60 times faster for datasets with many measurements).
+
+* Refactoring and improvements of `prep.alsbasecorr()` code made the method 2-3 times faster, especially for large datasets.

 * added [PQN](https://doi.org/10.1021/ac051632c) normalization method to `prep.norm()` function.

+* fixed a bug in `vipscores()` which could lead to slightly higher values for PLS2 models.
+
+* fixes to several small bugs and general improvements.
+
 v.0.12.0
 ========
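The custom splits described in the release notes above can be tried along these lines (an illustrative sketch, not part of the patches; the dataset and the number of components are arbitrary choices):

```
# illustrative sketch: custom cross-validation splits in pls()
library(mdatools)
data(simdata)

# one segment index per measurement; repeating 1:4 gives the same
# venetian blinds split with four segments mentioned above
cv.ind <- rep(1:4, length.out = nrow(simdata$spectra.c))
m <- pls(simdata$spectra.c, simdata$conc.c, 10, cv = cv.ind)
summary(m)
```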
From 1c171ee18d9e0148d9e2320dd169200274444543 Mon Sep 17 00:00:00 2001
From: Sergey Kucheryavskiy
Date: Mon, 11 Jul 2022 16:13:53 +0200
Subject: [PATCH 31/42] updated according to new version

---
 README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 4b0f0f2..ca0c67d 100755
--- a/README.md
+++ b/README.md
@@ -19,16 +19,16 @@ If you want to cite the package, please use the following: Sergey Kucheryavskiy,
 What is new
 -----------

-Latest release (0.12.0) is available both from GitHub and CRAN. You can see the full list of changes [here](NEWS.md). The Bookdown tutorial has been also updated and contains the description of new methods added in the last release.
+Latest release (0.13.0) is available both from GitHub and CRAN. You can see the full list of changes [here](NEWS.md). The Bookdown tutorial has also been updated and contains the description of new methods added in the last release.

 How to install
 --------------

-The package is available from CRAN by usual installing procedure. However, due to restrictions in CRAN politics regarding number of submissions (one in 3-4 month), mostly major releases will be published there (with 2-3 weeks delay after GitHub release as more thorough testing is needed). You can [download](https://github.com/svkucheryavski/mdatools/releases) a zip-file with source package and install it using the `install.packages` command, e.g. if the downloaded file is `mdatools_0.12.0.tar.gz` and it is located in a current working directory, just run the following:
+The package is available from CRAN via the usual installation procedure. However, due to restrictions in CRAN policies regarding the number of submissions (one in 3-4 months), mostly major releases will be published there (with a 2-3 week delay after the GitHub release, as more thorough testing is needed). You can [download](https://github.com/svkucheryavski/mdatools/releases) a zip-file with the source package and install it using the `install.packages` command, e.g.
if the downloaded file is `mdatools_0.13.0.tar.gz` and it is located in a current working directory, just run the following:

```
-install.packages("mdatools_0.12.0.tar.gz")
+install.packages("mdatools_0.13.0.tar.gz")
```

 If you have `devtools` package installed, the following command will install the current developer version from the master branch of GitHub repository (do not forget to load the `devtools` package first):

From 581c72bc4e4832466227f6767b97611021f36208 Mon Sep 17 00:00:00 2001
From: Sergey Kucheryavskiy
Date: Mon, 11 Jul 2022 16:14:02 +0200
Subject: [PATCH 32/42] fixed a typo in help text

---
 R/prep.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/prep.R b/R/prep.R
index 8ef9a68..fae668d 100755
--- a/R/prep.R
+++ b/R/prep.R
@@ -362,7 +362,7 @@ prep.ref2km <- function(data) {
    return(prep.generic(data, f))
 }

-#' Baseline correction using assymetric least squares
+#' Baseline correction using asymmetric least squares
 #'
 #' @param data
 #' matrix with spectra (rows correspond to individual spectra)
@@ -401,6 +401,7 @@ prep.ref2km <- function(data) {
 #' }
 #'
 #' @importFrom Matrix Matrix Diagonal
+#' @importFrom methods as
 #'
 #' @export
 prep.alsbasecorr <- function(data, plambda = 5, p = 0.1, max.niter = 10) {

From eae8610a9c866aabfad62b3486cf6e14aaab887b Mon Sep 17 00:00:00 2001
From: Sergey Kucheryavskiy
Date: Mon, 11 Jul 2022 20:33:40 +0200
Subject: [PATCH 33/42] corrections to help text

---
 man/prep.alsbasecorr.Rd | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/man/prep.alsbasecorr.Rd b/man/prep.alsbasecorr.Rd
index b34d0f0..c78f1e3 100644
--- a/man/prep.alsbasecorr.Rd
+++ b/man/prep.alsbasecorr.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/prep.R
 \name{prep.alsbasecorr}
 \alias{prep.alsbasecorr}
-\title{Baseline correction using assymetric least squares}
+\title{Baseline correction using asymmetric least squares}
 \usage{
 prep.alsbasecorr(data, plambda = 5, p = 0.1, max.niter = 10)
 }
@@ -19,7 +19,7 @@ prep.alsbasecorr(data, plambda = 5, p = 0.1, max.niter = 10)
 preprocessed spectra.
 }
 \description{
-Baseline correction using assymetric least squares
+Baseline correction using asymmetric least squares
 }
 \details{
 The function implements baseline correction algorithm based on Whittaker smoother.
The method

From 0704579c83c0c3132a3776b96f281808724858c8 Mon Sep 17 00:00:00 2001
From: Sergey Kucheryavskiy
Date: Tue, 12 Jul 2022 07:49:06 +0200
Subject: [PATCH 34/42] added plotRMSERatio method

---
 NAMESPACE                      |  2 ++
 R/defaults.R                   | 11 +++++++
 R/regmodel.R                   | 43 ++++++++++++++++++++++++++++
 man/plotRMSERatio.Rd           | 16 +++++++++++
 man/plotRMSERatio.regmodel.Rd  | 21 +++++++++++++
 tests/testthat/test-regmodel.R | 18 ++++++++++++
 6 files changed, 111 insertions(+)
 create mode 100644 man/plotRMSERatio.Rd
 create mode 100644 man/plotRMSERatio.regmodel.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 9b48d18..0905b0f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -62,6 +62,7 @@ S3method(plotPuritySpectra,mcrpure)
 S3method(plotRMSE,ipls)
 S3method(plotRMSE,regmodel)
 S3method(plotRMSE,regres)
+S3method(plotRMSERatio,regmodel)
 S3method(plotRegcoeffs,regmodel)
 S3method(plotResiduals,ldecomp)
 S3method(plotResiduals,pca)
@@ -264,6 +265,7 @@ export(plotPurity)
 export(plotPuritySpectra)
 export(plotQDoF)
 export(plotRMSE)
+export(plotRMSERatio)
 export(plotRegcoeffs)
 export(plotResiduals)
 export(plotScatter)

diff --git a/R/defaults.R b/R/defaults.R
index 594fae8..b6fddbd 100755
--- a/R/defaults.R
+++ b/R/defaults.R
@@ -1,4 +1,15 @@

+#' Plot for ratio RMSECV/RMSEC vs RMSECV
+#' @param obj
+#' object with any regression model
+#' @param ...
+#' other parameters
+#'
+#' @export
+plotRMSERatio <- function(obj, ...) {
+   UseMethod("plotRMSERatio")
+}
+
 #' Plot purity spectra
 #' @param obj
 #' object with mcr pure case

diff --git a/R/regmodel.R b/R/regmodel.R
index a23c417..d86331d 100755
--- a/R/regmodel.R
+++ b/R/regmodel.R
@@ -301,6 +301,49 @@ plotRMSE.regmodel <- function(obj, ny = 1, type = "b", labels = "values",
    mdaplotg(plot_data, type = type, xticks = xticks, labels = labels, ylab = ylab, ...)
 }

+
+#' RMSECV/RMSEC ratio plot for regression model
+#'
+#' @description
+#' Shows plot with RMSECV/RMSEC values vs. RMSECV for each component.
+#'
+#' @param obj
+#' a regression model (object of class \code{regmodel})
+#' @param ny
+#' number of response variable to make the plot for (if y is multivariate)
+#' @param type
+#' type of the plot (use only "b" or "l")
+#' @param show.labels
+#' logical, show or not labels for plot points
+#' @param labels
+#' vector with point labels (by default number of components)
+#' @param main
+#' main plot title
+#' @param xlab
+#' label for x-axis
+#' @param ylab
+#' label for y-axis
+#' @param ...
+#' other plot parameters (see \code{mdaplot} for details)
+#'
+#' @export
+plotRMSERatio.regmodel <- function(obj, ny = 1, type = "b", show.labels = TRUE, labels = seq_len(obj$ncomp),
+   main = paste0("RMSECV/RMSEC ratio (", obj$res$cal$respnames[ny], ")"),
+   ylab = "RMSECV/RMSEC ratio",
+   xlab = "RMSECV", ...) {
+
+   stopifnot("Cross-validation results are not found." = !is.null(obj$res$cv))
+   stopifnot("Parameter 'ny' has a wrong value." = (length(ny) == 1 && ny >= 1 && ny <= nrow(obj$res$cal$rmse)))
+
+   plot_data <- matrix(obj$res$cv$rmse[ny, ]/obj$res$cal$rmse[ny, ], nrow = 1)
+   attr(plot_data, "xaxis.values") <- obj$res$cv$rmse[ny, ]
+   attr(plot_data, "xaxis.name") <- xlab
+
+   mdaplot(plot_data, type = type, xlab = xlab, ylab = ylab, main = main, show.labels = show.labels,
+      labels = labels,...)
+}
+
+
 #' Predictions plot for regression model
 #'
 #' @description

diff --git a/man/plotRMSERatio.Rd b/man/plotRMSERatio.Rd
new file mode 100644
index 0000000..45a1d2b
--- /dev/null
+++ b/man/plotRMSERatio.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/defaults.R
+\name{plotRMSERatio}
+\alias{plotRMSERatio}
+\title{Plot for ratio RMSECV/RMSEC vs RMSECV}
+\usage{
+plotRMSERatio(obj, ...)
+}
+\arguments{
+\item{obj}{object with any regression model}
+
+\item{...}{other parameters}
+}
+\description{
+Plot for ratio RMSECV/RMSEC vs RMSECV
+}

diff --git a/man/plotRMSERatio.regmodel.Rd b/man/plotRMSERatio.regmodel.Rd
new file mode 100644
index 0000000..2312c24
--- /dev/null
+++ b/man/plotRMSERatio.regmodel.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/regmodel.R
+\name{plotRMSERatio.regmodel}
+\alias{plotRMSERatio.regmodel}
+\title{RMSE plot for regression model}
+\usage{
+\method{plotRMSERatio}{regmodel}(
+  obj,
+  ny = 1,
+  type = "b",
+  xlab = "RMSECV/RMSEC ratio",
+  ylab = "RMSECV",
+  ...
+)
+}
+\arguments{
+\item{obj}{a regression model (object of class \code{regmodel})}
+}
+\description{
+Shows plot with root mean squared error values vs. number of components for PLS model.
+}

diff --git a/tests/testthat/test-regmodel.R b/tests/testthat/test-regmodel.R
index 3706636..cec5352 100644
--- a/tests/testthat/test-regmodel.R
+++ b/tests/testthat/test-regmodel.R
@@ -196,3 +196,21 @@ test_that("text outcome works fine", {
    expect_output(summary(m, ny = 1, ncomp = 1, res = list("xres" = m$res$cal)))
    expect_output(print(m))
 })
+
+test_that("RMSE ratio plot works fine", {
+   data(simdata)
+   X <- simdata$spectra.c
+   Y <- simdata$conc.c
+   m1 <- pls(X, Y, 11, cv = 1)
+
+   par(mfrow = c(2, 2))
+   expect_silent(plotRMSERatio(m1))
+   expect_silent(plotRMSERatio(m1, ny = 2))
+   expect_silent(plotRMSERatio(m1, ny = 3))
+   expect_silent(plotRMSERatio(m1, ny = 3, col = "red", pch = 3, lty = 2, lwd = 2))
+
+   expect_error(plotRMSERatio(m1, 0))
+   expect_error(plotRMSERatio(m1, 4))
+   expect_error(plotRMSERatio(m1, c(1, 2)))
+
+})
\ No newline at end of file

From d7eeb7da97a57761cc3401cad840513b252f2316 Mon Sep 17 00:00:00 2001
From: Sergey Kucheryavskiy
Date: Tue, 12 Jul 2022 07:56:26 +0200
Subject: [PATCH 35/42] updated help text and news

---
 NEWS.md                       |  1 +
 R/pls.R                       |  1 +
 man/plotRMSERatio.regmodel.Rd | 27 +++++++++++++++++++++------
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index d1c5c32..e364292 100755
--- a/NEWS.md
+++ b/NEWS.md
@@ -10,6 +10,7 @@ Other changes and improvements:

 * Refactoring and improvements of `prep.alsbasecorr()` code made the method 2-3 times faster, especially for large datasets.

+* added new plotting method `plotRMSERatio()` for regression models (inspired by [this post](https://eigenvector.com/evaluating-models-hating-on-r-squared/) by Barry M. Wise)
 * added [PQN](https://doi.org/10.1021/ac051632c) normalization method to `prep.norm()` function.

 * fixed a bug in `vipscores()` which could lead to slightly higher values for PLS2 models.

diff --git a/R/pls.R b/R/pls.R
index 473e795..a715afc 100755
--- a/R/pls.R
+++ b/R/pls.R
@@ -125,6 +125,7 @@
 #' \tabular{ll}{
 #'  \code{\link{plotPredictions.regmodel}} \tab shows predicted vs.
measured plot.\cr #' \code{\link{plotRMSE.regmodel}} \tab shows RMSE plot.\cr +#' \code{\link{plotRMSERatio.regmodel}} \tab shows plot for ratio RMSECV/RMSEC values.\cr #' \code{\link{plotYResiduals.regmodel}} \tab shows residuals plot for y values.\cr #' \code{\link{getRegcoeffs.regmodel}} \tab returns matrix with regression coefficients.\cr #' } diff --git a/man/plotRMSERatio.regmodel.Rd b/man/plotRMSERatio.regmodel.Rd index 2312c24..73d0293 100644 --- a/man/plotRMSERatio.regmodel.Rd +++ b/man/plotRMSERatio.regmodel.Rd @@ -2,20 +2,39 @@ % Please edit documentation in R/regmodel.R \name{plotRMSERatio.regmodel} \alias{plotRMSERatio.regmodel} -\title{RMSE plot for regression model} +\title{RMSECV/RMSEC ratio plot for regression model} \usage{ \method{plotRMSERatio}{regmodel}( obj, ny = 1, type = "b", - xlab = "RMSECV/RMSEC ratio", - ylab = "RMSECV", + show.labels = TRUE, + labels = seq_len(obj$ncomp), + main = paste0("RMSECV/RMSEC ratio (", obj$res$cal$respnames[ny], ")"), + ylab = "RMSECV/RMSEC ratio", + xlab = "RMSECV", ... ) } \arguments{ \item{obj}{a regression model (object of class \code{regmodel})} + +\item{ny}{number of response variable to make the plot for (if y is multivariate)} + +\item{type}{type of the plot (use only "b" or "l")} + +\item{show.labels}{logical, show or not labels for plot points} + +\item{labels}{vector with point labels (by default number of components)} + +\item{main}{main plot title} + +\item{ylab}{label for y-axis} + +\item{xlab}{label for x-axis} + +\item{...}{other plot parameters (see \code{mdaplot} for details)} } \description{ -Shows plot with root mean squared error values vs. number of components for PLS model. +Shows plot with RMSECV/RMSEC values vs. RMSECV for each component. } From 7a92dfd39896080f9fed132d2928b33ba4712566 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Tue, 12 Jul 2022 13:46:35 +0200 Subject: [PATCH 36/42] fixed minor issues found by linter --- R/classres.R | 10 ++++---- R/crossval.R | 8 +++--- R/ipls.R | 10 ++++---- R/ldecomp.R | 5 ++-- R/mcrpure.R | 9 +++---- R/mdaplot.R | 7 +++--- R/mdaplotg.R | 2 +- R/misc.R | 8 +++--- R/pca.R | 11 ++++---- R/pcv.R | 11 ++++---- R/plotseries.R | 9 ++----- R/pls.R | 50 +++++++++++++++++++------------------ R/plsdares.R | 2 +- R/plsres.R | 2 +- R/prep.R | 24 ++++++++++-------- R/randtest.R | 8 +++--- R/regcoeffs.R | 4 +-- R/regmodel.R | 6 ++--- R/regres.R | 6 ++--- R/simca.R | 4 +-- R/simcam.R | 2 +- man/mdaplot.formatValues.Rd | 2 +- man/plotXScores.pls.Rd | 2 +- man/plotXYScores.pls.Rd | 2 +- man/pls.Rd | 1 + man/randtest.Rd | 4 +-- 26 files changed, 104 insertions(+), 105 deletions(-) diff --git a/R/classres.R b/R/classres.R index 064382a..8581837 100755 --- a/R/classres.R +++ b/R/classres.R @@ -324,7 +324,7 @@ classres.getPerformance <- function(c.ref, c.pred) { dim(c.ref) <- NULL attrs <- mda.getattr(c.pred) if (length(attrs$exclrows) > 0) { - c.pred <- c.pred[-attrs$exclrows, , , drop = F] + c.pred <- c.pred[-attrs$exclrows, , , drop = FALSE] c.ref <- c.ref[-attrs$exclrows] } @@ -339,10 +339,10 @@ classres.getPerformance <- function(c.ref, c.pred) { # compute main performance indicators classnames <- dimnames(c.pred)[[3]] for (i in seq_len(nclasses)) { - fn[i, ] <- colSums((c.ref == classnames[i]) & (c.pred[, , i, drop = F] == -1)) - fp[i, ] <- colSums((c.ref != classnames[i]) & (c.pred[, , i, drop = F] == 1)) - tp[i, ] <- colSums((c.ref == classnames[i]) & (c.pred[, , i, drop = F] == 1)) - tn[i, ] <- colSums((c.ref != classnames[i]) & (c.pred[, , i, drop = F] == -1)) + fn[i, ] 
<- colSums((c.ref == classnames[i]) & (c.pred[, , i, drop = FALSE] == -1)) + fp[i, ] <- colSums((c.ref != classnames[i]) & (c.pred[, , i, drop = FALSE] == 1)) + tp[i, ] <- colSums((c.ref == classnames[i]) & (c.pred[, , i, drop = FALSE] == 1)) + tn[i, ] <- colSums((c.ref != classnames[i]) & (c.pred[, , i, drop = FALSE] == -1)) } # compute main statistics diff --git a/R/crossval.R b/R/crossval.R index d238cec..6debe23 100755 --- a/R/crossval.R +++ b/R/crossval.R @@ -81,7 +81,7 @@ crossval <- function(cv = 1, nobj = NULL, resp = NULL) { if (is.null(nobj)) nobj <- length(resp) # if user already provided matrix with values - return it - if (is.numeric(cv) && length(cv) == nobj) return (as.matrix(cv)) + if (is.numeric(cv) && length(cv) == nobj) return(as.matrix(cv)) p <- crossval.getParams(cv = cv, nobj = nobj) if (!(p$type %in% c("rand", "ven", "loo"))) { @@ -99,16 +99,16 @@ crossval <- function(cv = 1, nobj = NULL, resp = NULL) { } if (p$type == "loo") { - return (matrix(seq_len(nobj), ncol = 1)) + return(matrix(seq_len(nobj), ncol = 1)) } if (p$type == "rand") { - return (sapply(seq_len(p$nrep), function(i) rep(seq_len(p$nseg), length.out = nobj)[sample(nobj)])) + return(sapply(seq_len(p$nrep), function(i) rep(seq_len(p$nseg), length.out = nobj)[sample(nobj)])) } if (p$type == "ven") { ind <- if (is.null(resp)) seq_len(nobj) else order(resp) - return (matrix(rep(seq_len(p$nseg), length.out = nobj)[ind], ncol = 1)) + return(matrix(rep(seq_len(p$nseg), length.out = nobj)[ind], ncol = 1)) } stop("Something went wrong.") diff --git a/R/ipls.R b/R/ipls.R index 2c8b22a..5e32b80 100644 --- a/R/ipls.R +++ b/R/ipls.R @@ -165,7 +165,7 @@ ipls <- function(x, y, glob.ncomp = 10, center = TRUE, scale = FALSE, cv = list( if (ncol(y) > 1) { warning("iPLS can work with one y-variable at time, selecting first column.") - y <- y[, 1, drop = F] + y <- y[, 1, drop = FALSE] } # add name to the single column @@ -244,7 +244,7 @@ ipls <- function(x, y, glob.ncomp = 10, center = TRUE, scale = FALSE, cv = list( "n" = 0, "start" = 1, "end" = ncol(x), - "selected" = F, + "selected" = FALSE, "nComp" = obj$gm$ncomp.selected, "RMSE" = gmres$rmse[1, obj$gm$ncomp.selected], "R2" = gmres$r2[1, obj$gm$ncomp.selected] @@ -349,7 +349,7 @@ ipls.forward <- function(x, y, obj, int.stat, glob.stat) { # combine already selected intervals with the current ind <- obj$int.limits[l, 1]:obj$int.limits[l, 2] xc <- x[, c(selind, ind), drop = FALSE] - xt <- if(!is.null(obj$x.test)) obj$x.test[, c(selind, ind), drop = FALSE] else NULL + xt <- if (!is.null(obj$x.test)) obj$x.test[, c(selind, ind), drop = FALSE] else NULL # build a model m <- pls(xc, y, ncomp = obj$int.ncomp, center = obj$center, scale = obj$scale, cv = obj$cv, @@ -453,7 +453,7 @@ ipls.backward <- function(x, y, obj, int.stat, glob.stat) { # combine already selected intervals with the current xc <- x[, -c(unselind, ind), drop = FALSE] - xt <- if(!is.null(obj$x.test)) obj$x.test[, -c(unselind, ind), drop = FALSE] else NULL + xt <- if (!is.null(obj$x.test)) obj$x.test[, -c(unselind, ind), drop = FALSE] else NULL # build a model m <- pls(xc, y, ncomp = obj$int.ncomp, center = obj$center, scale = obj$scale, cv = obj$cv, @@ -479,7 +479,7 @@ ipls.backward <- function(x, y, obj, int.stat, glob.stat) { "n" = l, "start" = obj$int.limits[l, 1], "end" = obj$int.limits[l, 2], - "selected" = F, + "selected" = FALSE, "nComp" = m$ncomp.selected, "RMSE" = lres$rmse[1, m$ncomp.selected], "R2" = lres$r2[1, m$ncomp.selected] diff --git a/R/ldecomp.R b/R/ldecomp.R index 441a39c..8b9777b 100755 --- 
a/R/ldecomp.R +++ b/R/ldecomp.R @@ -422,7 +422,6 @@ ldecomp.getVariances <- function(scores, loadings, residuals, Q) { ldecomp.getDistances <- function(scores, loadings, residuals, eigenvals) { # get names and attributes - rows_excluded <- attr(scores, "exclrows") cols_excluded <- attr(loadings, "exclrows") # get sizes @@ -504,7 +503,7 @@ jm.crit <- function(residuals, eigenvals, alpha = 0.05, gamma = 0.01) { t2 <- rev(cumsum(rev(eigenvals)^2))[seq_len(ncomp)] t3 <- rev(cumsum(rev(eigenvals)^3))[seq_len(ncomp)] - h0 <- 1 - 2 * t1 * t3 / 3 / (t2^2); + h0 <- 1 - 2 * t1 * t3 / 3 / (t2^2) ifelse(h0 < 0.001, h0 <- 0.001, h0) # inverse error function @@ -548,7 +547,7 @@ jm.prob <- function(u, eigenvals, ncomp) { t2 <- rev(cumsum(rev(eigenvals)^2))[ncomp] t3 <- rev(cumsum(rev(eigenvals)^3))[ncomp] - h0 <- 1 - 2 * t1 * t3 / 3 / (t2^2); + h0 <- 1 - 2 * t1 * t3 / 3 / (t2^2) ifelse(h0 < 0.001, h0 <- 0.001, h0) h1 <- (u / t1)^h0 diff --git a/R/mcrpure.R b/R/mcrpure.R index f7103ad..19a44cd 100644 --- a/R/mcrpure.R +++ b/R/mcrpure.R @@ -180,14 +180,16 @@ unmix.mcrpure <- function(obj, x) { # resolve spectra and concentrations St <- tryCatch( t(x) %*% Dr %*% solve(crossprod(Dr)), - error = function(e) + error = function(e) { stop("Unable to resolve the components, perhaps 'ncomp' is too large.", call. = FALSE) + } ) Ct <- tryCatch( x %*% St %*% solve(crossprod(St)), - error = function(e) + error = function(e) { stop("Unable to resolve the components, perhaps 'ncomp' is too large.", call. = FALSE) + } ) # scale @@ -293,8 +295,6 @@ summary.mcrpure <- function(object, ...) { fprintf("\nInfo:\n%s\n", object$info) } - drvstr <- c("no", "only for estimation of pure variables", "for pure variables and unmixing") - if (length(object$exclrows) > 0) { fprintf("Excluded rows: %d\n", length(object$exclrows)) } @@ -357,7 +357,6 @@ getPureVariables <- function(D, ncomp, purevars, offset) { nspec <- nrow(D) nvar <- ncol(D) nspecvis <- nspec - length(exclrows) - nvarvis <- nvar - length(exclcols) # get indices for excluded columns if provided colind <- seq_len(nvar) diff --git a/R/mdaplot.R b/R/mdaplot.R index b11cb17..2c2c221 100755 --- a/R/mdaplot.R +++ b/R/mdaplot.R @@ -28,7 +28,7 @@ mdaplot.areColors <- function(palette) { #' @return #' matrix with formatted values #' -mdaplot.formatValues <- function(data, round.only = F, digits = 3) { +mdaplot.formatValues <- function(data, round.only = FALSE, digits = 3) { # if values are not numeric - return as is if (!is.numeric(data[1])) return(data) @@ -152,7 +152,7 @@ mdaplot.showColorbar <- function(cgroup, colmap = "default", lab.col = "darkgray shift <- shift * w * 0.02 # 2 percent of segment width x <- lim[1] + dx * 0.1 - y <- lim[4] - (h + 0.1 * h); + y <- lim[4] - (h + 0.1 * h) # show colorbar and define coordinates for labels for (i in seq_len(ncol)) { @@ -168,8 +168,7 @@ mdaplot.showColorbar <- function(cgroup, colmap = "default", lab.col = "darkgray } # show labels for colorbar regions - text(labels[, 1], labels[, 2], labels = rownames(labels), pos = 1, col = lab.col, - cex = lab.cex) + text(labels[, 1], labels[, 2], labels = rownames(labels), pos = 1, col = lab.col, cex = lab.cex) } #' Plot lines diff --git a/R/mdaplotg.R b/R/mdaplotg.R index f19cb85..750c907 100755 --- a/R/mdaplotg.R +++ b/R/mdaplotg.R @@ -476,7 +476,7 @@ mdaplotg <- function( # use mdaplot with show.axes = FALSE to create the plot mdaplot(ps = ps[[i]], type = type[i], col = col[i], pch = pch[i], lty = lty[i], lwd = lwd[i], cex = cex[i], force.x.values = force.x.values, bwd = bwd, - show.grid = 
F, show.labels = show.labels, opacity = opacity[i], + show.grid = FALSE, show.labels = show.labels, opacity = opacity[i], lab.col = lab.col[i], lab.cex = lab.cex, show.axes = FALSE, show.excluded = show.excluded, ... ) diff --git a/R/misc.R b/R/misc.R index a976aa6..d50ee40 100755 --- a/R/misc.R +++ b/R/misc.R @@ -201,12 +201,12 @@ mda.subset <- function(x, subset = NULL, select = NULL) { attrs <- mda.getattr(x) if (!is.null(subset)) { - if (is.logical(subset) & !is.null(attrs$exclrows)) + if (is.logical(subset) && !is.null(attrs$exclrows)) subset <- subset[-attrs$exclrows] # remove excluded rows first if (!is.null(attrs$exclrows)) - x <- x[-attrs$exclrows, , drop = F] + x <- x[-attrs$exclrows, , drop = FALSE] # get numeric indices for the rows and subset them subset <- mda.getexclind(subset, rownames(x), nrow(x)) @@ -227,11 +227,11 @@ mda.subset <- function(x, subset = NULL, select = NULL) { # remove excluded rows first if (!is.null(attrs$exclcols)) - x <- x[, -attrs$exclcols, drop = F] + x <- x[, -attrs$exclcols, drop = FALSE] # get numeric indices for the rows and subset them select <- mda.getexclind(select, colnames(x), ncol(x)) - x <- x[, select, drop = F] + x <- x[, select, drop = FALSE] # correct attributes if (!is.null(attrs$xaxis.values)) { diff --git a/R/pca.R b/R/pca.R index 9c20fd9..6deb79e 100755 --- a/R/pca.R +++ b/R/pca.R @@ -666,7 +666,7 @@ pca.mvreplace <- function(x, center = TRUE, scale = FALSE, maxncomp = 10, expvar } if (center) { - gmean <- attr(xo, "scaled:center"); + gmean <- attr(xo, "scaled:center") } n <- 1 @@ -697,7 +697,7 @@ pca.mvreplace <- function(x, center = TRUE, scale = FALSE, maxncomp = 10, expvar x_new <- tcrossprod(scores, loadings) # remove centering - x_new <- sweep(x_new, 2L, lmean, "+", check.margin = F) + x_new <- sweep(x_new, 2L, lmean, "+", check.margin = FALSE) # replace missing values by the calculated x <- xo @@ -831,7 +831,7 @@ pca.nipals <- function(x, ncomp = min(ncol(x), nrow(x) - 1), tol = 10^-10) { E <- x for (i in seq_len(ncomp)) { ind <- which.max(apply(E, 2, sd)) - t <- E[, ind, drop = F] + t <- E[, ind, drop = FALSE] tau <- th <- 99999999 while (th > tol * tau) { p <- crossprod(E, t) / as.vector(crossprod(t)) @@ -1211,7 +1211,8 @@ plotResiduals.pca <- function(obj, ncomp = obj$ncomp.selected, log = FALSE, #' See examples in help for \code{\link{pca}} function. #' #' @export -plotLoadings.pca <- function(obj, comp = if (obj$ncomp > 1) c(1, 2) else 1, type = (if (length(comp == 2)) "p" else "l"), +plotLoadings.pca <- function(obj, comp = if (obj$ncomp > 1) c(1, 2) else 1, + type = (if (length(comp == 2)) "p" else "l"), show.legend = TRUE, show.axes = TRUE, ...) { if (min(comp) < 1 || max(comp) > ncol(obj$loadings)) { @@ -1292,7 +1293,7 @@ plotBiplot.pca <- function(obj, comp = c(1, 2), pch = c(16, NA), col = mdaplot.g main = main, colmap = col, show.lines = show.lines, show.excluded = show.excluded, ...) 
if (show.excluded && length(attr(loadings, "exclrows")) > 0) { - loadings <- loadings[-attr(loadings, "exclrows"), , drop = F] + loadings <- loadings[-attr(loadings, "exclrows"), , drop = FALSE] } segments(0, 0, loadings[, 1], loadings[, 2], col = col[2], lty = lty, lwd = lwd) diff --git a/R/pcv.R b/R/pcv.R index 96c33ab..b064be1 100644 --- a/R/pcv.R +++ b/R/pcv.R @@ -25,7 +25,7 @@ #' Pseudo-validation matrix (IxJ) #' #' @export -pcv <- function(x, ncomp = min(round(nrow(x)/nseg) - 1, col(x), 20), nseg = 4, scale = FALSE) { +pcv <- function(x, ncomp = min(round(nrow(x) / nseg) - 1, col(x), 20), nseg = 4, scale = FALSE) { # keep names if any attrs <- attributes(x) @@ -56,7 +56,7 @@ pcv <- function(x, ncomp = min(round(nrow(x)/nseg) - 1, col(x), 20), nseg = 4, s # split data to calibration and validation x.c <- x[-ind[, k], , drop = FALSE] - x.k <- x[ ind[, k], , drop = FALSE] + x.k <- x[ind[, k], , drop = FALSE] # get loadings for local model and rotation matrix between global and local models P.k <- svd(x.c, nv = ncomp)$v[, seq_len(ncomp), drop = FALSE] @@ -93,8 +93,8 @@ pcv <- function(x, ncomp = min(round(nrow(x)/nseg) - 1, col(x), 20), nseg = 4, s #' @return #' Rotation matrix (JxJ) getR <- function(base1, base2) { - base1 <- as.matrix(base1); - base2 <- as.matrix(base2); + base1 <- as.matrix(base1) + base2 <- as.matrix(base2) R1 <- rotationMatrixToX1(base1[, 1]) R2 <- rotationMatrixToX1(base2[, 1]) @@ -122,7 +122,7 @@ getR <- function(base1, base2) { R <- crossprod(R2, (M %*% R1)) } - return(R); + return(R) } #' Creates a rotation matrix to map a vector x to [1 0 0 ... 0] @@ -174,4 +174,3 @@ eye <- function(n) { diag(X) <- 1 return(X) } - diff --git a/R/plotseries.R b/R/plotseries.R index c77b112..4fba984 100644 --- a/R/plotseries.R +++ b/R/plotseries.R @@ -210,7 +210,7 @@ splitPlotData <- function(data, type) { # 0.12.0: yaxis.name must not be used as axis label in line and bar plots if (type %in% c("b", "l", "e", "h")) { - attrs$yaxis.name = NULL + attrs$yaxis.name <- NULL } # prepare x-axis values for other types of plots @@ -315,7 +315,7 @@ getPlotColors <- function(ps, col, opacity, cgroup, colmap) { getConfidenceEllipse <- function(points, conf.level = 0.95, n = 100) { # compute igen vectors and values - e <- eigen(cov(points)); + e <- eigen(cov(points)) # get angle between the x-axis and the largest eigenvector phi <- atan2(e$vectors[[2]], e$vectors[[1]]) @@ -647,11 +647,6 @@ plotBars <- function(ps, col = ps$col, bwd = 0.8, border = NA, force.x.values = y <- ps$y_values[1, ] if (length(x) > 1) { - # this gives variable width for bars and does not work well - #bwd_left <- c(x[seq(2, length(x))] - x[seq(1, length(x) - 1)]) - #bwd_right <- -c(x[seq(1, length(x) - 1)] - x[seq(2, length(x))]) - #bwd_left <- c(bwd_left[1], bwd_left) * bwd / 2 - #bwd_right <- c(bwd_right, bwd_right[length(bwd_right)]) * bwd / 2 dx <- min(diff(x)) bwd_right <- bwd_left <- dx * bwd / 2 } else { diff --git a/R/pls.R b/R/pls.R index a715afc..9cb9cc7 100755 --- a/R/pls.R +++ b/R/pls.R @@ -526,9 +526,10 @@ setDistanceLimits.pls <- function(obj, lim.type = obj$lim.type, alpha = obj$alph #' vector with names used for components #' #' @return array with predicted y-values -pls.getpredictions <- function(x, coeffs, ycenter, yscale, ynames = NULL, y.attrs = NULL, objnames = NULL, compnames = NULL) { +pls.getpredictions <- function(x, coeffs, ycenter, yscale, ynames = NULL, y.attrs = NULL, objnames = NULL, + compnames = NULL) { - yp <- apply(coeffs, 3, function(x, y)(y %*% x), x) + yp <- apply(coeffs, 3, 
function(x, y) (y %*% x), x) dim(yp) <- c(nrow(x), dim(coeffs)[2], dim(coeffs)[3]) # unscale predicted y values @@ -542,7 +543,7 @@ pls.getpredictions <- function(x, coeffs, ycenter, yscale, ynames = NULL, y.attr attr(yp, "name") <- "Response values, predicted" dimnames(yp) <- list(objnames, compnames, ynames) - return (yp) + return(yp) } #' Compute object with decomposition of y-values @@ -569,10 +570,11 @@ pls.getpredictions <- function(x, coeffs, ycenter, yscale, ynames = NULL, y.attr #' vector with names used for components #' #' @return array `ldecomp` object for y-values (or NULL if y is not provided) -pls.getydecomp <- function(y, yscores, xscores, yloadings, yeigenvals, ynames = NULL, y.attrs = NULL, x.attrs = NULL, objnames = NULL, compnames = NULL) { +pls.getydecomp <- function(y, yscores, xscores, yloadings, yeigenvals, ynames = NULL, y.attrs = NULL, + x.attrs = NULL, objnames = NULL, compnames = NULL) { # if reference y-values are not provided, no ydecomp can be computed - if (is.null(y)) return (NULL) + if (is.null(y)) return(NULL) # compute resuduals yresiduals <- y - tcrossprod(xscores, yloadings) @@ -590,8 +592,6 @@ pls.getydecomp <- function(y, yscores, xscores, yloadings, yeigenvals, ynames = yscores <- mda.setattr(yscores, x.attrs, "row") yresiduals <- mda.setattr(yresiduals, y.attrs) - - # attr(yscores, "exclrows") <- attr(yresiduals, "exclrows") <- y.attrs$exclrows attr(yscores, "name") <- "Y-scores" attr(yscores, "xaxis.name") <- "Components" attr(yresiduals, "name") <- "Residuals" @@ -601,7 +601,7 @@ pls.getydecomp <- function(y, yscores, xscores, yloadings, yeigenvals, ynames = ydecomp <- ldecomp(scores = xscores, loadings = yloadings, residuals = yresiduals, eigenvals = yeigenvals) ydecomp$scores <- yscores - return (ydecomp) + return(ydecomp) } #' Compute object with decomposition of x-values @@ -624,7 +624,8 @@ pls.getydecomp <- function(y, yscores, xscores, yloadings, yeigenvals, ynames = #' vector with names used for components #' #' @return array `ldecomp` object for x-values -pls.getxdecomp <- function(x, xscores, xloadings, xeigenvals, xnames = NULL, x.attrs = NULL, objnames = NULL, compnames = NULL) { +pls.getxdecomp <- function(x, xscores, xloadings, xeigenvals, xnames = NULL, x.attrs = NULL, objnames = NULL, + compnames = NULL) { # compute x-residuals xresiduals <- x - tcrossprod(xscores, xloadings) @@ -643,7 +644,7 @@ pls.getxdecomp <- function(x, xscores, xloadings, xeigenvals, xnames = NULL, x.a # create and return xdecomp object xdecomp <- ldecomp(scores = xscores, residuals = xresiduals, loadings = xloadings, eigenvals = xeigenvals) - return (xdecomp) + return(xdecomp) } #' Compute matrix with X-scores @@ -657,7 +658,7 @@ pls.getxdecomp <- function(x, xscores, xloadings, xeigenvals, xnames = NULL, x.a #' #' @return matrix with X-scores pls.getxscores <- function(x, weights, xloadings) { - xscores <- x %*% (weights %*% solve(crossprod(xloadings, weights))) + return(x %*% (weights %*% solve(crossprod(xloadings, weights)))) } #' Compute and orthogonalize matrix with Y-scores @@ -678,10 +679,11 @@ pls.getyscores <- function(y, yloadings, xscores) { # orthogonalize for (a in 2:ncomp) { - yscores[, a] <- yscores[, a] - xscores[, 1:(a-1), drop = FALSE] %*% crossprod(xscores[, 1:(a-1), drop = FALSE], yscores[, a]) + yscores[, a] <- yscores[, a] - xscores[, 1:(a - 1), drop = FALSE] %*% + crossprod(xscores[, 1:(a - 1), drop = FALSE], yscores[, a]) } - return (yscores) + return(yscores) } #' PLS predictions #' @@ -727,7 +729,6 @@ predict.pls <- function(object, 
x, y = NULL, cv = FALSE, ...) { # get dimensions nresp <- dim(object$coeffs$values)[3] - ncomp <- dim(object$coeffs$values)[2] # check dimensions of predictors if (ncol(x) != dim(object$coeffs$values)[1]) { @@ -738,7 +739,8 @@ predict.pls <- function(object, x, y = NULL, cv = FALSE, ...) { x <- prep.autoscale(x, center = object$xcenter, scale = object$xscale) # get predicted y-values - yp <- pls.getpredictions(x, object$coeffs$values, object$ycenter, object$yscale, respnames, x.attrs, objnames, compnames) + yp <- pls.getpredictions(x, object$coeffs$values, object$ycenter, object$yscale, respnames, x.attrs, + objnames, compnames) # if predictions for cross-validation - return if (cv) { @@ -790,7 +792,7 @@ predict.pls <- function(object, x, y = NULL, cv = FALSE, ...) { attr(ydecomp$Q, "Nu") <- object$Zlim[4, ] } - return (plsres(yp, y.ref = y.ref, ncomp.selected = object$ncomp.selected, xdecomp = xdecomp, ydecomp = ydecomp)) + return(plsres(yp, y.ref = y.ref, ncomp.selected = object$ncomp.selected, xdecomp = xdecomp, ydecomp = ydecomp)) } #' Categorize data rows based on PLS results and critical limits for total distance. @@ -920,7 +922,7 @@ summary.pls <- function(object, ncomp = object$ncomp.selected, if (!any(is.na(out[, 1:4]))) out[, 1:4] <- round(out[, 1:4], 3) out[, 5] <- round(out[, 5], 3) - out[, 6] <- mdaplot.formatValues(out[, 6], round.only = T) + out[, 6] <- mdaplot.formatValues(out[, 6], round.only = TRUE) out[, 7] <- round(out[, 7], 3) out[, 8] <- round(out[, 8], 4) out[, 9] <- round(out[, 9], 2) @@ -1107,7 +1109,7 @@ plotVariance.pls <- function(obj, decomp = "xdecomp", variance = "expvar", type #' See examples in help for \code{\link{pls}} function. #' #' @export -plotXScores.pls <- function(obj, comp = if (obj$ncomp > 1) c(1, 2) else 1, show.axes = T, main = "Scores (X)", +plotXScores.pls <- function(obj, comp = if (obj$ncomp > 1) c(1, 2) else 1, show.axes = TRUE, main = "Scores (X)", res = obj$res, ...) { if (min(comp) < 1 || max(comp) > ncol(obj$weights)) { @@ -1144,7 +1146,7 @@ plotXScores.pls <- function(obj, comp = if (obj$ncomp > 1) c(1, 2) else 1, show. #' See examples in help for \code{\link{pls}} function. #' #' @export -plotXYScores.pls <- function(obj, ncomp = 1, show.axes = T, res = obj$res, ...) { +plotXYScores.pls <- function(obj, ncomp = 1, show.axes = TRUE, res = obj$res, ...) 
{ show.lines <- if (show.axes) c(0, 0) else FALSE plot_data <- lapply(res, plotXYScores, ncomp = ncomp, type = "p", show.plot = FALSE) @@ -1616,7 +1618,7 @@ pls.simpls <- function(x, y, ncomp, cv = FALSE) { B <- array(0, dim = c(npred, ncomp, nresp)) Q <- matrix(0, nrow = nresp, ncol = ncomp) R <- V <- P <- matrix(0, nrow = npred, ncol = ncomp) - T <- U <- matrix(0, nrow = nobj, ncol = ncomp) + TT <- U <- matrix(0, nrow = nobj, ncol = ncomp) # loop for each components @@ -1636,7 +1638,7 @@ pls.simpls <- function(x, y, ncomp, cv = FALSE) { if (a > 1) { v <- v - V %*% crossprod(V, p) - u <- u - T %*% crossprod(T, u) + u <- u - TT %*% crossprod(TT, u) } v <- v / sqrt(sum(v * v)) @@ -1644,7 +1646,7 @@ pls.simpls <- function(x, y, ncomp, cv = FALSE) { R[, a] <- r V[, a] <- v P[, a] <- p - T[, a] <- t + TT[, a] <- t U[, a] <- u Q[, a] <- q @@ -1655,7 +1657,7 @@ pls.simpls <- function(x, y, ncomp, cv = FALSE) { S <- S - v %*% crossprod(v, S) } - return(list(coeffs = B, weights = R, xloadings = P, xscores = T, yloadings = Q, yscores = U, ncomp = a)) + return(list(coeffs = B, weights = R, xloadings = P, xscores = TT, yloadings = Q, yscores = U, ncomp = a)) } #' SIMPLS algorithm (old implementation) @@ -1998,7 +2000,7 @@ vipscores <- function(obj, ncomp = obj$ncomp.selected) { # subset needed model parameters comp <- seq_len(ncomp) weights <- obj$weights[, comp, drop = FALSE] - yloads <- obj$yloadings[, comp, drop = FALSE]; + yloads <- obj$yloadings[, comp, drop = FALSE] # get eigenvalues xeigenvals <- obj$xeigenvals[comp] diff --git a/R/plsdares.R b/R/plsdares.R index 1548f06..e999aeb 100755 --- a/R/plsdares.R +++ b/R/plsdares.R @@ -213,7 +213,7 @@ as.matrix.plsdares <- function(x, ncomp = NULL, nc = 1, ...) { #' #' @export summary.plsdares <- function(object, nc = seq_len(object$nclasses), ...) { - cat("\nPLS-DA results (class plsdares) summary:\n"); + cat("\nPLS-DA results (class plsdares) summary:\n") fprintf("Number of selected components: %.0f\n", object$ncomp.selected) if (is.null(object$c.ref)) { diff --git a/R/plsres.R b/R/plsres.R index 12a4993..2063871 100755 --- a/R/plsres.R +++ b/R/plsres.R @@ -226,7 +226,7 @@ summary.plsres <- function(object, ny = seq_len(object$nresp), ncomp = NULL, ... 
out <- as.matrix.plsres(object, ny = y, ncomp = ncomp) if (!any(is.na(out[, 1:4]))) out[, 1:4] <- round(out[, 1:4], 3) out[, 5] <- round(out[, 5], 3) - out[, 6] <- mdaplot.formatValues(out[, 6], round.only = T) + out[, 6] <- mdaplot.formatValues(out[, 6], round.only = TRUE) out[, 7] <- round(out[, 7], 3) out[, 8] <- round(out[, 8], 4) out[, 9] <- round(out[, 9], 2) diff --git a/R/prep.R b/R/prep.R index fae668d..27f2dd7 100755 --- a/R/prep.R +++ b/R/prep.R @@ -142,7 +142,7 @@ prep.norm <- function(data, type = "area", col.ind = NULL, ref.spectrum = NULL) if (type == "snv") return(prep.snv(data)) - if (type == "is" && is.null(col.ind) ) { + if (type == "is" && is.null(col.ind)) { stop("For 'is' normalization you need to provide indices for IS peak.") } @@ -229,8 +229,8 @@ prep.savgol <- function(data, width = 3, porder = 1, dorder = 0) { # compute grams polynomials gram <- function(i, m, k, s) { if (k > 0) { - return( (4 * k - 2) / (k * (2 * m - k + 1)) * (i * gram(i, m, k - 1, s) + s * gram(i, m, k - 1, s - 1)) - - ((k - 1) * (2 * m + k)) / (k * (2 * m - k + 1)) * gram(i, m, k - 2, s) ) + return((4 * k - 2) / (k * (2 * m - k + 1)) * (i * gram(i, m, k - 1, s) + s * gram(i, m, k - 1, s - 1)) - + ((k - 1) * (2 * m + k)) / (k * (2 * m - k + 1)) * gram(i, m, k - 2, s)) } if (k == 0 && s == 0) return(1) return(0) @@ -252,7 +252,8 @@ prep.savgol <- function(data, width = 3, porder = 1, dorder = 0) { weight <- function(i, t, m, n, s) { sum <- 0 for (k in 0:n) { - sum <- sum + (2 * k + 1) * (genfact(2 * m, k) / genfact(2 * m + k + 1, k + 1)) * gram(i, m, k, 0) * gram(t, m, k, s) + sum <- sum + (2 * k + 1) * (genfact(2 * m, k) / genfact(2 * m + k + 1, k + 1)) * gram(i, m, k, 0) * + gram(t, m, k, s) } return(sum) } @@ -265,13 +266,15 @@ prep.savgol <- function(data, width = 3, porder = 1, dorder = 0) { w <- outer(-m:m, -m:m, function(x, y) weight(x, y, m, porder, dorder)) for (i in seq_len(m)) { - px[, i] = apply(x[, seq_len(2 * m + 1), drop = FALSE], 1, function(xx) convolve(xx, w[, i], type = "filter")[1]) - px[, nvar - i + 1] = apply(x[, (nvar - 2 * m):nvar, drop = FALSE], 1, function(xx) convolve(xx, w[, width - i + 1], type = "filter")[1]) + px[, i] <- apply(x[, seq_len(2 * m + 1), drop = FALSE], 1, + function(xx) convolve(xx, w[, i], type = "filter")[1]) + px[, nvar - i + 1] <- apply(x[, (nvar - 2 * m):nvar, drop = FALSE], 1, + function(xx) convolve(xx, w[, width - i + 1], type = "filter")[1]) } - px[, (m+1):(nvar-m)] = apply(x, 1, function(xx) convolve(xx, w[, m + 1], type = "filter")) + px[, (m + 1):(nvar - m)] <- apply(x, 1, function(xx) convolve(xx, w[, m + 1], type = "filter")) - return (px) + return(px) } return(prep.generic(data, f, width = width, porder = porder, dorder = dorder)) @@ -418,7 +421,7 @@ prep.alsbasecorr <- function(data, plambda = 5, p = 0.1, max.niter = 10) { w <- w.ini for (j in seq_len(max.niter)) { W <- Matrix::Diagonal(x = as.numeric(w)) - z <- Matrix::solve(as(W + LDD, 'dgCMatrix'), w * y, sparse = TRUE) + z <- Matrix::solve(as(W + LDD, "dgCMatrix"), w * y, sparse = TRUE) w.old <- w w <- p * (y > z) + (1 - p) * (y < z) @@ -738,7 +741,8 @@ prep <- function(name, params = NULL, method = NULL) { #' @export employ.prep <- function(obj, x, ...) 
{ - stopifnot("employ.prep: the first argument must be a list with preprocessing methods" = is.list(obj) && class(obj[[1]]) == "prep") + stopifnot("employ.prep: the first argument must be a list with preprocessing methods" = + is.list(obj) && class(obj[[1]]) == "prep") for (p in obj) { x <- do.call(p$method, c(list(data = x), p$params)) } diff --git a/R/randtest.R b/R/randtest.R index c15a651..7c16f7a 100755 --- a/R/randtest.R +++ b/R/randtest.R @@ -92,7 +92,7 @@ #' par( mfrow = c(1, 1)) #' #' @export -randtest <- function(x, y, ncomp = 15, center = T, scale = F, nperm = 1000, sig.level = 0.05, +randtest <- function(x, y, ncomp = 15, center = TRUE, scale = FALSE, nperm = 1000, sig.level = 0.05, silent = TRUE, exclcols = NULL, exclrows = NULL) { x <- as.matrix(x) y <- as.matrix(y) @@ -100,14 +100,14 @@ randtest <- function(x, y, ncomp = 15, center = T, scale = F, nperm = 1000, sig. # remove excluded columns and rows if (length(exclcols) > 0) { x <- mda.exclcols(x, exclcols) - x <- x[, -attr(x, "exclcols"), drop = F] + x <- x[, -attr(x, "exclcols"), drop = FALSE] } if (length(exclrows) > 0) { x <- mda.exclrows(x, exclrows) exclrows <- attr(x, "exclrows") - x <- x[-exclrows, , drop = F] - y <- y[-exclrows, , drop = F] + x <- x[-exclrows, , drop = FALSE] + y <- y[-exclrows, , drop = FALSE] } nobj <- nrow(x) diff --git a/R/regcoeffs.R b/R/regcoeffs.R index 2fbad97..6dfd49b 100755 --- a/R/regcoeffs.R +++ b/R/regcoeffs.R @@ -165,7 +165,7 @@ summary.regcoeffs <- function(object, ncomp = 1, ny = 1, alpha = 0.05, ...) { } attrs <- mda.getattr(object$values) - coeffs <- object$values[, ncomp, ny, drop = F] + coeffs <- object$values[, ncomp, ny, drop = FALSE] dim(coeffs) <- c(dim(object$values)[1], 1) colnames(coeffs)[1] <- "Coeffs" if (!is.null(object$se)) { @@ -382,5 +382,5 @@ plot.regcoeffs <- function(x, ncomp = 1, ny = 1, type = (if (x$nvar > 30) "l" el err <- (ci[, 2] - ci[, 1]) / 2 mdaplotg(list(plot_data, mda.rbind(plot_data, err)), type = c(type, "e"), show.legend = FALSE, - col = col[c(2, 1)], show.grid = T, show.lines = show.lines, ylab = ylab, ...) + col = col[c(2, 1)], show.grid = TRUE, show.lines = show.lines, ylab = ylab, ...) } diff --git a/R/regmodel.R b/R/regmodel.R index d86331d..32dc541 100755 --- a/R/regmodel.R +++ b/R/regmodel.R @@ -231,7 +231,7 @@ summary.regmodel <- function(object, ncomp = object$ncomp.selected, rownames(sum_data) <- capitalize(names(res)) sum_data[, "R2"] <- round(sum_data[, "R2"], 3) - sum_data[, "RMSE"] <- mdaplot.formatValues(sum_data[, "RMSE"], round.only = T) + sum_data[, "RMSE"] <- mdaplot.formatValues(sum_data[, "RMSE"], round.only = TRUE) sum_data[, "Slope"] <- round(sum_data[, "Slope"], 3) sum_data[, "Bias"] <- round(sum_data[, "Bias"], 4) sum_data[, "RPD"] <- round(sum_data[, "RPD"], 1) @@ -335,12 +335,12 @@ plotRMSERatio.regmodel <- function(obj, ny = 1, type = "b", show.labels = TRUE, stopifnot("Cross-validation results are not found." = !is.null(obj$res$cv)) stopifnot("Parameter 'ny' has a wrong value." = (length(ny) == 1 && ny >= 1 && ny <= nrow(obj$res$cal$rmse))) - plot_data <- matrix(obj$res$cv$rmse[ny, ]/obj$res$cal$rmse[ny, ], nrow = 1) + plot_data <- matrix(obj$res$cv$rmse[ny, ] / obj$res$cal$rmse[ny, ], nrow = 1) attr(plot_data, "xaxis.values") <- obj$res$cv$rmse[ny, ] attr(plot_data, "xaxis.name") <- xlab mdaplot(plot_data, type = type, xlab = xlab, ylab = ylab, main = main, show.labels = show.labels, - labels = labels,...) + labels = labels, ...) 
} diff --git a/R/regres.R b/R/regres.R index f2b882f..5071a28 100755 --- a/R/regres.R +++ b/R/regres.R @@ -161,8 +161,8 @@ regres.getPerformanceStats <- function(y.pred, y.ref) { # remove excluded rows so they are not counted # when calculating statistics if (length(attrs$exclrows) > 0) { - y.pred <- y.pred[-attrs$exclrows, , , drop = F] - y.ref <- y.ref[-attrs$exclrows, , drop = F] + y.pred <- y.pred[-attrs$exclrows, , , drop = FALSE] + y.ref <- y.ref[-attrs$exclrows, , drop = FALSE] } # residuals (errors) based statistics @@ -234,7 +234,7 @@ regres.err <- function(y.pred, y.ref) { #' #' @export regres.r2 <- function(err, ytot) { - r2 <- t(1 - scale(colSums(err^2), center = F, scale = ytot)) + r2 <- t(1 - scale(colSums(err^2), center = FALSE, scale = ytot)) return(regress.addattrs(r2, attributes(err), "Coefficient of determination")) } diff --git a/R/simca.R b/R/simca.R index 1bbf798..91b241c 100755 --- a/R/simca.R +++ b/R/simca.R @@ -299,12 +299,12 @@ crossval.simca <- function(obj, x, cv) { # remove excluded rows if (length(attrs$exclrows) > 0) { - x <- x[-attrs$exclrows, , drop = F] + x <- x[-attrs$exclrows, , drop = FALSE] } # remove excluded columns if (length(attrs$exclcols) > 0) { - x <- x[, -attrs$exclcols, drop = F] + x <- x[, -attrs$exclcols, drop = FALSE] } # get matrix with indices for cv segments diff --git a/R/simcam.R b/R/simcam.R index af7153a..5365d55 100755 --- a/R/simcam.R +++ b/R/simcam.R @@ -432,7 +432,7 @@ plotModelDistance.simcam <- function(obj, nc = 1, type = "h", xticks = seq_len(o stop("Wrong values for 'nc' parameter.") } - mdaplot(mda.t(obj$moddist[, nc, drop = F]), type = type, xticks = xticks, + mdaplot(mda.t(obj$moddist[, nc, drop = FALSE]), type = type, xticks = xticks, xticklabels = xticklabels, main = main, xlab = xlab, ylab = ylab, ...) } diff --git a/man/mdaplot.formatValues.Rd b/man/mdaplot.formatValues.Rd index eeb6a23..d481191 100644 --- a/man/mdaplot.formatValues.Rd +++ b/man/mdaplot.formatValues.Rd @@ -4,7 +4,7 @@ \alias{mdaplot.formatValues} \title{Format vector with numeric values} \usage{ -mdaplot.formatValues(data, round.only = F, digits = 3) +mdaplot.formatValues(data, round.only = FALSE, digits = 3) } \arguments{ \item{data}{vector or matrix with values} diff --git a/man/plotXScores.pls.Rd b/man/plotXScores.pls.Rd index ed79a3f..9e587c3 100644 --- a/man/plotXScores.pls.Rd +++ b/man/plotXScores.pls.Rd @@ -7,7 +7,7 @@ \method{plotXScores}{pls}( obj, comp = if (obj$ncomp > 1) c(1, 2) else 1, - show.axes = T, + show.axes = TRUE, main = "Scores (X)", res = obj$res, ... diff --git a/man/plotXYScores.pls.Rd b/man/plotXYScores.pls.Rd index 557bbc1..8c6620f 100644 --- a/man/plotXYScores.pls.Rd +++ b/man/plotXYScores.pls.Rd @@ -4,7 +4,7 @@ \alias{plotXYScores.pls} \title{XY scores plot for PLS} \usage{ -\method{plotXYScores}{pls}(obj, ncomp = 1, show.axes = T, res = obj$res, ...) +\method{plotXYScores}{pls}(obj, ncomp = 1, show.axes = TRUE, res = obj$res, ...) } \arguments{ \item{obj}{a PLS model (object of class \code{pls})} diff --git a/man/pls.Rd b/man/pls.Rd index 791e7ca..6da9be7 100644 --- a/man/pls.Rd +++ b/man/pls.Rd @@ -257,6 +257,7 @@ Methods inherited from \code{regmodel} object (parent class for \code{pls}): \tabular{ll}{ \code{\link{plotPredictions.regmodel}} \tab shows predicted vs. 
measured plot.\cr \code{\link{plotRMSE.regmodel}} \tab shows RMSE plot.\cr + \code{\link{plotRMSERatio.regmodel}} \tab shows plot for ratio RMSECV/RMSEC values.\cr \code{\link{plotYResiduals.regmodel}} \tab shows residuals plot for y values.\cr \code{\link{getRegcoeffs.regmodel}} \tab returns matrix with regression coefficients.\cr } diff --git a/man/randtest.Rd b/man/randtest.Rd index d6814d9..9126c1d 100644 --- a/man/randtest.Rd +++ b/man/randtest.Rd @@ -8,8 +8,8 @@ randtest( x, y, ncomp = 15, - center = T, - scale = F, + center = TRUE, + scale = FALSE, nperm = 1000, sig.level = 0.05, silent = TRUE, From 53b09b84fdb78a356828460c680a259973d60ed1 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Tue, 12 Jul 2022 13:46:53 +0200 Subject: [PATCH 37/42] changed version for codecov --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ca0c67d..f9a3b90 100755 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ Multivariate Data Analysis Tools ![GitHub build status](https://github.com/svkucheryavski/mdatools/workflows/R-CMD-check/badge.svg) ![GitHub All Releases](https://img.shields.io/github/downloads/svkucheryavski/mdatools/total?color=blue&logo=Github "Downloads from GitHub") ![Downloads (CRAN)](https://cranlogs.r-pkg.org/badges/grand-total/mdatools?color=blue&logo=R&style=flat-square "Downloads from CRAN") -[![codecov](https://codecov.io/gh/svkucheryavski/mdatools/branch/0.10.0/graph/badge.svg?style=flat-square)](https://codecov.io/gh/svkucheryavski/mdatools) +[![codecov](https://codecov.io/gh/svkucheryavski/mdatools/branch/0.13.0/graph/badge.svg?style=flat-square)](https://codecov.io/gh/svkucheryavski/mdatools) From 1fbcdc15f957a0c51d510c04ef6696d6d03bafce Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Tue, 12 Jul 2022 20:13:13 +0200 Subject: [PATCH 38/42] fixed a bug in prep.savgol() --- R/prep.R | 6 +++--- tests/testthat/test-prep.R | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/R/prep.R b/R/prep.R index 27f2dd7..2d31fff 100755 --- a/R/prep.R +++ b/R/prep.R @@ -252,8 +252,8 @@ prep.savgol <- function(data, width = 3, porder = 1, dorder = 0) { weight <- function(i, t, m, n, s) { sum <- 0 for (k in 0:n) { - sum <- sum + (2 * k + 1) * (genfact(2 * m, k) / genfact(2 * m + k + 1, k + 1)) * gram(i, m, k, 0) * - gram(t, m, k, s) + sum <- sum + (2 * k + 1) * (genfact(2 * m, k) / genfact(2 * m + k + 1, k + 1)) * + gram(i, m, k, 0) * gram(t, m, k, s) } return(sum) } @@ -272,7 +272,7 @@ prep.savgol <- function(data, width = 3, porder = 1, dorder = 0) { function(xx) convolve(xx, w[, width - i + 1], type = "filter")[1]) } - px[, (m + 1):(nvar - m)] <- apply(x, 1, function(xx) convolve(xx, w[, m + 1], type = "filter")) + px[, (m + 1):(nvar - m)] <- t(apply(x, 1, function(xx) convolve(xx, w[, m + 1], type = "filter"))) return(px) } diff --git a/tests/testthat/test-prep.R b/tests/testthat/test-prep.R index 3a9210a..50a86c0 100644 --- a/tests/testthat/test-prep.R +++ b/tests/testthat/test-prep.R @@ -260,6 +260,20 @@ test_that("SavGol smoothing works correctly", { expect_equivalent(prep.savgol(y, width = 3, porder = 2, dorder = 2), c(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)) expect_equivalent(prep.savgol(z, width = 3, porder = 2, dorder = 2), c(-4.0, -4.0, 4.0, -4.0, 4.0, -4.0, 4.0, -4.0, 4.0, 4.0)) + + y1 <- prep.savgol( + rbind( + matrix(c(1, 1, 1, 3, 4, 7, 4, 3, 1, 1, 1.0), nrow = 1), + matrix(c(5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5.0), nrow = 1) + ), width = 5, porder = 1, dorder = 0 + ) + + 
y2 <- rbind(
+    prep.savgol(matrix(c(1, 1, 1, 3, 4, 7, 4, 3, 1, 1, 1.0), nrow = 1), width = 5, porder = 1, dorder = 0),
+    prep.savgol(matrix(c(5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5.0), nrow = 1), width = 5, porder = 1, dorder = 0)
+  )
+
+  expect_equivalent(y1, y2)
 })

 context("prep: alsbasecorr")

From d47169f19a9a4d12d826c9efbe3e564cd2273814 Mon Sep 17 00:00:00 2001
From: Sergey Kucheryavskiy
Date: Wed, 13 Jul 2022 21:04:57 +0200
Subject: [PATCH 39/42] small changes to readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index f9a3b90..3de8885 100755
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@ Multivariate Data Analysis Tools

 ![GitHub build status](https://github.com/svkucheryavski/mdatools/workflows/R-CMD-check/badge.svg) 
 ![GitHub All Releases](https://img.shields.io/github/downloads/svkucheryavski/mdatools/total?color=blue&logo=Github "Downloads from GitHub") 
 ![Downloads (CRAN)](https://cranlogs.r-pkg.org/badges/grand-total/mdatools?color=blue&logo=R&style=flat-square "Downloads from CRAN") 
-[![codecov](https://codecov.io/gh/svkucheryavski/mdatools/branch/0.13.0/graph/badge.svg?style=flat-square)](https://codecov.io/gh/svkucheryavski/mdatools)
+[![codecov](https://app.codecov.io/gh/svkucheryavski/mdatools/branch/0.12.0/graph/badge.svg?style=flat-square)](https://app.codecov.io/gh/svkucheryavski/mdatools)

From f3217322fc091708ff967a7d81563f0e166cd305 Mon Sep 17 00:00:00 2001
From: Sergey Kucheryavskiy
Date: Wed, 13 Jul 2022 21:08:46 +0200
Subject: [PATCH 40/42] fixed typo in link

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 3de8885..84f9f5a 100755
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@ Multivariate Data Analysis Tools

 ![GitHub build status](https://github.com/svkucheryavski/mdatools/workflows/R-CMD-check/badge.svg) 
 ![GitHub All Releases](https://img.shields.io/github/downloads/svkucheryavski/mdatools/total?color=blue&logo=Github "Downloads from GitHub") 
 ![Downloads (CRAN)](https://cranlogs.r-pkg.org/badges/grand-total/mdatools?color=blue&logo=R&style=flat-square "Downloads from CRAN") 
-[![codecov](https://app.codecov.io/gh/svkucheryavski/mdatools/branch/0.12.0/graph/badge.svg?style=flat-square)](https://app.codecov.io/gh/svkucheryavski/mdatools)
+[![codecov](https://codecov.io/gh/svkucheryavski/mdatools/branch/0.12.0/graph/badge.svg?style=flat-square)](https://app.codecov.io/gh/svkucheryavski/mdatools)

@@ -14,7 +14,7 @@

 For more details and examples read a [Bookdown tutorial](https://mda.tools/docs/). 
 You can also take video-lectures from [YouTube channel](https://www.youtube.com/channel/UCox0H4utfMq4FIu2kymuyTA) devoted to introductory Chemometric course I give to master students. The lectures explain theory behind basic Chemometric methods but also show how to use them in *mdatools*. 
 If you want to cite the package, please use the following: Sergey Kucheryavskiy, *mdatools – R package for chemometrics*, Chemometrics and Intelligent Laboratory Systems, Volume 198,
-2020 (DOI: [10.1016/j.chemolab.2020.103937](https://doi.org/10.1016/j.chemolab.2020.103937)).
+2020 (DOI: [10.1016/j.chemolab.2020.103937](https://doi.org/10.1016/j.chemolab.2020.103937)).
What is new
-----------

From f6df47ed28c56c2eff9f679f27c256ecbf0f05b4 Mon Sep 17 00:00:00 2001
From: Sergey Kucheryavskiy
Date: Wed, 13 Jul 2022 21:13:15 +0200
Subject: [PATCH 41/42] removed codecov badge

---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index 84f9f5a..cb35654 100755
--- a/README.md
+++ b/README.md
@@ -3,7 +3,6 @@ Multivariate Data Analysis Tools

 ![GitHub build status](https://github.com/svkucheryavski/mdatools/workflows/R-CMD-check/badge.svg) 
 ![GitHub All Releases](https://img.shields.io/github/downloads/svkucheryavski/mdatools/total?color=blue&logo=Github "Downloads from GitHub") 
 ![Downloads (CRAN)](https://cranlogs.r-pkg.org/badges/grand-total/mdatools?color=blue&logo=R&style=flat-square "Downloads from CRAN") 
-[![codecov](https://codecov.io/gh/svkucheryavski/mdatools/branch/0.12.0/graph/badge.svg?style=flat-square)](https://app.codecov.io/gh/svkucheryavski/mdatools)

From f8a5c8e52e6c090d144ee12184bf9c982c349630 Mon Sep 17 00:00:00 2001
From: Sergey Kucheryavskiy
Date: Thu, 14 Jul 2022 07:57:28 +0200
Subject: [PATCH 42/42] fixed some typos

---
 NEWS.md   | 138 +++++++++++++++++++++++++++---------------------------
 README.md |   2 +-
 2 files changed, 70 insertions(+), 70 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index e364292..2b651e0 100755
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,5 @@
 v. 0.13.0
-=========
+==========
 This release brings an updated implementation of the PLS algorithm (SIMPLS), which is more numerically stable and produces significantly fewer warnings about using too many components when you work with small y-values. The speed of the `pls()` method in general has also been improved.

 Another important change is that cross-validation of regression and classification models has been rewritten as a simpler solution and now you can also use your own custom splits by providing a vector with segment indices associated with each measurement. For example, if you run PLS with parameter `cv = c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2)`, it is assumed that your dataset has 10 measurements and you want to use a venetian blinds split with four segments. See more details in the tutorial, where the description of the cross-validation procedure has been moved to a separate section.

 Other changes and improvements:

 * Refactoring and improvements of `prep.savgol()` code made the method much faster (up to 50-60 times faster for datasets with many measurements).

 * Refactoring and improvements of `prep.alsbasecorr()` code made the method 2-3 times faster, especially for large datasets.

 * added new plotting method `plotRMSERatio()` for regression models (inspired by [this post](https://eigenvector.com/evaluating-models-hating-on-r-squared/) by Barry M. Wise)
 * added [PQN](https://doi.org/10.1021/ac051632c) normalization method to `prep.norm()` function.

 * fixed a bug in `vipscores()` which could lead to slightly higher values for PLS2 models.

 * fixes to several small bugs and general improvements.

-v.0.12.0
-========
+v. 0.12.0
+==========
 This release is mostly about preprocessing - added some new methods, improved the existing ones and implemented a possibility to combine preprocessing methods together (including parameter values) and apply them all together in the correct sequence. See [preprocessing section](https://mda.tools/docs/preprocessing.html) in the tutorials for details.

 ## New features and improvements

 * the user guides have been revised and improved.

-v.0.11.5
-========
+v. 0.11.5
+==========

 * fix for an issue in PLS SIMPLS implementation (incorrect use of `.Machine$longdouble.eps`), which led to an error when the package is tested on Apple M1.

-v.0.11.4
-========
@@ -69,8 +69,8 @@ v.0.11.4

* fixed bug [#99](https://github.com/svkucheryavski/mdatools/issues/99), which did not allow using user-defined indices of pure variables in `mcrpure()`.

-v.0.11.3
-========
+v. 0.11.3
+==========

* added the [Procrustes Cross-Validation method](https://doi.org/10.1021/acs.analchem.0c02175), `pcv()` (it is also available as a [separate project](https://github.com/svkucheryavski/pcv)).

@@ -85,21 +85,21 @@

* added a link to the [YouTube channel](https://www.youtube.com/channel/UCox0H4utfMq4FIu2kymuyTA) with a Chemometrics course based on the *mdatools* package.

-v.0.11.2
-========
+v. 0.11.2
+==========

* fixed an issue which led to a bug in `simcam.getPerformanceStats`, returning implausible and asymmetrical results (thanks to @svonallmen).

* fixed a small issue that sometimes gave a warning when running tests on CRAN (it did not influence the user experience though).

-v.0.11.1
-========
+v. 0.11.1
+==========

* the algorithm for the `mcrpure()` method has been modified to avoid potential issues with the original patented version.

-v.0.11.0
-========
+v. 0.11.0
+==========

## New features

@@ -118,14 +118,14 @@ v.0.11.0

* main model methods (`pls()`, `pca()`, etc.) now do an additional check for the consistency of the provided datasets.

-v.0.10.4
-========
+v. 0.10.4
+==========

* fixed a bug which led to the `opacity` option being ignored in plots.

-v.0.10.3
-========
+v. 0.10.3
+==========

* Fixed bug `#85`, where using y-values as a data frame gave an error in PLS regression

* Code refactoring and tests for preprocessing methods

-v.0.10.2
-========
+v. 0.10.2
+==========

* Fixed a bug in the `categorize.pls()` method, which could give wrong results for test set measurements (see issue #82).

-v.0.10.1
-========
+v. 0.10.1
+==========

* Small improvements to `plotExtreme.pca()` so the user can specify additional parameters, for example `cex`. If the plot is made for several components, you can now specify just one value for all points (e.g. color of points or marker symbol).

* Fixed a bug in the `summary()` method for PLS, which worked incorrectly in the case of several response variables (PLS2).

-v.0.10.0
-========
+v. 0.10.0
+==========

Many changes have been made in this version, but most of them are under the hood. The code has been refactored significantly in order to improve its efficiency and make future support easier. Some functionality has been rewritten from scratch. **Most** of the code is backward compatible, which means your old scripts should run with this version without problems. However, some changes are incompatible and can lead to occasional errors and warning messages. All details are shown below; pay special attention to the **breaking changes** part.

@@ -252,29 +252,29 @@

* New method `categorize()`, which allows categorizing data rows based on PLS results and critical limits computed for X- and Y-distances (a short sketch is given at the end of this fragment).

-v.0.9.6
-=======
+v. 0.9.6
+=========

* fixed a bug related to wrong calculation of R2 in the case of multiple response variables (PLS2)
* refactoring of `regres` methods
* added tests for some of the `regres` methods
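The `categorize()` method mentioned under v. 0.10.0 above can be sketched as follows. This is a hedged illustration, not code from the patch: the `people` dataset, the `m$res$cal` result object and the exact category labels are assumptions based on the package help pages.

```r
# hedged sketch: categorize calibration measurements using a PLS model
# and its critical distance limits
library(mdatools)
data(people)

X <- people[, -4]               # predictors
y <- people[, 4, drop = FALSE]  # response (Shoesize)

m <- pls(X, y, ncomp = 4, cv = 1)  # cv = 1 gives full cross-validation
categorize(m, m$res$cal)           # e.g. "regular", "extreme" or "outlier"
```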
-v.0.9.5
-=======
+v. 0.9.5
+=========

* better description of cross-validation settings in the help text (parameter `cv`)
* added column R2 (coefficient of determination) to PLS summary, as it is not always identical to `Y cumexpvar`
* better use of the `cex` parameter for group plots (it can be specified differently for each group)
* if `cex` is specified, it will also be applied to legend items

-v.0.9.4
-=======
+v. 0.9.4
+=========

* fixed a bug leading to wrong results when using the parameter `max.cov` in `prep.autoscale()` (#59)

-v.0.9.3
-=======
+v. 0.9.3
+=========

* fixed a bug leading to wrong results in multiclass PLS-DA if class labels in the reference class variable (factor) were not in alphabetical order

-v.0.9.2
-=======
+v. 0.9.2
+=========

* improvements to the `ipls()` method, plus a fix for a bug that prevented the selection loop from being stopped (#56)
* fixed a bug in `selectCompNum()` related to the use of the Wold criterion (#57)
* fixed a bug with the use of the `max.cov` parameter in `prep.autoscale()` (#58)
* code refactoring and small improvements
* added tests for `prep.autoscale()`

-v.0.9.1
-=======
+v. 0.9.1
+=========

* all plot functions have a new `opacity` parameter for semi-transparent colors
* several improvements to the PLS-DA method for one-class discrimination
* fixed a bug with wrong estimation of the maximum number of components for PCA/SIMCA with cross-validation
* added a chapter on PLS-DA to the tutorial (including the latest improvements)

-v.0.9.0
-=======
+v. 0.9.0
+=========

* added randomized PCA algorithm (efficient for datasets with a large number of rows)
* added option to inherit and show critical limits on the residuals plot for PCA/SIMCA results
* added support for the data driven approach to PCA/SIMCA (DD-SIMCA)
* added option to use equal axes limits in `plotPrediction()` for PLS results
* the tutorial has been amended and extended correspondingly

-v.0.8.4
-=======
+v. 0.8.4
+=========

* small improvements to the calculation of statistics for regression coefficients
* `pls.getRegCoeffs()` now also returns the standard error and confidence intervals calculated for unstandardized variables (a short sketch is given further below)
* new method `summary()` for the object with regression coefficients (`regcoeffs`)
* fixed a bug in some PLS plots where labels for cross-validated results were forced to be numbers
* when using `mdaplot` for a data frame with one or more factor columns, the factors are now transformed into dummy variables (before, this led to an error)

-v.0.8.3
-=======
+v. 0.8.3
+=========

* fixed a bug in `mdaplots` where using a factor with more than 8 levels for color grouping led to an error
* fixed a bug in `pca` with wrong calculation of eigenvalues in the NIPALS algorithm
* bars on a bar plot can now be color grouped

-v.0.8.2
-=======
+v. 0.8.2
+=========

* parameters `lab.cex` and `lab.col` are now also applied to colorbar labels

-v.0.8.1
-=======
+v. 0.8.1
+=========

* fixed a bug in PCA where explained variance was calculated incorrectly for data with excluded rows
* fixed several issues with SIMCA (cross-validation) and SIMCAM (Cooman's plot)
* added a chapter about SIMCA to the tutorial
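As a hedged illustration of the regression-coefficient statistics from v. 0.8.4 above (not code from the patch; function and argument names are taken from the help pages and may differ between versions):

```r
# hedged sketch: jack-knife statistics for PLS regression coefficients
library(mdatools)
data(people)

m <- pls(people[, -4], people[, 4, drop = FALSE], ncomp = 4, cv = 1)

summary(m$coeffs)                 # p-values, standard errors, confidence intervals
show(getRegcoeffs(m, ncomp = 2))  # coefficients for the unstandardized variables
```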
-v.0.8.0
-=======
+v. 0.8.0
+=========

* the tutorial has been moved from GitBook to Bookdown and fully rewritten
* the GitHub repo for the package has the tutorial as a static html site in the `docs` folder
* the `mdaplot()` and `mdaplotg()` functions were rewritten completely and are now easier to use (check the tutorial)
* added a possibility to exclude selected rows and columns from calculations
* added support for images (check the tutorial)

-v.0.7.2
-=======
+v. 0.7.2
+=========

* corrected a typo in the title of the selectivity ratio plot
* `prep.autoscale()` now does not scale columns with a coefficient of variation below a given threshold

-v.0.7.1
-=======
+v. 0.7.1
+=========

* fixed an issue leading to a plot.new() error in some cases
* documentation was regenerated with the new version of Roxygen
* file People.RData was renamed to people.RData
* NIPALS method for PCA has been added
* code optimization to speed calculations up

-v.0.7.0
-=======
+v. 0.7.0
+=========

* interval PLS variable selection (iPLS) is implemented (a short sketch is given at the end of this fragment)
* normalization was added to preprocessing methods (`prep.norm`)
* method `getRegcoeffs` was added to the PLS model
* NAMESPACE file is generated by roxygen2
* fixed several small bugs and typos

-v.0.6.2
-========
+v. 0.6.2
+==========

* Q2 residuals renamed to Q (Squared residual distance)
* All plots have parameters `lab.col` and `lab.cex` for changing the color and font size of data point labels

-v.0.6.1
-========
+v. 0.6.1
+==========

* fixed a bug which led to incorrect calculation of specificity
* jack-knife confidence intervals are now also calculated for PLS-DA models
* confidence intervals for regression coefficients are shown by default if calculated

-v.0.6.0
-========
+v. 0.6.0
+==========

* randomization test for PLS has been added, see `?randtest`
* systematic and repeated random cross-validation are available, see `?crossval`
* fixed a bug with labels on a bar plot with confidence intervals
* fixed a bug in PLS where using the maximum number of components led to NA values in weights

 v. 0.5.3
-========
+==========

* fixed several small bugs
* improved documentation for basic methods

 v. 0.5.2
-========
+==========

* fixed a bug in computing classification performance for numeric class names
* improvements to the SIMCA implementation

 v. 0.5.1
-========
+==========

* added more details to documentation
* bug fixes for variable selection methods

 v. 0.5.0
-========
+==========

* all documentation has been rewritten using the `roxygen2` package
* added extra preprocessing methods
* added VIP scores calculation and plot for PLS and PLS-DA models
* the first release available on CRAN

 v. 0.4.0
-========
+==========

* New `classres` class for representation and visualisation of classification results
* in a PCA model, limits for T2 and Q2 are now calculated for all available components
* in PCA results, limits for T2 and Q2 calculated for a model are kept and shown on the residuals plot
* bug fixes and improvements

 v. 0.3.2
-========
+==========

* Enhancements in the group bar plot
* Fixed bugs with wrong labels on a bar plot with negative values

 v. 0.3.1
-========
+==========

* Corrected errors and typos in README.md and small bug fixes

 v. 0.3.0
-========
+==========

* PLS and all related methods were rewritten from scratch to make them faster, more efficient, and also to follow the same code conventions as the previously rewritten PCA. Here are the main changes you need to make in your code if you used mdatools PLS before: `selectNumComp(model, ncomp)` instead
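The iPLS selection from v. 0.7.0 above can be sketched as below. This is again a hypothetical example, assuming the `simdata` dataset bundled with the package and the argument names from `?ipls`:

```r
# hedged sketch: forward iPLS variable selection on simulated spectra
library(mdatools)
data(simdata)

X <- simdata$spectra.c
y <- simdata$conc.c[, 2, drop = FALSE]

m <- ipls(X, y, glob.ncomp = 4, int.num = 15)
summary(m)
plot(m)  # RMSE of individual intervals vs. the model with all variables
```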
diff --git a/README.md b/README.md
index cb35654..9221516 100755
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ For more details and examples read a [Bookdown tutorial](https://mda.tools/docs/
You can also watch video lectures on the [YouTube channel](https://www.youtube.com/channel/UCox0H4utfMq4FIu2kymuyTA) devoted to the introductory Chemometrics course I give to master students. The lectures explain the theory behind basic Chemometric methods and also show how to use them in *mdatools*.

If you want to cite the package, please use the following: Sergey Kucheryavskiy, *mdatools – R package for chemometrics*, Chemometrics and Intelligent Laboratory Systems, Volume 198,
-2020 (DOI: [10.1016/j.chemolab.2020.103937](hui)).
+2020 (DOI: [10.1016/j.chemolab.2020.103937](https://doi.org/10.1016/j.chemolab.2020.103937)).

What is new
-----------