From b0ac709a89b8cc6aae9f9bddd934b3408e000bfd Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Thu, 21 Jan 2021 08:31:32 +0100 Subject: [PATCH 01/16] Added procrustes cross-validation (fixes #96) --- R/pcv.R | 172 ++++++++++++++++++++++++++++++++++++++ tests/testthat/test-pcv.R | 42 ++++++++++ 2 files changed, 214 insertions(+) create mode 100644 R/pcv.R create mode 100644 tests/testthat/test-pcv.R diff --git a/R/pcv.R b/R/pcv.R new file mode 100644 index 0000000..aca1de6 --- /dev/null +++ b/R/pcv.R @@ -0,0 +1,172 @@ +#' Compute matrix with pseudo-validation set +#' +#' @param x +#' matrix with calibration set (IxJ) +#' @param ncomp +#' number of components for PCA decomposition +#' @param nseg +#' number of segments in cross-validation +#' @param scale +#' logical, standardize columns of X prior to decompositon or not +#' +#' @description +#' The method computes pseudo-validation matrix Xpv, based on PCA decomposition of calibration set X +#' and systematic (venetian blinds) cross-validation. It is assumed that data rows are ordered +#' correctly, so systematic cross-validation can be applied. +#' +#' All details can be found in [1] +#' +#' @return +#' Pseudo-validation matrix (IxJ) +#' +#' @export +pcv <- function(x, ncomp = min(round(nrow(x)/nseg) - 1, col(x), 20), nseg = 4, scale = FALSE) { + + # keep names if any + attrs <- attributes(x) + + # convert to matrix if necessary + x <- mda.df2mat(x) + + mx <- apply(x, 2, mean) + sx <- if (scale) apply(x, 2, sd) else rep(1, ncol(x)) + + # autoscale the calibration set + x <- scale(x, center = mx, scale = sx) + + # create a global model + P <- svd(x)$v[, seq_len(ncomp), drop = FALSE] + + # create matrix with indices for cross-validation, so + # each column is number of rows to be taken as local validation set + # in corresponding segment + ind <- matrix(seq_len(nrow(x)), ncol = nseg, byrow = TRUE) + + # prepare empty matrix for pseudo-validation set + x.pv <- matrix(0, nrow(x), ncol(x)) + a <- NULL + + # cv-loop + for (k in seq_len(nseg)) { + + # split data to calibration and validation + x.c <- x[-ind[, k], , drop = FALSE] + x.k <- x[ ind[, k], , drop = FALSE] + + # get loadings for local model and rotation matrix between global and local models + P.k <- svd(x.c, nv = ncomp)$v[, seq_len(ncomp), drop = FALSE] + + # correct direction of loadings for local model + a <- acos(colSums(P * P.k)) < pi / 2 + P.k <- P.k %*% diag(a * 2 - 1, ncol(P), ncol(P)) + + # get rotation matrix between the PC spaces + R <- getR(P.k, P) + + # rotate the local validation set and save as a part of Xpv + x.pv[ind[, k], ] <- tcrossprod(x.k, R) + } + + # uscenter and unscale the data + x.pv <- sweep(x.pv, 2, sx, "*") + x.pv <- sweep(x.pv, 2, mx, "+") + + attributes(x.pv) <- attrs + return(x.pv) +} + +#' Creates rotation matrix to map a set vectors \code{base1} to a set of vectors \code{base2}. +#' +#' @param base1 +#' Matrix (JxA) with A orthonormal vectors as columns to be rotated (A <= J) +#' @param base2 +#' Matrix (JxA) with A orthonormal vectors as columns, \code{base1} should be aligned with +#' +#' @description +#' In both sets vectors should be orthonormal. +#' +#' @return +#' Rotation matrix (JxJ) +getR <- function(base1, base2) { + base1 <- as.matrix(base1); + base2 <- as.matrix(base2); + + R1 <- rotationMatrixToX1(base1[, 1]) + R2 <- rotationMatrixToX1(base2[, 1]) + + if (ncol(base1) == 1) { + R <- t(R2) %*% R1 + } else { + # Compute bases rotated to match their first vectors to [1 0 0 ... 0]' + base1_r <- as.matrix(R1 %*% base1) + base2_r <- as.matrix(R2 %*% base2) + + # Get bases of subspaces of dimension n-1 (forget x1) + nr <- nrow(base1_r) # equal to nrow(base2_r) + nc <- ncol(base1_r) # equal to ncol(base2_r) + base1_rs <- base1_r[2:nr, 2:nc] + base2_rs <- base2_r[2:nr, 2:nc] + + # Recursevely compute rotation matrix to map subspaces + Rs <- getR(base1_rs, base2_rs) + + # Construct rotation matrix of the whole space (recall x1) + M <- eye(nr) + M[2:nr, 2:nr] <- Rs + + R <- crossprod(R2, (M %*% R1)) + } + + return(R); +} + +#' Creates a rotation matrix to map a vector x to [1 0 0 ... 0] +#' +#' @param x +#' Vector (sequence with J coordinates) +#' +#' @return +#' Rotation matrix (JxJ) +rotationMatrixToX1 <- function(x) { + N <- length(x) + R <- eye(N) + step <- 1 + while (step < N) { + A <- eye(N) + n <- 1 + while (n <= N - step) { + r2 <- x[n]^2 + x[n + step]^2 + if (r2 > 0) { + r <- sqrt(r2) + pcos <- x[n] / r + psin <- -x[n + step] / r + A[n, n] <- pcos + A[n, n + step] <- -psin + A[n + step, n] <- psin + A[n + step, n + step] <- pcos + } + n <- n + 2 * step + } + step <- 2 * step + x <- A %*% x + R <- A %*% R + } + return(R) +} + + +#' Create the identity matrix +#' +#' @param n +#' Size of the matrix +#' +#' @return +#' The identity matrix (n x n) +#' +#' @export +eye <- function(n) { + X <- matrix(0, n, n) + diag(X) <- 1 + return(X) +} + diff --git a/tests/testthat/test-pcv.R b/tests/testthat/test-pcv.R new file mode 100644 index 0000000..cc317c4 --- /dev/null +++ b/tests/testthat/test-pcv.R @@ -0,0 +1,42 @@ +##################################################### +# Tests for Procrustes Cross-Validation methods # +##################################################### + +setup({ + pdf(file = tempfile("mdatools-test-pcv-", fileext = ".pdf")) + sink(tempfile("mdatools-test-pcv-", fileext = ".txt"), append = FALSE, split = FALSE) +}) + +teardown({ + dev.off() + sink() +}) + +# Simple tests for the PCV R implementation +I <- 100 +J <- 50 +A <- 10 +K <- 4 +set.seed(42) +x <- matrix(rnorm(I * J), I, J) + +params <- list() +params[[1]] <- list(x = x) +params[[2]] <- list(x = x, ncomp = 1) +params[[3]] <- list(x = x, ncomp = A) +params[[4]] <- list(x = x, ncomp = A, nseg = 4) +params[[5]] <- list(x = x, ncomp = A, nseg = 10) +params[[6]] <- list(x = x, ncomp = A, nseg = nrow(X)) +params[[7]] <- list(x = x, ncomp = A, nseg = 4, scale = TRUE) +params[[8]] <- list(x = x, ncomp = A, nseg = 10, scale = TRUE) +params[[9]] <- list(x = x, ncomp = A, nseg = nrow(X), scale = TRUE) + +context("pcv: for PCA") +for (i in seq_along(params)) { + test_that(paste0("pcv() for PCA works well with parameters set #", i), { + x.pv <- do.call(pcv, params[[i]]) + expect_equal(nrow(x.pv), nrow(x)) + expect_equal(ncol(x.pv), ncol(x)) + expect_gt(ks.test(x, x.pv)$p.value, 0.01) + }) +} From fed4718d6aff1586a771649f1611e2ad3f492f35 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Thu, 21 Jan 2021 08:55:01 +0100 Subject: [PATCH 02/16] fixes bug #95 and related issues --- R/plsdares.R | 2 +- R/regmodel.R | 2 +- tests/testthat/test-plsda.R | 13 +++++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/R/plsdares.R b/R/plsdares.R index 4540fb9..1548f06 100755 --- a/R/plsdares.R +++ b/R/plsdares.R @@ -129,7 +129,7 @@ #' @export plsdares <- function(plsres, cres) { obj <- c(plsres, cres) - class(obj) <- c("plsdares", "classres", "plsres") + class(obj) <- c("plsdares", "classres", "plsres", "regres") obj$call <- match.call() return(obj) diff --git a/R/regmodel.R b/R/regmodel.R index a261f9f..bc183f4 100755 --- a/R/regmodel.R +++ b/R/regmodel.R @@ -325,7 +325,7 @@ plotPredictions.regmodel <- function(obj, ncomp = obj$ncomp.selected, ny = 1, stop("Wrong value for 'ncomp' parameter.") } - plot_data <- lapply(res, plotPredictions, ny = ny, ncomp = ncomp, show.plot = FALSE) + plot_data <- lapply(res, plotPredictions.regres, ny = ny, ncomp = ncomp, show.plot = FALSE) attr(plot_data[[1]], "name") <- sprintf("Predictions (ncomp = %d)", ncomp) plots <- mdaplotg(plot_data, type = "p", legend.position = legend.position, ...) diff --git a/tests/testthat/test-plsda.R b/tests/testthat/test-plsda.R index 5448b09..c8c4977 100644 --- a/tests/testthat/test-plsda.R +++ b/tests/testthat/test-plsda.R @@ -110,3 +110,16 @@ test_that("predictions work fine", { summary(res) print(res) }) + +test_that("Bug #95: plots for PLS regression can be also used with PLS-DA objects", { + data(people) + X <- people[, -c(3, 9)] + c <- as.factor(people[, 9]) + m <- plsda(X, c, 3, cv = 1) + + expect_equal(class(m$res$cal), c("plsdares", "classres", "plsres", "regres")) + + par(mfrow = c(1, 2)) + expect_silent(plotRMSE(m)) + expect_silent(plotPredictions.regmodel(m)) +}) \ No newline at end of file From a47bd035f3c3f6afb297fb0afba936744b33e5bd Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Thu, 21 Jan 2021 09:50:22 +0100 Subject: [PATCH 03/16] fixes #94 --- R/ldecomp.R | 23 +++++++++++------- tests/testthat/test-ldecompplots.R | 38 ++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 8 deletions(-) diff --git a/R/ldecomp.R b/R/ldecomp.R index f84cae5..5477787 100755 --- a/R/ldecomp.R +++ b/R/ldecomp.R @@ -988,12 +988,18 @@ ldecomp.getLimitsCoordinates <- function(Qlim, T2lim, ncomp, norm, log, ldecomp.plotResiduals <- function(res, Qlim, T2lim, ncomp, log = FALSE, norm = FALSE, cgroup = NULL, xlim = NULL, ylim = NULL, show.limits = c(TRUE, TRUE), lim.col = c("darkgray", "darkgray"), lim.lwd = c(1, 1), lim.lty = c(2, 3), - show.legend = TRUE, legend.position = "topright", ...) { + show.legend = TRUE, legend.position = "topright", show.excluded = FALSE, ...) { - getPlotLim <- function(lim, pd, ld, dim, show.limits) { + # return column with values either with or without excluded outliers + getValues <- function(x, dim) { + return(if (show.excluded) x[, dim] else mda.purgeRows(x)[, dim]) + } + + # compute limits fo axis depending on values and position of critical limits + getPlotLim <- function(lim, pd, ld, dim) { if (!is.null(lim) || all(!show.limits)) return(lim) - limits <- if (show.limits[[2]]) ld$outliers else ld$extremes - return(c(0, max(sapply(pd, function(x) max(x[, dim])), limits[, dim])) * 1.05) + limits <- if (show.limits[[2]]) max(ld$outliers[, dim]) else max(ld$extremes[, dim]) + return( c(0, max(sapply(pd, function(x) { max(c(getValues(x, dim), limits)) * 1.05}))) ) } # check that show.limits is logical @@ -1015,15 +1021,16 @@ ldecomp.plotResiduals <- function(res, Qlim, T2lim, ncomp, log = FALSE, norm = F # get coordinates for critical limits lim_data <- ldecomp.getLimitsCoordinates(Qlim, T2lim, ncomp = ncomp, norm = norm, log = log) - xlim <- getPlotLim(xlim, plot_data, lim_data, 1, show.limits) - ylim <- getPlotLim(ylim, plot_data, lim_data, 2, show.limits) + xlim <- getPlotLim(xlim, plot_data, lim_data, 1) + ylim <- getPlotLim(ylim, plot_data, lim_data, 2) # make plot if (length(plot_data) == 1) { - mdaplot(plot_data[[1]], type = "p", xlim = xlim, ylim = ylim, cgroup = cgroup, ...) + mdaplot(plot_data[[1]], type = "p", xlim = xlim, ylim = ylim, cgroup = cgroup, + show.excluded = show.excluded, ...) } else { mdaplotg(plot_data, type = "p", xlim = xlim, ylim = ylim, show.legend = show.legend, - legend.position = legend.position, ...) + show.excluded = show.excluded, legend.position = legend.position, ...) } # show critical limits diff --git a/tests/testthat/test-ldecompplots.R b/tests/testthat/test-ldecompplots.R index ca74947..9039ef8 100644 --- a/tests/testthat/test-ldecompplots.R +++ b/tests/testthat/test-ldecompplots.R @@ -195,3 +195,41 @@ test_that("variance plot works fine.", { plotCumVariance(obj, type = "h", col = "red", show.labels = TRUE, labels = "names") }) }) + +test_that("Bug 94: limits for residuals plot are computed correctly", { + set.seed(42) + X <- matrix(rnorm(100 * 10), 100, 10) + X[20, ] <- X[20, ] * 10 + + m1 <- pca(X, 5) + m2 <- pca(X, 5, exclrows = 20) + + ldecomp.plotResiduals(res = list(m1$res$cal), m1$Qlim, m1$T2lim, 5, norm = TRUE) + p1 <- par("usr") + + ldecomp.plotResiduals(res = list(m2$res$cal), m2$Qlim, m2$T2lim, 5, norm = TRUE) + p2 <- par("usr") + + ldecomp.plotResiduals(res = list(m2$res$cal), m2$Qlim, m2$T2lim, 5, norm = TRUE, show.limits = FALSE) + p3 <- par("usr") + + ldecomp.plotResiduals(res = list(m2$res$cal), m2$Qlim, m2$T2lim, 5, norm = TRUE, show.excluded = TRUE) + p4 <- par("usr") + + # check that limits without outlier are small enough + expect_lt(p1[2], 35) + expect_lt(p1[4], 7) + + # check that limits with hidden outlier are small enough + expect_lt(p2[2], 8) + expect_lt(p2[4], 8) + + # check that limits with shown outlier are larger + expect_lt(p2[2], p4[2]) + expect_lt(p2[4], p4[4]) + + # now limits and hidden outliers should have smallest limits + expect_lt(p3[2], p2[2]) + expect_lt(p3[4], p2[4]) + +}) \ No newline at end of file From 7216e383c47048977bc4e5ca52a2f96adbe5af10 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Thu, 21 Jan 2021 10:14:02 +0100 Subject: [PATCH 04/16] added reference to help text for pcv() method --- R/pcv.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/pcv.R b/R/pcv.R index aca1de6..96c33ab 100644 --- a/R/pcv.R +++ b/R/pcv.R @@ -16,6 +16,11 @@ #' #' All details can be found in [1] #' +#' @references +#' 1. Kucheryavskiy, S., Zhilin, S., Rodionova, O., & Pomerantsev, A. +#' Procrustes Cross-Validation—A Bridge between Cross-Validation and Independent Validation Sets. +#' Analytical Chemistry, 92 (17), 2020. pp.11842–11850. DOI: 10.1021/acs.analchem.0c02175 +#' #' @return #' Pseudo-validation matrix (IxJ) #' From bba0e658ade5b94e3e00fe357a11c84b6edc55e3 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Thu, 21 Jan 2021 10:14:35 +0100 Subject: [PATCH 05/16] changes to help and info text as well as release notes for 0.11.3 --- NEWS.md | 12 ++++++++++++ README.md | 8 +++++--- man/mdatools.Rd | 14 +++++++++----- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/NEWS.md b/NEWS.md index 175db33..88a1c2a 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,15 @@ +v.0.11.3 +======== + +* fixed bug [#94](https://github.com/svkucheryavski/mdatools/issues/94) which caused wrong limits in PCA distance plot when outliers are present but excluded. + +* fixed bug [#95](https://github.com/svkucheryavski/mdatools/issues/95) which lead to issues when PLS regression methods (e.g. `plotRMSE()`) are used for PLS-DA model object. + +* added [Procrustes Cross-Validation method](https://doi.org/10.1021/acs.analchem.0c02175), `pcv()` (it is also available as a [separate project](https://github.com/svkucheryavski/pcv)). + +* added link to [YouTube channel](https://www.youtube.com/channel/UCox0H4utfMq4FIu2kymuyTA) with Chemometric course based on *mdatools* package. + + v.0.11.2 ======== diff --git a/README.md b/README.md index 0598e52..30ff7ec 100755 --- a/README.md +++ b/README.md @@ -11,22 +11,24 @@ Multivariate Data Analysis Tools For more details and examples read a [Bookdown tutorial](https://mdatools.com/docs/). The project website, [mdatools.com](https://mdatools.com), contains additional information about supplementary materials and tools. +You can also take video-lectures from [YouTube channel](https://www.youtube.com/channel/UCox0H4utfMq4FIu2kymuyTA) devoted to introductory Chemometric course I give to master students. The lectures explain theory behind basic Chemometric methods but also show how to use them in *mdatools*. + If you want to cite the package, please use the following: Sergey Kucheryavskiy, *mdatools – R package for chemometrics*, Chemometrics and Intelligent Laboratory Systems, Volume 198, 2020 (DOI: [10.1016/j.chemolab.2020.103937](https://doi.org/10.1016/j.chemolab.2020.103937)). What is new ----------- -Latest release (0.11.2) is available both from GitHub and CRAN. You can see the full list of changes [here](NEWS.md). The Bookdown tutorial has been also updated and contains the description of new methods added in the last release. +Latest release (0.11.3) is available both from GitHub and CRAN. You can see the full list of changes [here](NEWS.md). The Bookdown tutorial has been also updated and contains the description of new methods added in the last release. How to install -------------- -The package is available from CRAN by usual installing procedure. However, due to restrictions in CRAN politics regarding number of submissions (one in 3-4 month), mostly major releases will be published there (with 2-3 weeks delay after GitHub release as more thorough testing is needed). You can [download](https://github.com/svkucheryavski/mdatools/releases) a zip-file with source package and install it using the `install.packages` command, e.g. if the downloaded file is `mdatools_0.11.2.tar.gz` and it is located in a current working directory, just run the following: +The package is available from CRAN by usual installing procedure. However, due to restrictions in CRAN politics regarding number of submissions (one in 3-4 month), mostly major releases will be published there (with 2-3 weeks delay after GitHub release as more thorough testing is needed). You can [download](https://github.com/svkucheryavski/mdatools/releases) a zip-file with source package and install it using the `install.packages` command, e.g. if the downloaded file is `mdatools_0.11.3.tar.gz` and it is located in a current working directory, just run the following: ``` -install.packages("mdatools_0.11.2.tar.gz") +install.packages("mdatools_0.11.3.tar.gz") ``` If you have `devtools` package installed, the following command will install the current developer version from the master branch of GitHub repository (do not forget to load the `devtools` package first): diff --git a/man/mdatools.Rd b/man/mdatools.Rd index 7271620..e715882 100644 --- a/man/mdatools.Rd +++ b/man/mdatools.Rd @@ -6,17 +6,17 @@ Package for Multivariate Data Analysis (Chemometrics) } \description{ -This package contains classes and functions for most common methods used in Chemometrics. For a complete list of functions, use \code{library(help = 'mdatools')}. +This package contains classes and functions for most common methods used in Chemometrics. For a complete list of functions, use \code{library(help = 'mdatools')}. } \details{ -The porject is hosted on GitHub, there you can also find a Bookdown user tutorial (https://svkucheryavski.github.io/mdatools/) explaining most important features of the package. +The project is hosted on GitHub (https://svkucheryavski.github.io/mdatools/), there you can also find a Bookdown user tutorial explaining most important features of the package. There is also a dedicated YouTube channel (https://www.youtube.com/channel/UCox0H4utfMq4FIu2kymuyTA) with introductory Chemometric course with examples based on mdatools functionality. Every method is represented by two classes: a model class for keeping all parameters and information about the model, and a class for keeping and visualising results of applying the model to particular data values. -Every model class, e.g. \code{\link{pls}}, has all needed functionality implemented as class methods, including model calibration, validation (test set and cross-validation), visualisation of the calibration and validation results with various plots and summary statistics. +Every model class, e.g. \code{\link{pls}}, has all needed functionality implemented as class methods, including model calibration, validation (test set and cross-validation), visualisation of the calibration and validation results with various plots and summary statistics. -So far the following modelling methods are implemented: +So far the following modelling and validation methods are implemented: \tabular{ll}{ \code{\link{pca}}, \code{\link{pcares}} \tab Principal Component Analysis (PCA).\cr \code{\link{pls}}, \code{\link{plsres}} \tab Partial Least Squares regression (PLS).\cr @@ -25,6 +25,9 @@ So far the following modelling methods are implemented: \code{\link{plsda}}, \code{\link{plsdares}} \tab Partial Least Squares Dscriminant Analysis (PLS-DA).\cr \code{\link{randtest}} \tab Randomization test for PLS-regression.\cr \code{\link{ipls}} \tab Interval PLS variable.\cr + \code{\link{mcrals}} \tab Multivariate Curve Resolution with Alternating Least Squares.\cr + \code{\link{mcrpure}} \tab Multivariate Curve Resolution with Purity approach.\cr + \code{\link{pcv}} \tab Procrustes Cross Validation.\cr } Methods for data preprocessing: @@ -34,9 +37,10 @@ Methods for data preprocessing: \code{\link{prep.snv}} \tab Standard normal variate.\cr \code{\link{prep.msc}} \tab Multiplicative scatter correction.\cr \code{\link{prep.norm}} \tab Spectra normalization.\cr + \code{\link{prep.alsbasecorr}} \tab Baseline correction with Asymmetric Least Squares.\cr } -All plotting methods are based on two functions, \code{\link{mdaplot}} and \code{\link{mdaplotg}}. The functions extend the basic functionality of R plots and allow to make automatic legend and color grouping of data points or lines with colorbar legend, automatically adjust axes limits when several data groups are plotted and so on. +All plotting methods are based on two functions, \code{\link{mdaplot}} and \code{\link{mdaplotg}}. The functions extend the basic functionality of R plots and allow to make automatic legend and color grouping of data points or lines with colorbar legend, automatically adjust axes limits when several data groups are plotted and so on. } \author{ From 13dea44be5a900eb13bb4a5d3f0ae90ef842fd11 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Thu, 21 Jan 2021 10:32:01 +0100 Subject: [PATCH 06/16] added GitHub action and removed Travis CI configuration --- .github/workflows/test-package.yml | 82 ++++++++++++++++++++++++++++++ .travis.yml | 15 ------ 2 files changed, 82 insertions(+), 15 deletions(-) create mode 100644 .github/workflows/test-package.yml delete mode 100644 .travis.yml diff --git a/.github/workflows/test-package.yml b/.github/workflows/test-package.yml new file mode 100644 index 0000000..a65e9af --- /dev/null +++ b/.github/workflows/test-package.yml @@ -0,0 +1,82 @@ +on: + push: + branches: + - main + - master + - develop + pull_request: + branches: + - main + - master + - develop + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: windows-latest, r: 'release'} + - {os: macOS-latest, r: 'release'} + - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v2 + - uses: r-lib/actions/setup-r@v1 + with: + r-version: ${{ matrix.config.r }} + + - uses: r-lib/actions/setup-pandoc@v1 + + - name: Query dependencies + run: | + install.packages('remotes') + saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) + writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") + shell: Rscript {0} + + - name: Cache R packages + if: runner.os != 'Windows' + uses: actions/cache@v2 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- + + - name: Install system dependencies + if: runner.os == 'Linux' + run: | + while read -r cmd + do + eval sudo $cmd + done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') + + - name: Install dependencies + run: | + remotes::install_deps(dependencies = TRUE) + remotes::install_cran("rcmdcheck") + shell: Rscript {0} + + - name: Check + env: + _R_CHECK_CRAN_INCOMING_REMOTE_: false + run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") + shell: Rscript {0} + + - name: Upload check results + if: failure() + uses: actions/upload-artifact@main + with: + name: ${{ runner.os }}-r${{ matrix.config.r }}-results + path: check \ No newline at end of file diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 1ef7cc5..0000000 --- a/.travis.yml +++ /dev/null @@ -1,15 +0,0 @@ -language: r - -r: - - release - - devel - -r_packages: - - covr - -after_success: - - travis_wait 20 Rscript -e 'library(covr); codecov()' - -script: - - R CMD build . - - travis_wait 20 R CMD check --as-cran *.tar.gz \ No newline at end of file From 157e5267f4c43cec808a6a70537bdda55f0a343c Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Thu, 21 Jan 2021 11:29:33 +0100 Subject: [PATCH 07/16] fixed a small bug in pcv test --- tests/testthat/test-pcv.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-pcv.R b/tests/testthat/test-pcv.R index cc317c4..e22e140 100644 --- a/tests/testthat/test-pcv.R +++ b/tests/testthat/test-pcv.R @@ -26,10 +26,10 @@ params[[2]] <- list(x = x, ncomp = 1) params[[3]] <- list(x = x, ncomp = A) params[[4]] <- list(x = x, ncomp = A, nseg = 4) params[[5]] <- list(x = x, ncomp = A, nseg = 10) -params[[6]] <- list(x = x, ncomp = A, nseg = nrow(X)) +params[[6]] <- list(x = x, ncomp = A, nseg = nrow(x)) params[[7]] <- list(x = x, ncomp = A, nseg = 4, scale = TRUE) params[[8]] <- list(x = x, ncomp = A, nseg = 10, scale = TRUE) -params[[9]] <- list(x = x, ncomp = A, nseg = nrow(X), scale = TRUE) +params[[9]] <- list(x = x, ncomp = A, nseg = nrow(x), scale = TRUE) context("pcv: for PCA") for (i in seq_along(params)) { From c482e22183fa0fe7563e25ba0088c64bc90eff22 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Thu, 21 Jan 2021 11:57:10 +0100 Subject: [PATCH 08/16] small improvements to help text --- DESCRIPTION | 4 ++-- NAMESPACE | 2 ++ R/ldecomp.R | 2 ++ man/eye.Rd | 17 +++++++++++++++++ man/getR.Rd | 19 ++++++++++++++++++ man/ldecomp.plotResiduals.Rd | 3 +++ man/pcv.Rd | 37 ++++++++++++++++++++++++++++++++++++ man/rotationMatrixToX1.Rd | 17 +++++++++++++++++ 8 files changed, 99 insertions(+), 2 deletions(-) create mode 100644 man/eye.Rd create mode 100644 man/getR.Rd create mode 100644 man/pcv.Rd create mode 100644 man/rotationMatrixToX1.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 69a9f0d..5e711f4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: mdatools Title: Multivariate Data Analysis for Chemometrics -Version: 0.11.2 -Date: 2020-10-22 +Version: 0.11.3 +Date: 2021-01-21 Author: Sergey Kucheryavskiy Maintainer: Sergey Kucheryavskiy Description: Projection based methods for preprocessing, diff --git a/NAMESPACE b/NAMESPACE index 8e4bec5..2630f3f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -172,6 +172,7 @@ export(ddmoments.param) export(ddrobust.param) export(ellipse) export(employ) +export(eye) export(fprintf) export(getCalibrationData) export(getConfusionMatrix) @@ -233,6 +234,7 @@ export(pca) export(pca.mvreplace) export(pca.run) export(pcares) +export(pcv) export(pinv) export(plotBars) export(plotBiplot) diff --git a/R/ldecomp.R b/R/ldecomp.R index 5477787..d87c51b 100755 --- a/R/ldecomp.R +++ b/R/ldecomp.R @@ -964,6 +964,8 @@ ldecomp.getLimitsCoordinates <- function(Qlim, T2lim, ncomp, norm, log, #' logical, show or not legend on the plot (if more than one result object) #' @param legend.position #' if legend must be shown, where it should be +#' @param show.excluded +#' logical, show or hide rows marked as excluded (attribute `exclrows`). #' @param ... #' other plot parameters (see \code{mdaplotg} for details) #' diff --git a/man/eye.Rd b/man/eye.Rd new file mode 100644 index 0000000..1d257c9 --- /dev/null +++ b/man/eye.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pcv.R +\name{eye} +\alias{eye} +\title{Create the identity matrix} +\usage{ +eye(n) +} +\arguments{ +\item{n}{Size of the matrix} +} +\value{ +The identity matrix (n x n) +} +\description{ +Create the identity matrix +} diff --git a/man/getR.Rd b/man/getR.Rd new file mode 100644 index 0000000..045b7e7 --- /dev/null +++ b/man/getR.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pcv.R +\name{getR} +\alias{getR} +\title{Creates rotation matrix to map a set vectors \code{base1} to a set of vectors \code{base2}.} +\usage{ +getR(base1, base2) +} +\arguments{ +\item{base1}{Matrix (JxA) with A orthonormal vectors as columns to be rotated (A <= J)} + +\item{base2}{Matrix (JxA) with A orthonormal vectors as columns, \code{base1} should be aligned with} +} +\value{ +Rotation matrix (JxJ) +} +\description{ +In both sets vectors should be orthonormal. +} diff --git a/man/ldecomp.plotResiduals.Rd b/man/ldecomp.plotResiduals.Rd index df384fb..1351769 100644 --- a/man/ldecomp.plotResiduals.Rd +++ b/man/ldecomp.plotResiduals.Rd @@ -20,6 +20,7 @@ ldecomp.plotResiduals( lim.lty = c(2, 3), show.legend = TRUE, legend.position = "topright", + show.excluded = FALSE, ... ) } @@ -54,6 +55,8 @@ ldecomp.plotResiduals( \item{legend.position}{if legend must be shown, where it should be} +\item{show.excluded}{logical, show or hide rows marked as excluded (attribute `exclrows`).} + \item{...}{other plot parameters (see \code{mdaplotg} for details)} } \description{ diff --git a/man/pcv.Rd b/man/pcv.Rd new file mode 100644 index 0000000..f5ae4b6 --- /dev/null +++ b/man/pcv.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pcv.R +\name{pcv} +\alias{pcv} +\title{Compute matrix with pseudo-validation set} +\usage{ +pcv( + x, + ncomp = min(round(nrow(x)/nseg) - 1, col(x), 20), + nseg = 4, + scale = FALSE +) +} +\arguments{ +\item{x}{matrix with calibration set (IxJ)} + +\item{ncomp}{number of components for PCA decomposition} + +\item{nseg}{number of segments in cross-validation} + +\item{scale}{logical, standardize columns of X prior to decompositon or not} +} +\value{ +Pseudo-validation matrix (IxJ) +} +\description{ +The method computes pseudo-validation matrix Xpv, based on PCA decomposition of calibration set X +and systematic (venetian blinds) cross-validation. It is assumed that data rows are ordered +correctly, so systematic cross-validation can be applied. + +All details can be found in [1] +} +\references{ +1. Kucheryavskiy, S., Zhilin, S., Rodionova, O., & Pomerantsev, A. +Procrustes Cross-Validation—A Bridge between Cross-Validation and Independent Validation Sets. +Analytical Chemistry, 92 (17), 2020. pp.11842–11850. DOI: 10.1021/acs.analchem.0c02175 +} diff --git a/man/rotationMatrixToX1.Rd b/man/rotationMatrixToX1.Rd new file mode 100644 index 0000000..8b3b2bf --- /dev/null +++ b/man/rotationMatrixToX1.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pcv.R +\name{rotationMatrixToX1} +\alias{rotationMatrixToX1} +\title{Creates a rotation matrix to map a vector x to [1 0 0 ... 0]} +\usage{ +rotationMatrixToX1(x) +} +\arguments{ +\item{x}{Vector (sequence with J coordinates)} +} +\value{ +Rotation matrix (JxJ) +} +\description{ +Creates a rotation matrix to map a vector x to [1 0 0 ... 0] +} From d55e3d14efd785dfd1f7308224f67f70293aaa8d Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Thu, 21 Jan 2021 12:52:22 +0100 Subject: [PATCH 09/16] switched back to fixed width of bars in barplots --- R/plotseries.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/plotseries.R b/R/plotseries.R index 63d691b..988361f 100644 --- a/R/plotseries.R +++ b/R/plotseries.R @@ -640,10 +640,13 @@ plotBars <- function(ps, col = ps$col, bwd = 0.8, border = NA, force.x.values = y <- ps$y_values[1, ] if (length(x) > 1) { - bwd_left <- c(x[seq(2, length(x))] - x[seq(1, length(x) - 1)]) - bwd_right <- -c(x[seq(1, length(x) - 1)] - x[seq(2, length(x))]) - bwd_left <- c(bwd_left[1], bwd_left) * bwd / 2 - bwd_right <- c(bwd_right, bwd_right[length(bwd_right)]) * bwd / 2 + # this gives variable width for bars and does not work well + #bwd_left <- c(x[seq(2, length(x))] - x[seq(1, length(x) - 1)]) + #bwd_right <- -c(x[seq(1, length(x) - 1)] - x[seq(2, length(x))]) + #bwd_left <- c(bwd_left[1], bwd_left) * bwd / 2 + #bwd_right <- c(bwd_right, bwd_right[length(bwd_right)]) * bwd / 2 + dx <- min(diff(x)) + bwd_right <- bwd_left <- dx * bwd / 2 } else { bwd_left <- bwd_right <- bwd * x / 2 } From aa26136e181dad1f2eb1d7a400362429924c6352 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Thu, 21 Jan 2021 13:01:57 +0100 Subject: [PATCH 10/16] removed sink() from one of the tests to see how it works on r-devel --- tests/testthat/test-mdaplot2.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-mdaplot2.R b/tests/testthat/test-mdaplot2.R index ae5ce86..473332d 100644 --- a/tests/testthat/test-mdaplot2.R +++ b/tests/testthat/test-mdaplot2.R @@ -4,12 +4,12 @@ setup({ pdf(file = tempfile("mdatools-test-mdaplot2-", fileext = ".pdf")) - sink(tempfile("mdatools-test-mdaplot2-", fileext = ".txt"), append = FALSE, split = FALSE) + #sink(tempfile("mdatools-test-mdaplot2-", fileext = ".txt"), append = FALSE, split = FALSE) }) teardown({ dev.off() - sink() + #sink() }) par(mfrow = c(2, 2)) From bb0a1bbb96abe7abbe61242c60b028efe3017caa Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Fri, 22 Jan 2021 09:13:35 +0100 Subject: [PATCH 11/16] added additional check for parameter "cgroup" --- R/mdaplot.R | 4 ++++ tests/testthat/test-mdaplot2.R | 10 +++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/R/mdaplot.R b/R/mdaplot.R index ed7e6be..b11cb17 100755 --- a/R/mdaplot.R +++ b/R/mdaplot.R @@ -273,6 +273,10 @@ mdaplot.getColors <- function(ngroups = NULL, cgroup = NULL, colmap = "default", return(mdaplot.prepareColors(palette, ngroups, opacity)) } + if (!is.null(dim(cgroup))) { + stop("Parameter 'cgroup' should be a vector of values or a factor.") + } + # if cgroup is factor return vector with corresponding values if (is.factor(cgroup)) { ngroups <- length(attr(cgroup, "levels")) diff --git a/tests/testthat/test-mdaplot2.R b/tests/testthat/test-mdaplot2.R index 473332d..51ba08e 100644 --- a/tests/testthat/test-mdaplot2.R +++ b/tests/testthat/test-mdaplot2.R @@ -4,12 +4,12 @@ setup({ pdf(file = tempfile("mdatools-test-mdaplot2-", fileext = ".pdf")) - #sink(tempfile("mdatools-test-mdaplot2-", fileext = ".txt"), append = FALSE, split = FALSE) + sink(tempfile("mdatools-test-mdaplot2-", fileext = ".txt"), append = FALSE, split = FALSE) }) teardown({ dev.off() - #sink() + sink() }) par(mfrow = c(2, 2)) @@ -60,6 +60,10 @@ test_that("excluded values and color grouping work fine together", { expect_silent(mdaplot(pd, cgroup = d, main = "Colormap: jet", colmap = "jet")) expect_silent(mdaplot(pd, cgroup = d, main = "Colormap: user", colmap = usr_cmap1)) expect_silent(mdaplot(pd, cgroup = d, main = "Colormap: user", colmap = usr_cmap2)) + + # if cgroup is a matrix or is a vector - raise an error + expect_error(mdaplot(pd, cgroup = as.matrix(d))) + expect_error(mdaplot(pd, cgroup = as.data.frame(d))) }) par(mfrow = c(2, 2)) @@ -75,7 +79,7 @@ test_that("opacity parameter works well together with color grouping", { expect_silent(tf(type = "p", cgroup = people[, "Wine"], opacity = 0.5)) expect_silent(tf(type = "l", cgroup = people[, "Wine"], opacity = 0.5)) expect_silent(tf(type = "b", cgroup = people[, "Wine"], opacity = 0.5)) - expect_silent(tf(type = "h", cgroup = people[1, ], opacity = 0.5)) + expect_silent(tf(type = "h", cgroup = as.numeric(people[1, ]), opacity = 0.5)) }) par(mfrow = c(4, 2)) From 4aed860e5292a0a5b7df0c99981c7e004fbfde24 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Fri, 22 Jan 2021 10:03:00 +0100 Subject: [PATCH 12/16] refactoring og preprocessing methods and added new: prep.ref2km() --- R/prep.R | 250 ++++++++++++++++++++----------------- man/prep.generic.Rd | 18 +++ man/prep.ref2km.Rd | 22 ++++ tests/testthat/test-prep.R | 19 ++- 4 files changed, 191 insertions(+), 118 deletions(-) create mode 100644 man/prep.generic.Rd create mode 100644 man/prep.ref2km.Rd diff --git a/R/prep.R b/R/prep.R index c20474b..1024792 100755 --- a/R/prep.R +++ b/R/prep.R @@ -26,42 +26,40 @@ #' @export prep.autoscale <- function(data, center = TRUE, scale = FALSE, max.cov = 0) { - attrs <- mda.getattr(data) - dimnames <- dimnames(data) + f <- function(data, center, scale, max.cov) { + # define values for centering + if (is.logical(center) && center) center <- apply(data, 2, mean) - # define values for centering - if (is.logical(center) && center) center <- apply(data, 2, mean) - - if (is.numeric(center) && length(center) != ncol(data)) { - stop("Number of values in 'center' should be the same as number of columns in 'daata'") - } + if (is.numeric(center) && length(center) != ncol(data)) { + stop("Number of values in 'center' should be the same as number of columns in 'daata'") + } - # define values for weigting - if (is.logical(scale) && scale) scale <- apply(data, 2, sd) + # define values for weigting + if (is.logical(scale) && scale) scale <- apply(data, 2, sd) - if (is.numeric(scale) && length(scale) != ncol(data)) { - stop("Number of values in 'scale' should be the same as number of columns in 'daata'") - } + if (is.numeric(scale) && length(scale) != ncol(data)) { + stop("Number of values in 'scale' should be the same as number of columns in 'daata'") + } - # compute coefficient of variation and set scale to 1 if it is below - # a user defined threshold - if (is.numeric(scale)) { - m <- if (is.numeric(center)) center else apply(data, 2, mean) - cv <- scale / abs(m) * 100 - scale[is.nan(cv) | cv <= max.cov] <- 1 - } + # compute coefficient of variation and set scale to 1 if it is below + # a user defined threshold + if (is.numeric(scale)) { + m <- if (is.numeric(center)) center else apply(data, 2, mean) + cv <- scale / abs(m) * 100 + scale[is.nan(cv) | cv <= max.cov] <- 1 + } - # make autoscaling and attach preprocessing attributes - data <- scale(data, center = center, scale = scale) + # make autoscaling and attach preprocessing attributes + data <- scale(data, center = center, scale = scale) + attr(data, "scaled:center") <- NULL + attr(data, "scaled:scale") <- NULL + attr(data, "prep:center") <- center + attr(data, "prep:scale") <- scale - data <- mda.setattr(data, attrs) - attr(data, "scaled:center") <- NULL - attr(data, "scaled:scale") <- NULL - attr(data, "prep:center") <- center - attr(data, "prep:scale") <- scale - dimnames(data) <- dimnames + return(data) + } - return(data) + return(prep.generic(data, f, center = center, scale = scale, max.cov = max.cov)) } #' Standard Normal Variate transformation @@ -97,14 +95,9 @@ prep.autoscale <- function(data, center = TRUE, scale = FALSE, max.cov = 0) { #' #' @export prep.snv <- function(data) { - attrs <- mda.getattr(data) - dimnames <- dimnames(data) - - data <- t(scale(t(data), center = TRUE, scale = TRUE)) - data <- mda.setattr(data, attrs) - dimnames(data) <- dimnames - return(data) + f <- function(data) t(scale(t(data), center = TRUE, scale = TRUE)) + return(prep.generic(data, f)) } #' Normalization @@ -122,25 +115,21 @@ prep.snv <- function(data) { #' #' @export prep.norm <- function(data, type = "area") { - attrs <- mda.getattr(data) - dimnames <- dimnames(data) - - w <- switch( - type, - "area" = apply(abs(data), 1, sum), - "length" = sqrt(apply(data^2, 1, sum)), - "sum" = apply(data, 1, sum) - ) - - if (is.null(w)) { - stop("Wrong value for argument 'type'.") - } - data <- sweep(data, 1, w, "/") - data <- mda.setattr(data, attrs) - dimnames(data) <- dimnames + f <- function(data, type) { + + w <- switch( + type, + "area" = apply(abs(data), 1, sum), + "length" = sqrt(apply(data^2, 1, sum)), + "sum" = apply(data, 1, sum) + ) - return(data) + if (is.null(w)) stop("Wrong value for argument 'type'.") + return(sweep(data, 1, w, "/")) + } + + return(prep.generic(data, f, type = type)) } #' Savytzky-Golay filter @@ -159,45 +148,30 @@ prep.norm <- function(data, type = "area") { #' #' @export prep.savgol <- function(data, width = 3, porder = 1, dorder = 0) { - attrs <- mda.getattr(data) - dimnames <- dimnames(data) - - if (width < 3) { - stop("Filter width ('width') should be equal at least to 3.") - } - - if (width %% 2 != 1) { - stop("Filter width ('width') should be an odd integer number.") - } - - if (dorder < 0 || dorder > 2) { - stop("Wrong value for the derivative order (should be 0, 1, or 2).") - } - - if (porder < 0 || porder > 4) { - stop("Wrong value for the polynomial degree (should be integer number between 0 and 4).") - } - - if (porder < dorder) { - stop("Polynomal degree ('porder') should not be smaller the derivative order ('dorder').") - } - nobj <- nrow(data) - nvar <- ncol(data) - pdata <- matrix(0, ncol = nvar, nrow = nobj) + f <- function(data, width, porder, dorder) { + stopifnot("Filter width ('width') should be equal at least to 3." = width > 2) + stopifnot("Filter width ('width') should be an odd integer number." = width %% 2 == 1) + stopifnot("Wrong value for the derivative order (should be 0, 1, or 2)." = dorder %in% (0:2)) + stopifnot("Wrong value for the polynomial degree (should be integer number between 0 and 4)." = porder %in% (0:4)) + stopifnot("Polynomal degree ('porder') should not be smaller the derivative order ('dorder')." = porder >= dorder) + + nobj <- nrow(data) + nvar <- ncol(data) + pdata <- matrix(0, ncol = nvar, nrow = nobj) + + for (i in seq_len(nobj)) { + d <- data[i, ] + w <- (width - 1) / 2 + f <- pinv(outer(-w:w, 0:porder, FUN = "^")) + d <- convolve(d, rev(f[dorder + 1, ]), type = "o") + pdata[i, ] <- d[(w + 1) : (length(d) - w)] + } - for (i in seq_len(nobj)) { - d <- data[i, ] - w <- (width - 1) / 2 - f <- pinv(outer(-w:w, 0:porder, FUN = "^")) - d <- convolve(d, rev(f[dorder + 1, ]), type = "o") - pdata[i, ] <- d[(w + 1) : (length(d) - w)] + return(pdata) } - pdata <- mda.setattr(pdata, attrs) - dimnames(pdata) <- dimnames - - return(pdata) + return(prep.generic(data, f, width = width, porder = porder, dorder = dorder)) } #' Multiplicative Scatter Correction transformation @@ -233,49 +207,57 @@ prep.savgol <- function(data, width = 3, porder = 1, dorder = 0) { #' #' @export prep.msc <- function(spectra, mspectrum = NULL) { - attrs <- mda.getattr(spectra) - dimnames <- dimnames(spectra) - if (is.null(mspectrum)) { - mspectrum <- apply(spectra, 2, mean) - } + f <- function(spectra, mspectrum) { + if (is.null(mspectrum)) { + mspectrum <- apply(spectra, 2, mean) + } - if (!is.null(mspectrum)) { - dim(mspectrum) <- NULL - } + if (!is.null(mspectrum)) { + dim(mspectrum) <- NULL + } - if (length(mspectrum) != ncol(spectra)) { - stop("Length of 'mspectrum' should be the same as number of columns in 'spectra'.") - } + if (length(mspectrum) != ncol(spectra)) { + stop("Length of 'mspectrum' should be the same as number of columns in 'spectra'.") + } - cspectra <- matrix(0, nrow = nrow(spectra), ncol = ncol(spectra)) - for (i in seq_len(nrow(spectra))) { - coef <- coef(lm(spectra[i, ] ~ mspectrum)) - cspectra[i, ] <- (spectra[i, ] - coef[1]) / coef[2] - } + pspectra <- matrix(0, nrow = nrow(spectra), ncol = ncol(spectra)) + for (i in seq_len(nrow(spectra))) { + coef <- coef(lm(spectra[i, ] ~ mspectrum)) + pspectra[i, ] <- (spectra[i, ] - coef[1]) / coef[2] + } - cspectra <- mda.setattr(cspectra, attrs) - attr(cspectra, "mspectrum") <- mspectrum - dimnames(cspectra) <- dimnames + attr(pspectra, "mspectrum") <- mspectrum + return(pspectra) + } - return(cspectra) + return(prep.generic(spectra, f, mspectrum = mspectrum)) } -#' Pseudo-inverse matrix + +#' Kubelka-Munk transformation #' #' @description -#' Computes pseudo-inverse matrix using SVD +#' Applies Kubelka-Munk (km) transformation to data matrix (spectra) #' -#' @param data -#' a matrix with data values to compute inverse for +#' @param spectra +#' a matrix with spectra values (absolute reflectance values) +#' +#' @return +#' preprocessed spectra. +#' +#' @details +#' Kubelka-Munk is useful preprocessing method for diffuse reflection spectra (e.g. taken for +#' powders or rough surface). It transforms the reflectance spectra R to K/M units as follows: +#' (1 - R)^2 / 2R #' #' @export -pinv <- function(data) { - # Calculates pseudo-inverse of data matrix - s <- svd(data) - s$v %*% diag(1 / s$d) %*% t(s$u) -} +prep.ref2km <- function(spectra) { + stopifnot("Can't use Kubelka-Munk transformation as some of the values are zeros or negative." = all(spectra > 0)) + f <- function(x) (1 - x)^2 / (2 * x) + return(prep.generic(spectra, f)) +} #' Baseline correction using assymetric least squares #' @@ -347,3 +329,37 @@ prep.alsbasecorr <- function(spectra, plambda = 5, p = 0.1, max.niter = 10) { return(pspectra) } + +#' Pseudo-inverse matrix +#' +#' @description +#' Computes pseudo-inverse matrix using SVD +#' +#' @param data +#' a matrix with data values to compute inverse for +#' +#' @export +pinv <- function(data) { + # Calculates pseudo-inverse of data matrix + s <- svd(data) + s$v %*% diag(1 / s$d) %*% t(s$u) +} + +#' Generic function for preprocessing +#' +#' @param x +#' data matrix to be preprocessed +#' @param f +#' function for preprocessing +#' @param ... +#' arguments for the function f +#' +#' +prep.generic <- function(x, f, ...) { + attrs <- mda.getattr(x) + dimnames <- dimnames(x) + x.p <- f(x, ...) + x.p <- mda.setattr(x.p, attrs) + dimnames(x.p) <- dimnames + return(x.p) +} \ No newline at end of file diff --git a/man/prep.generic.Rd b/man/prep.generic.Rd new file mode 100644 index 0000000..400ca75 --- /dev/null +++ b/man/prep.generic.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prep.R +\name{prep.generic} +\alias{prep.generic} +\title{Generic function for preprocessing} +\usage{ +prep.generic(x, f, ...) +} +\arguments{ +\item{x}{data matrix to be preprocessed} + +\item{f}{function for preprocessing} + +\item{...}{arguments for the function f} +} +\description{ +Generic function for preprocessing +} diff --git a/man/prep.ref2km.Rd b/man/prep.ref2km.Rd new file mode 100644 index 0000000..3b16ea7 --- /dev/null +++ b/man/prep.ref2km.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prep.R +\name{prep.ref2km} +\alias{prep.ref2km} +\title{Kubelka-Munk transformation} +\usage{ +prep.ref2km(spectra) +} +\arguments{ +\item{spectra}{a matrix with spectra values (absolute reflectance values)} +} +\value{ +preprocessed spectra. +} +\description{ +Applies Kubelka-Munk (km) transformation to data matrix (spectra) +} +\details{ +Kubelka-Munk is useful preprocessing method for diffuse reflection spectra (e.g. taken for +powders or rough surface). It transforms the reflectance spectra R to K/M units as follows: +(1 - R)^2 / 2R +} diff --git a/tests/testthat/test-prep.R b/tests/testthat/test-prep.R index a866d1f..3a33763 100644 --- a/tests/testthat/test-prep.R +++ b/tests/testthat/test-prep.R @@ -57,7 +57,7 @@ test_that("Full autoscaling with limits for max.cov is correct for negative data }) -context("prep: snv, msc, norm") +context("prep: snv, msc, norm, km") test_that("SNV works correctly", { spectra <- simdata$spectra.c @@ -110,6 +110,23 @@ test_that("Normalization to unit length works correctly", { expect_equivalent(apply(pspectra, 1, function(x) sqrt(sum(x^2))), rep(1, nrow(pspectra))) }) + + +test_that("Kubelka-Munk works correctly", { + spectra <- simdata$spectra.c + spectra <- spectra - min(spectra) + 0.01 * max(spectra) + expect_silent(pspectra <- prep.ref2km(spectra)) + expect_equal(mda.getattr(spectra), mda.getattr(pspectra)) + expect_equal(dim(spectra), dim(pspectra)) + expect_true(all(pspectra > 0)) + + spectra[10, 10] <- 0 + expect_error(prep.km(spectra)) + + spectra[10, 10] <- -1 + expect_error(prep.km(spectra)) +}) + context("prep: savgol") test_that("SavGol smoothing works correctly", { From 703716576f4de3bee5363b88d7d503003088b8c3 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Fri, 22 Jan 2021 10:03:23 +0100 Subject: [PATCH 13/16] text additions and improvements --- NAMESPACE | 1 + NEWS.md | 6 +++++- README.md | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 2630f3f..808c243 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -299,6 +299,7 @@ export(prep.alsbasecorr) export(prep.autoscale) export(prep.msc) export(prep.norm) +export(prep.ref2km) export(prep.savgol) export(prep.snv) export(prepCalData) diff --git a/NEWS.md b/NEWS.md index 88a1c2a..ec834ba 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,11 +1,15 @@ v.0.11.3 ======== +* added [Procrustes Cross-Validation method](https://doi.org/10.1021/acs.analchem.0c02175), `pcv()` (it is also available as a [separate project](https://github.com/svkucheryavski/pcv)). + +* added Kubelka-Munk transformation for diffuse reflectance spectra (`prep.ref2km()`). + * fixed bug [#94](https://github.com/svkucheryavski/mdatools/issues/94) which caused wrong limits in PCA distance plot when outliers are present but excluded. * fixed bug [#95](https://github.com/svkucheryavski/mdatools/issues/95) which lead to issues when PLS regression methods (e.g. `plotRMSE()`) are used for PLS-DA model object. -* added [Procrustes Cross-Validation method](https://doi.org/10.1021/acs.analchem.0c02175), `pcv()` (it is also available as a [separate project](https://github.com/svkucheryavski/pcv)). +* added additional check that parameter `cgroup` for plotting functions is provided as a vector or as a factor to avoid confusion. * added link to [YouTube channel](https://www.youtube.com/channel/UCox0H4utfMq4FIu2kymuyTA) with Chemometric course based on *mdatools* package. diff --git a/README.md b/README.md index 30ff7ec..2df171f 100755 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ Multivariate Data Analysis Tools =========================================== +![GitHub build status](https://github.com/actions/mdatools/workflows/R-CMD-check/badge.svg) ![Travis (build status)](https://img.shields.io/travis/svkucheryavski/mdatools?color=blue&style=flat-square "Travis CI build status") ![Downloads (CRAN)](https://cranlogs.r-pkg.org/badges/grand-total/mdatools?color=blue&logo=R&style=flat-square "Downloads from CRAN") ![GitHub All Releases](https://img.shields.io/github/downloads/svkucheryavski/mdatools/total?color=blue&logo=Github&style=flat-square "Downloads from GitHub") From c2c2495901ecef2fbf64314628d4a394e20a2541 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Fri, 22 Jan 2021 10:38:55 +0100 Subject: [PATCH 14/16] added MIT license file for GitHub --- .gitignore | 1 + LICENSE.md | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 LICENSE.md diff --git a/.gitignore b/.gitignore index 1b8dad1..7cb37b6 100755 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ .DS_Store .lintr .vscode/* +LICENSE tests/plots/*.pdf tests/plots/*.txt diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..7cf9f2d --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,20 @@ +Copyright (c) 2016-2021 Sergey Kucheryavskiy + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file From 4b0e319a640f3a1bd57d1f796680a6ac18b68135 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Fri, 22 Jan 2021 10:45:21 +0100 Subject: [PATCH 15/16] switched build badge from travis ci to github actions --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2df171f..8f9ad7f 100755 --- a/README.md +++ b/README.md @@ -1,9 +1,8 @@ Multivariate Data Analysis Tools =========================================== -![GitHub build status](https://github.com/actions/mdatools/workflows/R-CMD-check/badge.svg) -![Travis (build status)](https://img.shields.io/travis/svkucheryavski/mdatools?color=blue&style=flat-square "Travis CI build status") +![GitHub build status](https://github.com/svkucheryavski/mdatools/workflows/R-CMD-check/badge.svg) +![GitHub All Releases](https://img.shields.io/github/downloads/svkucheryavski/mdatools/total?color=blue&logo=Github "Downloads from GitHub") ![Downloads (CRAN)](https://cranlogs.r-pkg.org/badges/grand-total/mdatools?color=blue&logo=R&style=flat-square "Downloads from CRAN") -![GitHub All Releases](https://img.shields.io/github/downloads/svkucheryavski/mdatools/total?color=blue&logo=Github&style=flat-square "Downloads from GitHub") [![codecov](https://codecov.io/gh/svkucheryavski/mdatools/branch/0.10.0/graph/badge.svg?style=flat-square)](https://codecov.io/gh/svkucheryavski/mdatools) From 2dc9b17ad10be9efb1e7c530165c6363df3105fd Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Fri, 22 Jan 2021 11:26:11 +0100 Subject: [PATCH 16/16] added ORCID to DESCRIPTION file --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5e711f4..a058af2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: mdatools Title: Multivariate Data Analysis for Chemometrics Version: 0.11.3 Date: 2021-01-21 -Author: Sergey Kucheryavskiy +Author: Sergey Kucheryavskiy () Maintainer: Sergey Kucheryavskiy Description: Projection based methods for preprocessing, exploring and analysis of multivariate data used in chemometrics.