Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
svkucheryavski committed Nov 9, 2018
2 parents 1604c21 + 9398bf7 commit b88fd15
Show file tree
Hide file tree
Showing 11 changed files with 198 additions and 159 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: mdatools
Title: Multivariate Data Analysis for Chemometrics
Version: 0.9.1
Date: 2018-07-06
Version: 0.9.2
Date: 2018-11-09
Author: Sergey Kucheryavskiy
Maintainer: Sergey Kucheryavskiy <[email protected]>
Description: Package implements projection based methods for preprocessing,
Expand Down
9 changes: 9 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
v.0.9.2
=======
* improvements to `ipls()` method plus fixed a bug preventing breaking the selection loop (#56)
* fixed a bug in `selectCompNum()` related to use of Wold criterion (#57)
* fixed a bug with using of `max.cov` parameter in `prep.autoscale()` (#58)
* default `max.cov` value in `prep.autoscale()` is set to 0 (to avoid scaling only of constant variables)
* code refactoring and small improvements
* added tests for `prep.autoscale()`

v.0.9.1
=======
* all plot functions have new `opacity` parameter for semi-transparent colors
Expand Down
2 changes: 2 additions & 0 deletions R/classres.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
#' \code{\link{plotPerformance.classres}} \tab makes plot with misclassified ration, specificity
#' and sensitivity values.\cr
#' }
#'
#' @export
classres = function(c.pred, c.ref = NULL, p.pred = NULL, ncomp.selected = NULL) {
if (!is.null(c.ref)) {
attrs = mda.getattr(c.ref)
Expand Down
4 changes: 2 additions & 2 deletions R/crossval.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
#' @return
#' matrix with object indices for each segment
#'
crossval = function(nobj, cv = NULL)
{
#' @export
crossval = function(nobj, cv = NULL) {
methods = c('rand', 'ven', 'loo')

if (is.null(cv))
Expand Down
4 changes: 3 additions & 1 deletion R/defaults.R
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,11 @@ getSelectivityRatio = function(obj, ...) {
#' a model object
#' @param ncomp
#' number of components to select
#' @param ...
#' other arguments
#'
#' @export
selectCompNum = function(model, ncomp) {
selectCompNum = function(model, ncomp = NULL, ...) {
UseMethod("selectCompNum")
}

Expand Down
101 changes: 33 additions & 68 deletions R/ipls.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,9 @@
#'
#' @export
ipls = function(x, y, glob.ncomp = 10, center = T, scale = F, cv = 10,
exclcols = NULL, exclrows = NULL, int.ncomp = 10, int.num = NULL, int.width = NULL,
int.limits = NULL, int.niter = NULL, ncomp.selcrit = 'min', method = 'forward',
silent = F) {
exclcols = NULL, exclrows = NULL, int.ncomp = glob.ncomp, int.num = NULL,
int.width = NULL, int.limits = NULL, int.niter = NULL, ncomp.selcrit = 'min',
method = 'forward', silent = F) {

# process names and values for xaxis
xaxis.name = attr(x, 'xaxis.name')
Expand Down Expand Up @@ -325,12 +325,12 @@ ipls.forward = function(x, y, obj) {

# do loop for max number of intervals
selind = NULL
rmse = Inf
for (i in 1:obj$int.niter) {
if (!obj$silent)
cat(sprintf('Iteration %3d/%3d... ', i, obj$int.niter))

sel = NULL
rmse = 99999999999999
for (l in int.nonselected) {
# combine already selected intervals with the current
ind = obj$int.limits[l, 1]:obj$int.limits[l, 2]
Expand Down Expand Up @@ -361,15 +361,15 @@ ipls.forward = function(x, y, obj) {
# obj$int.limits[l, 1],obj$int.limits[l, 2]))

# else check if rmse has been improved
if (rmse - m$cvres$rmse[1, m$ncomp.selected] > 0)
{
if (rmse > m$cvres$rmse[1, m$ncomp.selected]) {
ncomp = m$ncomp.selected
rmse = m$cvres$rmse[1, ncomp]
r2 = m$cvres$r2[1, ncomp]
rmse = m$cvres$rmse[1, m$ncomp.selected]
r2 = m$cvres$r2[1, m$ncomp.selected]
sel = l
}
}


if (!is.null(sel)) {
selind = c(selind, obj$int.limits[sel, 1]:obj$int.limits[sel, 2])
int.nonselected = int.nonselected[int.nonselected != sel]
Expand All @@ -379,32 +379,22 @@ ipls.forward = function(x, y, obj) {
'n' = sel,
'start' = obj$int.limits[sel, 1],
'end' = obj$int.limits[sel, 2],
'selected' = F,
'selected' = T,
'nComp' = ncomp,
'RMSE' = rmse,
'R2' = r2
))
))

if (!obj$silent)
cat(sprintf('selected interval %3d (RMSECV = %f)\n', sel, rmse))

} else {
# no improvements, quit the outer loop
if (!obj$silent)
cat('no improvements, stop.\n')
break
}
}

# find which variables to select using first local minimum
df = diff(int.stat$RMSE[2:nrow(int.stat)]) > 0
nsel = which(df)[1] + 1

if (any(df))
isel = 2:nsel
else
isel = 2:nrow(int.stat)
int.selected = int.stat$n[isel]
int.stat$selected[isel] = TRUE

# return the selection results
obj$glob.stat = glob.stat
obj$int.stat = int.stat
Expand Down Expand Up @@ -452,6 +442,7 @@ ipls.backward = function(x, y, obj) {

# do loop for max number of intervals
unselind = NULL
rmse = Inf
for (i in 1:obj$int.niter) {
if (length(int.selected) == 1)
break
Expand All @@ -460,9 +451,7 @@ ipls.backward = function(x, y, obj) {
cat(sprintf('Iteration %3d/%3d... ', i, obj$int.niter))

# do loop to select an interval

unsel = NULL
rmse = 99999999999999
for (l in int.selected) {
# combine already selected intervals with the current
ind = obj$int.limits[l, 1]:obj$int.limits[l, 2]
Expand All @@ -473,8 +462,7 @@ ipls.backward = function(x, y, obj) {
cv = obj$cv, light = TRUE, ncomp.selcrit = obj$ncomp.selcrit)

# if first round, build a data frame with statistics for each interval
if (i == 1)
{
if (i == 1) {
glob.stat = rbind(glob.stat,
data.frame(
'n' = l,
Expand Down Expand Up @@ -505,11 +493,11 @@ ipls.backward = function(x, y, obj) {
'R2' = m$cvres$r2[1, m$ncomp.selected]
))
unsel = NULL
} else if (rmse - m$cvres$rmse[1, m$ncomp.selected] > 0) {
} else if (rmse > m$cvres$rmse[1, m$ncomp.selected]) {
# else check if rmse has been improved
ncomp = m$ncomp.selected
rmse = m$cvres$rmse[1, ncomp]
r2 = m$cvres$r2[1, ncomp]
rmse = m$cvres$rmse[1, m$ncomp.selected]
r2 = m$cvres$r2[1, m$ncomp.selected]
unsel = l
}
}
Expand All @@ -534,32 +522,12 @@ ipls.backward = function(x, y, obj) {

} else {
# no improvements, quit the outer loop
if (!obj$silent)
cat('no improvements, stop.\n')
break
}
}

# sort last two rows if all intervals were processed
if (obj$int.niter == obj$int.num) {
nr = nrow(int.stat)
if (int.stat$RMSE[nr] < int.stat$RMSE[nr - 1]) {
a = int.stat[nr, ]
int.stat[nr, ] = int.stat[nr - 1, ]
int.stat[nr - 1, ] = a
}
}

# find which variables to select using first local minimum
df = diff(int.stat$RMSE[2:nrow(int.stat)]) > 0
nsel = which(df)[1] + 2

if (any(df))
isel = nsel:nrow(int.stat)
else
isel = 2:nrow(int.stat)

int.selected = int.stat$n[isel]
int.stat$selected[isel] = TRUE

# return the selection results
obj$glob.stat = glob.stat
obj$int.stat = int.stat
Expand Down Expand Up @@ -640,20 +608,20 @@ plotSelection.ipls = function(obj, glob.ncomp = NULL, main = 'iPLS results',
bars(mids, rmse, col = rgb(0.9, 0.9, 0.9), bwd = bwd, border = rgb(0.8, 0.8, 0.8))
bars(mids[obj$int.selected], rmse[obj$int.selected], col = rgb(0.5, 1.0, 0.6),
bwd = bwd[obj$int.selected], border = rgb(0.4, 0.9, 0.5))

# mean signal
lines(xlabels, xmean, col = rgb(1.0, 0.7, 0.7), lwd = 2)

# number of components for each interval
text(mids, matrix(0.05 * ylim[2], ncol = length(mids)), ncomp,
col = rgb(0.4, 0.4, 0.4), cex = 0.85)

# global model
if (is.null(glob.ncomp))
glob.ncomp = obj$gm$ncomp.selected
else if (glob.ncomp < 1 || glob.ncomp > obj$gm$ncomp)
stop('Wrong value for number of components!')

dx = (xlim[2] - xlim[1])/50
abline(h = obj$gm$cvres$rmse[1, glob.ncomp], lty = 2, col = rgb(0.5, 0.5, 0.5))
text(xlim[2] + dx, obj$gm$cvres$rmse[1, glob.ncomp], glob.ncomp, cex = 0.85,
Expand Down Expand Up @@ -684,11 +652,10 @@ plotSelection.ipls = function(obj, glob.ncomp = NULL, main = 'iPLS results',
#' other arguments
#'
#' @details
#' The plot shows RMSE values obtained at each iteration of the iPLS selection
#' algorithm as bars. The first bar correspond to the global model with all variables
#' included, second - to the model obtained at the first iteration and so on. Number
#' at the bottom of each bar corresponds to the interval included or excluded at the
#' particular iteration. The selected intervals are shown with green color.
#' The plot shows RMSE values obtained at each iteration of the iPLS algorithm as bars. The first
#' bar correspond to the global model with all variables included, second - to the model obtained
#' at the first iteration and so on. Number at the bottom of each bar corresponds to the interval
#' included or excluded at the particular iteration.
#'
#' @seealso
#' \code{\link{summary.ipls}}, \code{\link{plotSelection.ipls}}
Expand All @@ -704,23 +671,21 @@ plotRMSE.ipls = function(obj, glob.ncomp = NULL, main = 'RMSE development', xlab

rmse = obj$int.stat$RMSE
n = obj$int.stat$n
i = obj$int.stat$selected
mids = 0:(length(n) - 1)

if (is.null(xlim))
xlim = c(min(mids) - 0.5, max(mids) + 0.5)
if (is.null(ylim))
ylim = c(0, max(rmse) * 1.1)

# make plot
plot(0, 0, type = 'n', main = main, xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, ...)

# gray and green bars
bars(mids, rmse, col = rgb(0.9, 0.9, 0.9), bwd = 1, border = rgb(0.8, 0.8, 0.8))
bars(mids, rmse, col = rgb(0.5, 1.0, 0.6), bwd = 1, border = rgb(0.4, 0.9, 0.5))
bars(mids[1], rmse[1], col = rgb(0.98, 0.98, 0.98), bwd = 1, border = rgb(0.85, 0.85, 0.85))
bars(mids[i], rmse[i], col = rgb(0.5, 1.0, 0.6), bwd = 1, border = rgb(0.4, 0.9, 0.5))

# number of components for each interval

# interval numbers
text(mids[-1], ylim[1] + (ylim[2] - ylim[1])/25, n[-1], col = rgb(0.4, 0.4, 0.4), cex = 0.80)
}

Expand Down
1 change: 1 addition & 0 deletions R/mdaplots.R
Original file line number Diff line number Diff line change
Expand Up @@ -1216,6 +1216,7 @@ mdaplot = function(data = NULL, plot.data = NULL, type = 'p', pch = 16, col = NU
# show colorbar if needed
if (!is.null(cgroup) && show.colorbar == T)
mdaplot.showColorbar(cgroup, colmap, lab.col = lab.col, lab.cex = lab.cex)

}

#' Plotting function for several sets of objects
Expand Down
Loading

0 comments on commit b88fd15

Please sign in to comment.