From 1c59be9d3cbdd450143ad53c2e6e570f9e908b08 Mon Sep 17 00:00:00 2001
From: randef1ned <46381867+randef1ned@users.noreply.github.com>
Date: Wed, 17 Jan 2024 21:49:40 +0800
Subject: [PATCH] Performance improvement

---
 DESCRIPTION                        |  6 ++-
 NAMESPACE                          |  7 +++
 NEWS.md                            |  5 +++
 R/RcppExports.R                    |  4 ++
 R/mat_util.R                       | 71 +++++++++++++++++++++++++-----
 R/mrw.R                            | 15 ++++---
 inst/include/diffusr_RcppExports.h | 21 +++++++++
 man/normalize.stochastic.Rd        |  4 +-
 src/RcppExports.cpp                | 37 ++++++++++++++++
 src/mat_util.cpp                   | 53 ++++++++++++++++------
 src/mrw.cpp                        |  5 ++-
 src/neighbors.cpp                  |  6 +--
 12 files changed, 197 insertions(+), 37 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 8297370..d2ba4aa 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: diffusr
 Type: Package
 Title: Network Diffusion Algorithms
-Version: 0.2.1
+Version: 0.2.2
 Date: 2018-04-20
 Authors@R: person("Simon", "Dirmeier",
                   email = "simon.dirmeier@gmx.de",
@@ -27,7 +27,9 @@ Imports:
     methods,
     checkmate,
     matrixStats,
-    sparseMatrixStats
+    sparseMatrixStats,
+    memuse,
+    pryr
 Suggests:
     knitr,
     rmarkdown,
diff --git a/NAMESPACE b/NAMESPACE
index d9a9cd1..10b6161 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -14,15 +14,22 @@ importFrom(checkmate,assert_int)
 importFrom(checkmate,assert_integer)
 importFrom(checkmate,assert_logical)
 importFrom(checkmate,assert_number)
+importFrom(checkmate,assert_numeric)
 importFrom(checkmate,assert_vector)
 importFrom(checkmate,check_matrix)
 importFrom(checkmate,check_numeric)
 importFrom(checkmate,test_atomic_vector)
+importFrom(checkmate,test_logical)
 importFrom(checkmate,test_matrix)
 importFrom(checkmate,test_numeric)
 importFrom(igraph,components)
 importFrom(igraph,graph_from_adjacency_matrix)
 importFrom(matrixStats,colSums2)
+importFrom(memuse,Sys.meminfo)
+importFrom(memuse,Sys.swapinfo)
+importFrom(memuse,howbig)
+importFrom(methods,as)
 importFrom(methods,is)
+importFrom(pryr,object_size)
 importFrom(sparseMatrixStats,colAnyNAs)
 useDynLib(diffusr)
diff --git a/NEWS.md b/NEWS.md
index d28897b..65d8515 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,8 @@
+# diffusr v0.2.2
+
+* Bug fix
+* Performance improvement for sparse matrix
+
 # diffusr v0.2.1
 
 * Allow sparse matrix as input
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 468d41d..6bf7a22 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -9,6 +9,10 @@ stoch_col_norm_ <- function(W) {
     .Call('_diffusr_stoch_col_norm_', PACKAGE = 'diffusr', W)
 }
 
+stoch_col_norm_s <- function(W) {
+    .Call('_diffusr_stoch_col_norm_s', PACKAGE = 'diffusr', W)
+}
+
 laplacian_ <- function(W) {
     .Call('_diffusr_laplacian_', PACKAGE = 'diffusr', W)
 }
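
The sparse-matrix line in NEWS.md above corresponds to the new stoch_col_norm_s binding registered here. As a usage sketch only (not part of the changed files; the Matrix package is assumed to be installed for rsparsematrix, and the sizes are arbitrary), the sparse path can be exercised from R like this:

    library(Matrix)
    library(diffusr)
    # a non-negative sparse weight matrix; abs() keeps every entry >= 0
    W <- rsparsematrix(2000, 2000, density = 0.01,
                       rand.x = function(n) abs(rnorm(n)))
    stoch.W <- normalize.stochastic(W, no_r = TRUE)  # no_r = TRUE forces the C++ routine
    range(colSums(stoch.W))                          # every column now sums to (about) 1
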
diff --git a/R/mat_util.R b/R/mat_util.R
index 4a8210f..d140bbc 100644
--- a/R/mat_util.R
+++ b/R/mat_util.R
@@ -25,17 +25,28 @@
 #' @param obj \code{\link[base]{matrix}} (or
 #'  \code{\link[Matrix:dgCMatrix-class]{dgCMatrix}},
 #'  \code{\link[base]{vector}}) that is stochstically normalized
+#' @param no_r Do not use R for normalization
 #' @param ... additional params
 #' @return returns the normalized matrix/vector)
 #'
+#' @useDynLib diffusr
+#'
 #' @importFrom checkmate assert check_matrix test_numeric test_atomic_vector
+#'  test_logical
+#' @importFrom memuse Sys.meminfo Sys.swapinfo howbig
+#' @importFrom pryr object_size
+#' @importFrom Rcpp sourceCpp
 #'
 #' @examples
 #'  W <- matrix(abs(rnorm(10000)), 100, 100)
 #'  stoch.W <- normalize.stochastic(W)
-normalize.stochastic <- function(obj, ...) {
+normalize.stochastic <- function(obj, no_r = NULL, ...) {
   is_matrix <- FALSE
   is_sparse <- FALSE
+  if (!test_logical(no_r, len = 1, any.missing = FALSE, all.missing = FALSE,
+                    null.ok = FALSE)) {
+    no_r <- FALSE
+  }
   if (test_numeric(obj, lower = 0, finite = TRUE, any.missing = FALSE,
                    all.missing = FALSE, null.ok = FALSE) &&
       test_atomic_vector(obj)) {
@@ -55,16 +66,54 @@ normalize.stochastic <- function(obj, ...) {
     is_matrix <- TRUE
   }
   if (is_matrix) {
-    sums <- colSums3(obj, is_sparse)
-    if (!all(.equals.double(sums, 1, .001))) {
-      message("normalizing column vectors!")
-      empt_col_val <- 1.0 / ncol(obj)
+    if (no_r) {
+      if (is_sparse) {
+        obj <- as(stoch_col_norm_s(obj), "dgCMatrix")
+      } else {
+        obj <- stoch_col_norm_(obj)
+      }
+    } else {
+      # check memory usage;
+      # if there is a memory shortage, then call C function directly
+      n <- as.numeric(ncol(obj))
+      memory_usage <- Sys.meminfo()
+      swap_usage <- Sys.swapinfo()
+      free_ram <- memory_usage$freeram@size
+      free_ram <- free_ram * switch(substring(memory_usage$freeram@unit, 1, 1),
+                                    "B" = 1 / 1048576, "K" = 1 / 1024, "M" = 1,
+                                    "G" = 1024, "T" = 1048576,
+                                    .default = 1073741824)
+      swap_ram <- swap_usage$freeswap@size
+      swap_ram <- swap_ram * switch(substring(swap_usage$freeswap@unit, 1, 1),
+                                    "B" = 1 / 1048576, "K" = 1 / 1024, "M" = 1,
+                                    "G" = 1024, "T" = 1048576,
+                                    .default = 1073741824)
+      free_ram <- free_ram + swap_ram
+      object_ram_p <- howbig(n, n, unit = "MiB")@size    # size in practice
+      object_ram_t <- as.numeric(object_size(obj)) / 1e6 # size in theory (MiB)
+
+      # if memory is bigger than the temporary variables, then use R
+      if ((free_ram > object_ram_t * 4)) {
+        sums <- colSums3(obj, is_sparse)
+        if (!all(.equals.double(sums, 1, .001))) {
+          message("normalizing column vectors!")
+          empt_col_val <- 1.0 / n
 
-      obj <- obj / sums[col(obj)]
-      # check if need wipe zeros
-      zeros <- which(sums < empt_col_val)
-      if (length(zeros)) {
-        obj[, zeros] <- 0.00001
+          obj <- obj / sums[col(obj)]
+          # check if need wipe zeros
+          zeros <- which(sums < 0.00001)
+          if (length(zeros)) {
+            obj[, zeros] <- empt_col_val
+          }
+        }
+      } else if (free_ram < object_ram_p) {
+        stop("You don't have sufficient memory to normalize. Required: ",
+             round(object_ram_p / 1024, digits = 3), " GiB, but ",
+             round(free_ram / 1024, digits = 3), " available.")
+      } else {
+        warning("You have just enough memory to normalize; consider ",
+                "increasing your physical memory capacity in the future!")
+        obj <- stoch_col_norm_s(obj)
       }
     }
   } else {
@@ -83,6 +132,8 @@
 #' @param ... additional params
 #' @return returns the Laplacian
 #'
+#' @useDynLib diffusr
+#'
 #' @importFrom checkmate assert check_matrix
 #' @importFrom Rcpp sourceCpp
 #'
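
The comments in the hunk above describe a three-way decision: if free RAM plus swap can hold roughly four copies of the object, normalize in R; if it cannot even hold one dense n x n copy, abort; otherwise hand the matrix to the C++ routine. Condensed into a standalone sketch that reuses the same calls (the wrapper name choose_norm_path is illustrative only; swap and unit conversion are elided and the unit is assumed to be MiB):

    choose_norm_path <- function(obj) {
      n <- as.numeric(ncol(obj))
      free_mib       <- memuse::Sys.meminfo()$freeram@size         # simplified: assume MiB, ignore swap
      need_dense_mib <- memuse::howbig(n, n, unit = "MiB")@size    # one dense n x n copy ("in practice")
      need_now_mib   <- as.numeric(pryr::object_size(obj)) / 1e6   # what obj occupies now ("in theory")
      if (free_mib > 4 * need_now_mib) {
        "R"      # enough head room for the vectorised R normalization
      } else if (free_mib < need_dense_mib) {
        "stop"   # not even one dense copy fits: normalization would fail
      } else {
        "C++"    # borderline: call stoch_col_norm_s() and avoid the R temporaries
      }
    }
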
diff --git a/R/mrw.R b/R/mrw.R
index 06b4197..359b541 100644
--- a/R/mrw.R
+++ b/R/mrw.R
@@ -80,8 +80,9 @@
 #'
 #' @useDynLib diffusr
 #'
-#' @importFrom checkmate assert_number assert_int assert_logical test_numeric
-#'  assert test_matrix check_numeric
+#' @importFrom checkmate assert_number assert_int assert_logical assert_numeric
+#'  assert test_matrix check_numeric test_atomic_vector
+#' @importFrom methods as
 #' @importFrom Rcpp sourceCpp
 #'
 #' @examples
@@ -112,7 +113,7 @@ random.walk <- function(p0, graph, r = 0.5, niter = 1e4, thresh = 1e-4,
   n_elements <- nrow(graph)
   if (is.dgCMatrix(graph)) {
     assert_dgCMatrix(graph)
-    sparse <- TRUE
+    sparse <- allow.ergodic <- TRUE
   } else {
     assert(
       test_matrix(graph, mode = "numeric", min.rows = 3, nrows = n_elements,
@@ -125,8 +126,9 @@ random.walk <- function(p0, graph, r = 0.5, niter = 1e4, thresh = 1e-4,
   }
 
   # convert p0 if p0 is vector
-  if (test_numeric(p0, lower = 0, len = n_elements, finite = TRUE,
-                   any.missing = FALSE, all.missing = FALSE, null.ok = FALSE)) {
+  if (test_atomic_vector(p0)) {
+    assert_numeric(p0, lower = 0, len = n_elements, finite = TRUE,
+                   any.missing = FALSE, all.missing = FALSE, null.ok = FALSE)
     p0 <- as.matrix(p0)
   } else {
     assert(
@@ -150,6 +152,9 @@ random.walk <- function(p0, graph, r = 0.5, niter = 1e4, thresh = 1e-4,
 
   if (sparse) {
     # sparse matrix
+    if (!is.dgCMatrix(stoch.graph)) {
+      stoch.graph <- as(stoch.graph, "CsparseMatrix")
+    }
     l <- mrwr_s(normalize.stochastic(p0), stoch.graph, r, thresh, niter,
                 do.analytical)
   } else {
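
With these changes random.walk also switches allow.ergodic on for dgCMatrix input and coerces the normalized graph back to a CsparseMatrix before dispatching to mrwr_s, so a sparse graph can be passed straight in. A short usage sketch (illustrative only; the graph and starting vector are made up, and the Matrix package is assumed for rsparsematrix):

    library(Matrix)
    library(diffusr)
    graph <- rsparsematrix(100, 100, density = 0.05,
                           rand.x = function(n) abs(rnorm(n)))
    p0  <- rep(1 / 100, 100)                # uniform starting distribution over the 100 nodes
    res <- random.walk(p0, graph, r = 0.5)  # diffuses p0 over the sparse graph via mrwr_s()
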
diff --git a/inst/include/diffusr_RcppExports.h b/inst/include/diffusr_RcppExports.h
index d5d3096..c90e30a 100644
--- a/inst/include/diffusr_RcppExports.h
+++ b/inst/include/diffusr_RcppExports.h
@@ -67,6 +67,27 @@ namespace diffusr {
         return Rcpp::as<MatrixXd >(rcpp_result_gen);
     }
 
+    inline SpMat stoch_col_norm_s(const SpMat& W) {
+        typedef SEXP(*Ptr_stoch_col_norm_s)(SEXP);
+        static Ptr_stoch_col_norm_s p_stoch_col_norm_s = NULL;
+        if (p_stoch_col_norm_s == NULL) {
+            validateSignature("SpMat(*stoch_col_norm_s)(const SpMat&)");
+            p_stoch_col_norm_s = (Ptr_stoch_col_norm_s)R_GetCCallable("diffusr", "_diffusr_stoch_col_norm_s");
+        }
+        RObject rcpp_result_gen;
+        {
+            RNGScope RCPP_rngScope_gen;
+            rcpp_result_gen = p_stoch_col_norm_s(Shield<SEXP>(Rcpp::wrap(W)));
+        }
+        if (rcpp_result_gen.inherits("interrupted-error"))
+            throw Rcpp::internal::InterruptedException();
+        if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen))
+            throw Rcpp::LongjumpException(rcpp_result_gen);
+        if (rcpp_result_gen.inherits("try-error"))
+            throw Rcpp::exception(Rcpp::as<std::string>(rcpp_result_gen).c_str());
+        return Rcpp::as<SpMat >(rcpp_result_gen);
+    }
+
     inline MatrixXd laplacian_(const MatrixXd& W) {
         typedef SEXP(*Ptr_laplacian_)(SEXP);
         static Ptr_laplacian_ p_laplacian_ = NULL;
diff --git a/man/normalize.stochastic.Rd b/man/normalize.stochastic.Rd
index 46e0155..1733601 100644
--- a/man/normalize.stochastic.Rd
+++ b/man/normalize.stochastic.Rd
@@ -4,13 +4,15 @@
 \alias{normalize.stochastic}
 \title{Create a stochastically normalized matrix/vector}
 \usage{
-normalize.stochastic(obj, ...)
+normalize.stochastic(obj, no_r = NULL, ...)
 }
 \arguments{
 \item{obj}{\code{\link[base]{matrix}} (or
 \code{\link[Matrix:dgCMatrix-class]{dgCMatrix}},
 \code{\link[base]{vector}}) that is stochstically normalized}
 
+\item{no_r}{Do not use R for normalization}
+
 \item{...}{additional params}
 }
 \value{
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 8e6b9fa..68c56ed 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -84,6 +84,40 @@ RcppExport SEXP _diffusr_stoch_col_norm_(SEXP WSEXP) {
     UNPROTECT(1);
     return rcpp_result_gen;
 }
+// stoch_col_norm_s
+SpMat stoch_col_norm_s(const SpMat& W);
+static SEXP _diffusr_stoch_col_norm_s_try(SEXP WSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::traits::input_parameter< const SpMat& >::type W(WSEXP);
+    rcpp_result_gen = Rcpp::wrap(stoch_col_norm_s(W));
+    return rcpp_result_gen;
+END_RCPP_RETURN_ERROR
+}
+RcppExport SEXP _diffusr_stoch_col_norm_s(SEXP WSEXP) {
+    SEXP rcpp_result_gen;
+    {
+        Rcpp::RNGScope rcpp_rngScope_gen;
+        rcpp_result_gen = PROTECT(_diffusr_stoch_col_norm_s_try(WSEXP));
+    }
+    Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error");
+    if (rcpp_isInterrupt_gen) {
+        UNPROTECT(1);
+        Rf_onintr();
+    }
+    bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen);
+    if (rcpp_isLongjump_gen) {
+        Rcpp::internal::resumeJump(rcpp_result_gen);
+    }
+    Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error");
+    if (rcpp_isError_gen) {
+        SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen);
+        UNPROTECT(1);
+        Rf_error("%s", CHAR(rcpp_msgSEXP_gen));
+    }
+    UNPROTECT(1);
+    return rcpp_result_gen;
+}
 // laplacian_
 MatrixXd laplacian_(const MatrixXd& W);
 static SEXP _diffusr_laplacian__try(SEXP WSEXP) {
@@ -445,6 +479,7 @@ static int _diffusr_RcppExport_validate(const char* sig) {
     if (signatures.empty()) {
         signatures.insert("MatrixXd(*heat_diffusion_)(const MatrixXd&,const MatrixXd&,const double)");
         signatures.insert("MatrixXd(*stoch_col_norm_)(const MatrixXd&)");
+        signatures.insert("SpMat(*stoch_col_norm_s)(const SpMat&)");
         signatures.insert("MatrixXd(*laplacian_)(const MatrixXd&)");
         signatures.insert("MatrixXd(*laplacian_s)(const SpMat&)");
         signatures.insert("VectorXd(*node_degrees_)(const MatrixXd&)");
@@ -463,6 +498,7 @@
 RcppExport SEXP _diffusr_RcppExport_registerCCallable() {
     R_RegisterCCallable("diffusr", "_diffusr_heat_diffusion_", (DL_FUNC)_diffusr_heat_diffusion__try);
     R_RegisterCCallable("diffusr", "_diffusr_stoch_col_norm_", (DL_FUNC)_diffusr_stoch_col_norm__try);
+    R_RegisterCCallable("diffusr", "_diffusr_stoch_col_norm_s", (DL_FUNC)_diffusr_stoch_col_norm_s_try);
     R_RegisterCCallable("diffusr", "_diffusr_laplacian_", (DL_FUNC)_diffusr_laplacian__try);
     R_RegisterCCallable("diffusr", "_diffusr_laplacian_s", (DL_FUNC)_diffusr_laplacian_s_try);
     R_RegisterCCallable("diffusr", "_diffusr_node_degrees_", (DL_FUNC)_diffusr_node_degrees__try);
@@ -480,6 +516,7 @@ RcppExport SEXP _diffusr_RcppExport_registerCCallable() {
 static const R_CallMethodDef CallEntries[] = {
     {"_diffusr_heat_diffusion_", (DL_FUNC) &_diffusr_heat_diffusion_, 3},
     {"_diffusr_stoch_col_norm_", (DL_FUNC) &_diffusr_stoch_col_norm_, 1},
+    {"_diffusr_stoch_col_norm_s", (DL_FUNC) &_diffusr_stoch_col_norm_s, 1},
     {"_diffusr_laplacian_", (DL_FUNC) &_diffusr_laplacian_, 1},
     {"_diffusr_laplacian_s", (DL_FUNC) &_diffusr_laplacian_s, 1},
     {"_diffusr_node_degrees_", (DL_FUNC) &_diffusr_node_degrees_, 1},
diff --git a/src/mat_util.cpp b/src/mat_util.cpp
index 54ac534..d922dc9 100644
--- a/src/mat_util.cpp
+++ b/src/mat_util.cpp
@@ -22,6 +22,30 @@
 
 #include "../inst/include/diffusr.h"
 
+template <typename T>
+T stoch_col_norm_t(const T& W) {
+  size_t n = W.rows();
+  T res(n, n);
+  VectorXd colsums = W.transpose() * VectorXd::Ones(n);
+  //res.reserve((colsums * n).sum());
+  const double empt_col_val = 1.0 / n;
+  const double zero_col = 0.00001;
+
+  for (unsigned int i = 0; i < n; ++i) {
+    if (colsums[i] <= zero_col) {
+      for (size_t j = 0; j < n; ++j) {
+        res.coeffRef(j, i) = empt_col_val;
+      }
+    } else {
+      for (size_t j = 0; j < n; ++j) {
+        res.coeffRef(j, i) = W.coeff(j, i) / colsums(i);
+      }
+    }
+  }
+
+  return res;
+}
+
 //' Column normalize a matrix, so that it is stochastic.
 //'
 //' @noRd
@@ -29,20 +53,20 @@
 //' @return returns the normalized matrix
 // [[Rcpp::interfaces(r, cpp)]]
 // [[Rcpp::export]]
-MatrixXd stoch_col_norm_(const MatrixXd& W) {
-  MatrixXd res(W.rows(), W.cols());
-  VectorXd colsums = W.colwise().sum();
-  const double empt_col_val = 1.0 / W.cols();
-  const double zero_col = 0.00001;
-  #pragma omp parallel for
-  for (unsigned int i = 0; i < W.cols(); ++i)
-  {
-    if (colsums[i] <= zero_col)
-      res.col(i).fill(empt_col_val);
-    else
-      res.col(i) = W.col(i) / colsums(i);
-  }
+MatrixXd stoch_col_norm_(const MatrixXd &W) {
+  return stoch_col_norm_t(W);
+}
+
+//' Column normalize a matrix, so that it is stochastic.
+//'
+//' @noRd
+//' @param W the adjacency matrix to be normalized
+//' @return returns the normalized matrix
+// [[Rcpp::interfaces(r, cpp)]]
+// [[Rcpp::export]]
+SpMat stoch_col_norm_s(const SpMat &W) {
+  SpMat res = stoch_col_norm_t(W);
+  res.makeCompressed();
   return res;
 }
@@ -141,10 +165,11 @@ MatrixXd hub_normalize_t(const temp &W) {
   #pragma omp parallel for
   for (unsigned int i = 0; i < n; ++i) {
+    double mh;
     for (unsigned int j = 0; j < n; ++j) {
       if (W.coeff(i, j) != 0) {
         // Equivlant with: double mh = fc(node_degrees[i] / node_degrees[j]);
-        double mh = node_degrees[i] / node_degrees[j];
+        mh = node_degrees[i] / node_degrees[j];
         res.coeffRef(i, j) = min(1.0, mh) / node_degrees[i];
       }
     }
diff --git a/src/mrw.cpp b/src/mrw.cpp
index 6d8bf4e..c89ec14 100644
--- a/src/mrw.cpp
+++ b/src/mrw.cpp
@@ -29,7 +29,9 @@ template <typename type> VectorXd mrwr_o(const MatrixXd& p0, const type &W, const
 template<>
 SpMat inverse_matrix(const SpMat &mat, const SpMat &I) {
   SparseLU<SpMat> luDecomposition(mat);
-  return luDecomposition.solve(I);
+  SpMat ret = luDecomposition.solve(I);
+  ret.makeCompressed();
+  return ret;
 }
 template<>
 MatrixXd inverse_matrix(const MatrixXd &mat, const MatrixXd &I) {
   FullPivLU<MatrixXd> luDecomposition(mat);
@@ -54,6 +56,7 @@ VectorXd mrwr_t(const MatrixXd& p0,
     temp T = I - (1 - r) * W;
     // then inverse the matrix T
     temp T_inv = inverse_matrix(T, I);
+    I.resize(0, 0);
     pt = T_inv * r * p0;
   }
   else {
diff --git a/src/neighbors.cpp b/src/neighbors.cpp
index 1849bba..0e6537f 100644
--- a/src/neighbors.cpp
+++ b/src/neighbors.cpp
@@ -66,10 +66,8 @@ void add_neighbor_to_queue(
                   distance_comparator>& queue,
   const NumericMatrix& W,
   const pair<int, double>& cn) {
-  for (int i = 0; i < W.cols(); ++i)
-  {
-    if (i != cn.first && W(cn.first, i) > 0)
-    {
+  for (int i = 0; i < W.cols(); ++i) {
+    if (i != cn.first && W(cn.first, i) > 0) {
       queue.push(make_pair(i, W(cn.first, i)));
     }
   }
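
The analytical branch shown in mrw.cpp solves the restart walk in closed form: the fixed point of p_{t+1} = (1 - r) * W * p_t + r * p0 is p = r * (I - (1 - r) * W)^-1 * p0, which is exactly what the existing `pt = T_inv * r * p0` line computes; freeing I early merely drops one temporary before that final product. The same quantity can be reproduced from R for a small dense graph (a sketch only; names and sizes are arbitrary):

    library(diffusr)
    set.seed(1)
    W  <- normalize.stochastic(matrix(abs(rnorm(25)), 5, 5))  # column-stochastic transition matrix
    p0 <- normalize.stochastic(rep(1, 5))                     # uniform start vector
    r  <- 0.5
    p_closed <- r * solve(diag(5) - (1 - r) * W) %*% p0
    sum(p_closed)   # 1: the closed-form result is itself a probability vector
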