Performance improvement

randef1ned · Jan 17, 2024 · 1c59be9 · 1c59be9
1 parent 3c913eb
commit 1c59be9
Show file tree

Hide file tree

Showing 12 changed files with 197 additions and 37 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: diffusr
 Type: Package
 Title: Network Diffusion Algorithms
-Version: 0.2.1
+Version: 0.2.2
 Date: 2018-04-20
 Authors@R: person("Simon", "Dirmeier",
                   email = "[email protected]",
@@ -27,7 +27,9 @@ Imports:
     methods,
     checkmate,
     matrixStats,
-    sparseMatrixStats
+    sparseMatrixStats,
+    memuse,
+    pryr
 Suggests:
     knitr,
     rmarkdown,

diff --git a/NAMESPACE b/NAMESPACE
@@ -14,15 +14,22 @@ importFrom(checkmate,assert_int)
 importFrom(checkmate,assert_integer)
 importFrom(checkmate,assert_logical)
 importFrom(checkmate,assert_number)
+importFrom(checkmate,assert_numeric)
 importFrom(checkmate,assert_vector)
 importFrom(checkmate,check_matrix)
 importFrom(checkmate,check_numeric)
 importFrom(checkmate,test_atomic_vector)
+importFrom(checkmate,test_logical)
 importFrom(checkmate,test_matrix)
 importFrom(checkmate,test_numeric)
 importFrom(igraph,components)
 importFrom(igraph,graph_from_adjacency_matrix)
 importFrom(matrixStats,colSums2)
+importFrom(memuse,Sys.meminfo)
+importFrom(memuse,Sys.swapinfo)
+importFrom(memuse,howbig)
+importFrom(methods,as)
 importFrom(methods,is)
+importFrom(pryr,object_size)
 importFrom(sparseMatrixStats,colAnyNAs)
 useDynLib(diffusr)
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,8 @@
+# diffusr v0.2.2
+
+* Bug fix
+* Performance improvement for sparse matrix
+
 # diffusr v0.2.1
 
 * Allow sparse matrix as input

diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -9,6 +9,10 @@ stoch_col_norm_ <- function(W) {
     .Call('_diffusr_stoch_col_norm_', PACKAGE = 'diffusr', W)
 }
 
+stoch_col_norm_s <- function(W) {
+    .Call('_diffusr_stoch_col_norm_s', PACKAGE = 'diffusr', W)
+}
+
 laplacian_ <- function(W) {
     .Call('_diffusr_laplacian_', PACKAGE = 'diffusr', W)
 }

diff --git a/R/mat_util.R b/R/mat_util.R
@@ -25,17 +25,28 @@
 #' @param obj  \code{\link[base]{matrix}} (or
 #'   \code{\link[Matrix:dgCMatrix-class]{dgCMatrix}},
 #'   \code{\link[base]{vector}}) that is stochstically normalized
+#' @param no_r Do not use R for normalization
 #' @param ...  additional params
 #' @return  returns the normalized matrix/vector)
 #'
+#' @useDynLib diffusr
+#'
 #' @importFrom checkmate assert check_matrix test_numeric test_atomic_vector
+#'                       test_logical
+#' @importFrom memuse Sys.meminfo Sys.swapinfo howbig
+#' @importFrom pryr object_size
+#' @importFrom Rcpp sourceCpp
 #'
 #' @examples
 #' W <- matrix(abs(rnorm(10000)), 100, 100)
 #' stoch.W <- normalize.stochastic(W)
-normalize.stochastic <- function(obj, ...) {
+normalize.stochastic <- function(obj, no_r = NULL, ...) {
   is_matrix <- FALSE
   is_sparse <- FALSE
+  if (!test_logical(no_r, len = 1, any.missing = FALSE, all.missing = FALSE,
+                    null.ok = FALSE)) {
+    no_r <- FALSE
+  }
   if (test_numeric(obj, lower = 0, finite = TRUE, any.missing = FALSE,
                    all.missing = FALSE, null.ok = FALSE) &&
       test_atomic_vector(obj)) {
@@ -55,16 +66,54 @@ normalize.stochastic <- function(obj, ...) {
     is_matrix <- TRUE
   }
   if (is_matrix) {
-    sums <- colSums3(obj, is_sparse)
-    if (!all(.equals.double(sums, 1, .001))) {
-      message("normalizing column vectors!")
-      empt_col_val <- 1.0 / ncol(obj)
+    if (no_r) {
+      if (is_sparse) {
+        obj <- as(stoch_col_norm_s(obj), "dgCMatrix")
+      } else {
+        obj <- stoch_col_norm_(obj)
+      }
+    } else {
+      # check memory usage;
+      # if there is a memory shortage, then call C function directly
+      n <- as.numeric(ncol(obj))
+      memory_usage <- Sys.meminfo()
+      swap_usage <- Sys.swapinfo()
+      free_ram <- memory_usage$freeram@size
+      free_ram <- free_ram * switch(substring(memory_usage$freeram@unit, 1, 1),
+                                    "B" = 1 / 1048576, "K" = 1 / 1024, "M" = 1,
+                                    "G" = 1024, "T" = 1048576,
+                                    .default = 1073741824)
+      swap_ram <- swap_usage$freeswap@size
+      swap_ram <- swap_ram * switch(substring(swap_usage$freeswap@unit, 1, 1),
+                                    "B" = 1 / 1048576, "K" = 1 / 1024, "M" = 1,
+                                    "G" = 1024, "T" = 1048576,
+                                    .default = 1073741824)
+      free_ram <- free_ram + swap_ram
+      object_ram_p <- howbig(n, n, unit = "MiB")@size     # size in practice
+      object_ram_t <- as.numeric(object_size(obj)) / 1e6  # size in theory (MiB)
+
+      # if memory is bigger than the temporary variables, then use R
+      if ((free_ram > object_ram_t * 4)) {
+        sums <- colSums3(obj, is_sparse)
+        if (!all(.equals.double(sums, 1, .001))) {
+          message("normalizing column vectors!")
+          empt_col_val <- 1.0 / n
 
-      obj <- obj / sums[col(obj)]
-      # check if need wipe zeros
-      zeros <- which(sums < empt_col_val)
-      if (length(zeros)) {
-        obj[, zeros] <- 0.00001
+          obj <- obj / sums[col(obj)]
+          # check if need wipe zeros
+          zeros <- which(sums < 0.00001)
+          if (length(zeros)) {
+            obj[, zeros] <- empt_col_val
+          }
+        }
+      } else if (free_ram < object_ram_p) {
+        stop("You don't have sufficient memory to normalize. Required: ",
+             round(object_ram_p / 1024, digits = 3), " GiB, but ",
+             round(free_ram / 1024, digits = 3), " available.")
+      } else {
+        warning("You have just enough memory to normalize; consider ",
+                "increasing your physical memory capacity in the future!")
+        obj <- stoch_col_norm_s(obj)
       }
     }
   } else {
@@ -83,6 +132,8 @@ normalize.stochastic <- function(obj, ...) {
 #' @param ...  additional params
 #' @return  returns the Laplacian
 #'
+#' @useDynLib diffusr
+#'
 #' @importFrom checkmate assert check_matrix
 #' @importFrom Rcpp sourceCpp
 #'

diff --git a/R/mrw.R b/R/mrw.R
@@ -80,8 +80,9 @@
 #'
 #' @useDynLib diffusr
 #'
-#' @importFrom checkmate assert_number assert_int assert_logical test_numeric
-#'                       assert test_matrix check_numeric
+#' @importFrom checkmate assert_number assert_int assert_logical assert_numeric
+#'                       assert test_matrix check_numeric test_atomic_vector
+#' @importFrom methods as
 #' @importFrom Rcpp sourceCpp
 #'
 #' @examples
@@ -112,7 +113,7 @@ random.walk <- function(p0, graph, r = 0.5, niter = 1e4, thresh = 1e-4,
   n_elements <- nrow(graph)
   if (is.dgCMatrix(graph)) {
     assert_dgCMatrix(graph)
-    sparse <- TRUE
+    sparse <- allow.ergodic <- TRUE
   } else {
     assert(
       test_matrix(graph, mode = "numeric", min.rows = 3, nrows = n_elements,
@@ -125,8 +126,9 @@ random.walk <- function(p0, graph, r = 0.5, niter = 1e4, thresh = 1e-4,
   }
 
   # convert p0 if p0 is vector
-  if (test_numeric(p0, lower = 0, len = n_elements, finite = TRUE,
-                   any.missing = FALSE, all.missing = FALSE, null.ok = FALSE)) {
+  if (test_atomic_vector(p0)) {
+    assert_numeric(p0, lower = 0, len = n_elements, finite = TRUE,
+                   any.missing = FALSE, all.missing = FALSE, null.ok = FALSE)
     p0 <- as.matrix(p0)
   } else {
     assert(
@@ -150,6 +152,9 @@ random.walk <- function(p0, graph, r = 0.5, niter = 1e4, thresh = 1e-4,
 
   if (sparse) {
     # sparse matrix
+    if (!is.dgCMatrix(stoch.graph)) {
+      stoch.graph <- as(stoch.graph, "CsparseMatrix")
+    }
     l <- mrwr_s(normalize.stochastic(p0),
                 stoch.graph, r, thresh, niter, do.analytical)
   } else {

diff --git a/inst/include/diffusr_RcppExports.h b/inst/include/diffusr_RcppExports.h
@@ -67,6 +67,27 @@ namespace diffusr {
         return Rcpp::as<MatrixXd >(rcpp_result_gen);
     }
 
+    inline SpMat stoch_col_norm_s(const SpMat& W) {
+        typedef SEXP(*Ptr_stoch_col_norm_s)(SEXP);
+        static Ptr_stoch_col_norm_s p_stoch_col_norm_s = NULL;
+        if (p_stoch_col_norm_s == NULL) {
+            validateSignature("SpMat(*stoch_col_norm_s)(const SpMat&)");
+            p_stoch_col_norm_s = (Ptr_stoch_col_norm_s)R_GetCCallable("diffusr", "_diffusr_stoch_col_norm_s");
+        }
+        RObject rcpp_result_gen;
+        {
+            RNGScope RCPP_rngScope_gen;
+            rcpp_result_gen = p_stoch_col_norm_s(Shield<SEXP>(Rcpp::wrap(W)));
+        }
+        if (rcpp_result_gen.inherits("interrupted-error"))
+            throw Rcpp::internal::InterruptedException();
+        if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen))
+            throw Rcpp::LongjumpException(rcpp_result_gen);
+        if (rcpp_result_gen.inherits("try-error"))
+            throw Rcpp::exception(Rcpp::as<std::string>(rcpp_result_gen).c_str());
+        return Rcpp::as<SpMat >(rcpp_result_gen);
+    }
+
     inline MatrixXd laplacian_(const MatrixXd& W) {
         typedef SEXP(*Ptr_laplacian_)(SEXP);
         static Ptr_laplacian_ p_laplacian_ = NULL;

diff --git a/man/normalize.stochastic.Rd b/man/normalize.stochastic.Rd
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
@@ -84,6 +84,40 @@ RcppExport SEXP _diffusr_stoch_col_norm_(SEXP WSEXP) {
     UNPROTECT(1);
     return rcpp_result_gen;
 }
+// stoch_col_norm_s
+SpMat stoch_col_norm_s(const SpMat& W);
+static SEXP _diffusr_stoch_col_norm_s_try(SEXP WSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::traits::input_parameter< const SpMat& >::type W(WSEXP);
+    rcpp_result_gen = Rcpp::wrap(stoch_col_norm_s(W));
+    return rcpp_result_gen;
+END_RCPP_RETURN_ERROR
+}
+RcppExport SEXP _diffusr_stoch_col_norm_s(SEXP WSEXP) {
+    SEXP rcpp_result_gen;
+    {
+        Rcpp::RNGScope rcpp_rngScope_gen;
+        rcpp_result_gen = PROTECT(_diffusr_stoch_col_norm_s_try(WSEXP));
+    }
+    Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error");
+    if (rcpp_isInterrupt_gen) {
+        UNPROTECT(1);
+        Rf_onintr();
+    }
+    bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen);
+    if (rcpp_isLongjump_gen) {
+        Rcpp::internal::resumeJump(rcpp_result_gen);
+    }
+    Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error");
+    if (rcpp_isError_gen) {
+        SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen);
+        UNPROTECT(1);
+        Rf_error("%s", CHAR(rcpp_msgSEXP_gen));
+    }
+    UNPROTECT(1);
+    return rcpp_result_gen;
+}
 // laplacian_
 MatrixXd laplacian_(const MatrixXd& W);
 static SEXP _diffusr_laplacian__try(SEXP WSEXP) {
@@ -445,6 +479,7 @@ static int _diffusr_RcppExport_validate(const char* sig) {
     if (signatures.empty()) {
         signatures.insert("MatrixXd(*heat_diffusion_)(const MatrixXd&,const MatrixXd&,const double)");
         signatures.insert("MatrixXd(*stoch_col_norm_)(const MatrixXd&)");
+        signatures.insert("SpMat(*stoch_col_norm_s)(const SpMat&)");
         signatures.insert("MatrixXd(*laplacian_)(const MatrixXd&)");
         signatures.insert("MatrixXd(*laplacian_s)(const SpMat&)");
         signatures.insert("VectorXd(*node_degrees_)(const MatrixXd&)");
@@ -463,6 +498,7 @@ static int _diffusr_RcppExport_validate(const char* sig) {
 RcppExport SEXP _diffusr_RcppExport_registerCCallable() { 
     R_RegisterCCallable("diffusr", "_diffusr_heat_diffusion_", (DL_FUNC)_diffusr_heat_diffusion__try);
     R_RegisterCCallable("diffusr", "_diffusr_stoch_col_norm_", (DL_FUNC)_diffusr_stoch_col_norm__try);
+    R_RegisterCCallable("diffusr", "_diffusr_stoch_col_norm_s", (DL_FUNC)_diffusr_stoch_col_norm_s_try);
     R_RegisterCCallable("diffusr", "_diffusr_laplacian_", (DL_FUNC)_diffusr_laplacian__try);
     R_RegisterCCallable("diffusr", "_diffusr_laplacian_s", (DL_FUNC)_diffusr_laplacian_s_try);
     R_RegisterCCallable("diffusr", "_diffusr_node_degrees_", (DL_FUNC)_diffusr_node_degrees__try);
@@ -480,6 +516,7 @@ RcppExport SEXP _diffusr_RcppExport_registerCCallable() {
 static const R_CallMethodDef CallEntries[] = {
     {"_diffusr_heat_diffusion_", (DL_FUNC) &_diffusr_heat_diffusion_, 3},
     {"_diffusr_stoch_col_norm_", (DL_FUNC) &_diffusr_stoch_col_norm_, 1},
+    {"_diffusr_stoch_col_norm_s", (DL_FUNC) &_diffusr_stoch_col_norm_s, 1},
     {"_diffusr_laplacian_", (DL_FUNC) &_diffusr_laplacian_, 1},
     {"_diffusr_laplacian_s", (DL_FUNC) &_diffusr_laplacian_s, 1},
     {"_diffusr_node_degrees_", (DL_FUNC) &_diffusr_node_degrees_, 1},

diff --git a/src/mat_util.cpp b/src/mat_util.cpp
@@ -22,27 +22,51 @@
 
 #include "../inst/include/diffusr.h"
 
+template <typename T>
+T stoch_col_norm_t(const T& W) {
+    size_t n = W.rows();
+    T res(n, n);
+    VectorXd colsums      = W.transpose() * VectorXd::Ones(n);
+    //res.reserve((colsums * n).sum());
+    const double    empt_col_val = 1.0 / n;
+    const double    zero_col     = 0.00001;
+
+    for (unsigned int i = 0; i < n; ++i) {
+        if (colsums[i] <= zero_col) {
+            for (size_t j = 0; j < n; ++j) {
+                res.coeffRef(j, i) = empt_col_val;
+            }
+        } else {
+            for (size_t j = 0; j < n; ++j) {
+                res.coeffRef(j, i) = W.coeff(j, i) / colsums(i);
+            }
+        }
+    }
+
+    return res;
+}
+
 //' Column normalize a matrix, so that it is stochastic.
 //'
 //' @noRd
 //' @param W  the adjacency matrix to be normalized
 //' @return  returns the normalized matrix
 // [[Rcpp::interfaces(r, cpp)]]
 // [[Rcpp::export]]
-MatrixXd stoch_col_norm_(const MatrixXd& W) {
-    MatrixXd res(W.rows(), W.cols());
-    VectorXd colsums      = W.colwise().sum();
-    const double    empt_col_val = 1.0 / W.cols();
-    const double    zero_col     = 0.00001;
-    #pragma omp parallel for
-    for (unsigned int i = 0; i < W.cols(); ++i)
-    {
-        if (colsums[i] <= zero_col)
-            res.col(i).fill(empt_col_val);
-        else
-            res.col(i) = W.col(i) / colsums(i);
-    }
+MatrixXd stoch_col_norm_(const MatrixXd &W) {
+    return stoch_col_norm_t(W);
+}
 
+//' Column normalize a matrix, so that it is stochastic.
+//'
+//' @noRd
+//' @param W  the adjacency matrix to be normalized
+//' @return  returns the normalized matrix
+// [[Rcpp::interfaces(r, cpp)]]
+// [[Rcpp::export]]
+SpMat stoch_col_norm_s(const SpMat &W) {
+    SpMat res = stoch_col_norm_t(W);
+    res.makeCompressed();
     return res;
 }
 
@@ -141,10 +165,11 @@ MatrixXd hub_normalize_t(const temp &W) {
 
     #pragma omp parallel for
     for (unsigned int i = 0; i < n; ++i) {
+        double mh;
         for (unsigned int j = 0; j < n; ++j) {
             if (W.coeff(i, j) != 0) {
                 // Equivlant with: double mh = fc(node_degrees[i] / node_degrees[j]);
-                double mh = node_degrees[i] / node_degrees[j];
+                mh = node_degrees[i] / node_degrees[j];
                 res.coeffRef(i, j) = min(1.0, mh) / node_degrees[i];
             }
         }