Skip to content

Commit

Permalink
Performance improvement
Browse files Browse the repository at this point in the history
  • Loading branch information
randef1ned committed Jan 17, 2024
1 parent 3c913eb commit 1c59be9
Show file tree
Hide file tree
Showing 12 changed files with 197 additions and 37 deletions.
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: diffusr
Type: Package
Title: Network Diffusion Algorithms
Version: 0.2.1
Version: 0.2.2
Date: 2018-04-20
Authors@R: person("Simon", "Dirmeier",
email = "[email protected]",
Expand All @@ -27,7 +27,9 @@ Imports:
methods,
checkmate,
matrixStats,
sparseMatrixStats
sparseMatrixStats,
memuse,
pryr
Suggests:
knitr,
rmarkdown,
Expand Down
7 changes: 7 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,22 @@ importFrom(checkmate,assert_int)
importFrom(checkmate,assert_integer)
importFrom(checkmate,assert_logical)
importFrom(checkmate,assert_number)
importFrom(checkmate,assert_numeric)
importFrom(checkmate,assert_vector)
importFrom(checkmate,check_matrix)
importFrom(checkmate,check_numeric)
importFrom(checkmate,test_atomic_vector)
importFrom(checkmate,test_logical)
importFrom(checkmate,test_matrix)
importFrom(checkmate,test_numeric)
importFrom(igraph,components)
importFrom(igraph,graph_from_adjacency_matrix)
importFrom(matrixStats,colSums2)
importFrom(memuse,Sys.meminfo)
importFrom(memuse,Sys.swapinfo)
importFrom(memuse,howbig)
importFrom(methods,as)
importFrom(methods,is)
importFrom(pryr,object_size)
importFrom(sparseMatrixStats,colAnyNAs)
useDynLib(diffusr)
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# diffusr v0.2.2

* Bug fix
* Performance improvement for sparse matrix

# diffusr v0.2.1

* Allow sparse matrix as input
Expand Down
4 changes: 4 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ stoch_col_norm_ <- function(W) {
.Call('_diffusr_stoch_col_norm_', PACKAGE = 'diffusr', W)
}

stoch_col_norm_s <- function(W) {
.Call('_diffusr_stoch_col_norm_s', PACKAGE = 'diffusr', W)
}

laplacian_ <- function(W) {
.Call('_diffusr_laplacian_', PACKAGE = 'diffusr', W)
}
Expand Down
71 changes: 61 additions & 10 deletions R/mat_util.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,28 @@
#' @param obj \code{\link[base]{matrix}} (or
#' \code{\link[Matrix:dgCMatrix-class]{dgCMatrix}},
#' \code{\link[base]{vector}}) that is stochstically normalized
#' @param no_r Do not use R for normalization
#' @param ... additional params
#' @return returns the normalized matrix/vector)
#'
#' @useDynLib diffusr
#'
#' @importFrom checkmate assert check_matrix test_numeric test_atomic_vector
#' test_logical
#' @importFrom memuse Sys.meminfo Sys.swapinfo howbig
#' @importFrom pryr object_size
#' @importFrom Rcpp sourceCpp
#'
#' @examples
#' W <- matrix(abs(rnorm(10000)), 100, 100)
#' stoch.W <- normalize.stochastic(W)
normalize.stochastic <- function(obj, ...) {
normalize.stochastic <- function(obj, no_r = NULL, ...) {
is_matrix <- FALSE
is_sparse <- FALSE
if (!test_logical(no_r, len = 1, any.missing = FALSE, all.missing = FALSE,
null.ok = FALSE)) {
no_r <- FALSE
}
if (test_numeric(obj, lower = 0, finite = TRUE, any.missing = FALSE,
all.missing = FALSE, null.ok = FALSE) &&
test_atomic_vector(obj)) {
Expand All @@ -55,16 +66,54 @@ normalize.stochastic <- function(obj, ...) {
is_matrix <- TRUE
}
if (is_matrix) {
sums <- colSums3(obj, is_sparse)
if (!all(.equals.double(sums, 1, .001))) {
message("normalizing column vectors!")
empt_col_val <- 1.0 / ncol(obj)
if (no_r) {
if (is_sparse) {
obj <- as(stoch_col_norm_s(obj), "dgCMatrix")
} else {
obj <- stoch_col_norm_(obj)
}
} else {
# check memory usage;
# if there is a memory shortage, then call C function directly
n <- as.numeric(ncol(obj))
memory_usage <- Sys.meminfo()
swap_usage <- Sys.swapinfo()
free_ram <- memory_usage$freeram@size
free_ram <- free_ram * switch(substring(memory_usage$freeram@unit, 1, 1),
"B" = 1 / 1048576, "K" = 1 / 1024, "M" = 1,
"G" = 1024, "T" = 1048576,
.default = 1073741824)
swap_ram <- swap_usage$freeswap@size
swap_ram <- swap_ram * switch(substring(swap_usage$freeswap@unit, 1, 1),
"B" = 1 / 1048576, "K" = 1 / 1024, "M" = 1,
"G" = 1024, "T" = 1048576,
.default = 1073741824)
free_ram <- free_ram + swap_ram
object_ram_p <- howbig(n, n, unit = "MiB")@size # size in practice
object_ram_t <- as.numeric(object_size(obj)) / 1e6 # size in theory (MiB)

# if memory is bigger than the temporary variables, then use R
if ((free_ram > object_ram_t * 4)) {
sums <- colSums3(obj, is_sparse)
if (!all(.equals.double(sums, 1, .001))) {
message("normalizing column vectors!")
empt_col_val <- 1.0 / n

obj <- obj / sums[col(obj)]
# check if need wipe zeros
zeros <- which(sums < empt_col_val)
if (length(zeros)) {
obj[, zeros] <- 0.00001
obj <- obj / sums[col(obj)]
# check if need wipe zeros
zeros <- which(sums < 0.00001)
if (length(zeros)) {
obj[, zeros] <- empt_col_val
}
}
} else if (free_ram < object_ram_p) {
stop("You don't have sufficient memory to normalize. Required: ",
round(object_ram_p / 1024, digits = 3), " GiB, but ",
round(free_ram / 1024, digits = 3), " available.")
} else {
warning("You have just enough memory to normalize; consider ",
"increasing your physical memory capacity in the future!")
obj <- stoch_col_norm_s(obj)
}
}
} else {
Expand All @@ -83,6 +132,8 @@ normalize.stochastic <- function(obj, ...) {
#' @param ... additional params
#' @return returns the Laplacian
#'
#' @useDynLib diffusr
#'
#' @importFrom checkmate assert check_matrix
#' @importFrom Rcpp sourceCpp
#'
Expand Down
15 changes: 10 additions & 5 deletions R/mrw.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,9 @@
#'
#' @useDynLib diffusr
#'
#' @importFrom checkmate assert_number assert_int assert_logical test_numeric
#' assert test_matrix check_numeric
#' @importFrom checkmate assert_number assert_int assert_logical assert_numeric
#' assert test_matrix check_numeric test_atomic_vector
#' @importFrom methods as
#' @importFrom Rcpp sourceCpp
#'
#' @examples
Expand Down Expand Up @@ -112,7 +113,7 @@ random.walk <- function(p0, graph, r = 0.5, niter = 1e4, thresh = 1e-4,
n_elements <- nrow(graph)
if (is.dgCMatrix(graph)) {
assert_dgCMatrix(graph)
sparse <- TRUE
sparse <- allow.ergodic <- TRUE
} else {
assert(
test_matrix(graph, mode = "numeric", min.rows = 3, nrows = n_elements,
Expand All @@ -125,8 +126,9 @@ random.walk <- function(p0, graph, r = 0.5, niter = 1e4, thresh = 1e-4,
}

# convert p0 if p0 is vector
if (test_numeric(p0, lower = 0, len = n_elements, finite = TRUE,
any.missing = FALSE, all.missing = FALSE, null.ok = FALSE)) {
if (test_atomic_vector(p0)) {
assert_numeric(p0, lower = 0, len = n_elements, finite = TRUE,
any.missing = FALSE, all.missing = FALSE, null.ok = FALSE)
p0 <- as.matrix(p0)
} else {
assert(
Expand All @@ -150,6 +152,9 @@ random.walk <- function(p0, graph, r = 0.5, niter = 1e4, thresh = 1e-4,

if (sparse) {
# sparse matrix
if (!is.dgCMatrix(stoch.graph)) {
stoch.graph <- as(stoch.graph, "CsparseMatrix")
}
l <- mrwr_s(normalize.stochastic(p0),
stoch.graph, r, thresh, niter, do.analytical)
} else {
Expand Down
21 changes: 21 additions & 0 deletions inst/include/diffusr_RcppExports.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,27 @@ namespace diffusr {
return Rcpp::as<MatrixXd >(rcpp_result_gen);
}

inline SpMat stoch_col_norm_s(const SpMat& W) {
typedef SEXP(*Ptr_stoch_col_norm_s)(SEXP);
static Ptr_stoch_col_norm_s p_stoch_col_norm_s = NULL;
if (p_stoch_col_norm_s == NULL) {
validateSignature("SpMat(*stoch_col_norm_s)(const SpMat&)");
p_stoch_col_norm_s = (Ptr_stoch_col_norm_s)R_GetCCallable("diffusr", "_diffusr_stoch_col_norm_s");
}
RObject rcpp_result_gen;
{
RNGScope RCPP_rngScope_gen;
rcpp_result_gen = p_stoch_col_norm_s(Shield<SEXP>(Rcpp::wrap(W)));
}
if (rcpp_result_gen.inherits("interrupted-error"))
throw Rcpp::internal::InterruptedException();
if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen))
throw Rcpp::LongjumpException(rcpp_result_gen);
if (rcpp_result_gen.inherits("try-error"))
throw Rcpp::exception(Rcpp::as<std::string>(rcpp_result_gen).c_str());
return Rcpp::as<SpMat >(rcpp_result_gen);
}

inline MatrixXd laplacian_(const MatrixXd& W) {
typedef SEXP(*Ptr_laplacian_)(SEXP);
static Ptr_laplacian_ p_laplacian_ = NULL;
Expand Down
4 changes: 3 additions & 1 deletion man/normalize.stochastic.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 37 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,40 @@ RcppExport SEXP _diffusr_stoch_col_norm_(SEXP WSEXP) {
UNPROTECT(1);
return rcpp_result_gen;
}
// stoch_col_norm_s
SpMat stoch_col_norm_s(const SpMat& W);
static SEXP _diffusr_stoch_col_norm_s_try(SEXP WSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::traits::input_parameter< const SpMat& >::type W(WSEXP);
rcpp_result_gen = Rcpp::wrap(stoch_col_norm_s(W));
return rcpp_result_gen;
END_RCPP_RETURN_ERROR
}
RcppExport SEXP _diffusr_stoch_col_norm_s(SEXP WSEXP) {
SEXP rcpp_result_gen;
{
Rcpp::RNGScope rcpp_rngScope_gen;
rcpp_result_gen = PROTECT(_diffusr_stoch_col_norm_s_try(WSEXP));
}
Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error");
if (rcpp_isInterrupt_gen) {
UNPROTECT(1);
Rf_onintr();
}
bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen);
if (rcpp_isLongjump_gen) {
Rcpp::internal::resumeJump(rcpp_result_gen);
}
Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error");
if (rcpp_isError_gen) {
SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen);
UNPROTECT(1);
Rf_error("%s", CHAR(rcpp_msgSEXP_gen));
}
UNPROTECT(1);
return rcpp_result_gen;
}
// laplacian_
MatrixXd laplacian_(const MatrixXd& W);
static SEXP _diffusr_laplacian__try(SEXP WSEXP) {
Expand Down Expand Up @@ -445,6 +479,7 @@ static int _diffusr_RcppExport_validate(const char* sig) {
if (signatures.empty()) {
signatures.insert("MatrixXd(*heat_diffusion_)(const MatrixXd&,const MatrixXd&,const double)");
signatures.insert("MatrixXd(*stoch_col_norm_)(const MatrixXd&)");
signatures.insert("SpMat(*stoch_col_norm_s)(const SpMat&)");
signatures.insert("MatrixXd(*laplacian_)(const MatrixXd&)");
signatures.insert("MatrixXd(*laplacian_s)(const SpMat&)");
signatures.insert("VectorXd(*node_degrees_)(const MatrixXd&)");
Expand All @@ -463,6 +498,7 @@ static int _diffusr_RcppExport_validate(const char* sig) {
RcppExport SEXP _diffusr_RcppExport_registerCCallable() {
R_RegisterCCallable("diffusr", "_diffusr_heat_diffusion_", (DL_FUNC)_diffusr_heat_diffusion__try);
R_RegisterCCallable("diffusr", "_diffusr_stoch_col_norm_", (DL_FUNC)_diffusr_stoch_col_norm__try);
R_RegisterCCallable("diffusr", "_diffusr_stoch_col_norm_s", (DL_FUNC)_diffusr_stoch_col_norm_s_try);
R_RegisterCCallable("diffusr", "_diffusr_laplacian_", (DL_FUNC)_diffusr_laplacian__try);
R_RegisterCCallable("diffusr", "_diffusr_laplacian_s", (DL_FUNC)_diffusr_laplacian_s_try);
R_RegisterCCallable("diffusr", "_diffusr_node_degrees_", (DL_FUNC)_diffusr_node_degrees__try);
Expand All @@ -480,6 +516,7 @@ RcppExport SEXP _diffusr_RcppExport_registerCCallable() {
static const R_CallMethodDef CallEntries[] = {
{"_diffusr_heat_diffusion_", (DL_FUNC) &_diffusr_heat_diffusion_, 3},
{"_diffusr_stoch_col_norm_", (DL_FUNC) &_diffusr_stoch_col_norm_, 1},
{"_diffusr_stoch_col_norm_s", (DL_FUNC) &_diffusr_stoch_col_norm_s, 1},
{"_diffusr_laplacian_", (DL_FUNC) &_diffusr_laplacian_, 1},
{"_diffusr_laplacian_s", (DL_FUNC) &_diffusr_laplacian_s, 1},
{"_diffusr_node_degrees_", (DL_FUNC) &_diffusr_node_degrees_, 1},
Expand Down
53 changes: 39 additions & 14 deletions src/mat_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,27 +22,51 @@

#include "../inst/include/diffusr.h"

template <typename T>
T stoch_col_norm_t(const T& W) {
size_t n = W.rows();
T res(n, n);
VectorXd colsums = W.transpose() * VectorXd::Ones(n);
//res.reserve((colsums * n).sum());
const double empt_col_val = 1.0 / n;
const double zero_col = 0.00001;

for (unsigned int i = 0; i < n; ++i) {
if (colsums[i] <= zero_col) {
for (size_t j = 0; j < n; ++j) {
res.coeffRef(j, i) = empt_col_val;
}
} else {
for (size_t j = 0; j < n; ++j) {
res.coeffRef(j, i) = W.coeff(j, i) / colsums(i);
}
}
}

return res;
}

//' Column normalize a matrix, so that it is stochastic.
//'
//' @noRd
//' @param W the adjacency matrix to be normalized
//' @return returns the normalized matrix
// [[Rcpp::interfaces(r, cpp)]]
// [[Rcpp::export]]
MatrixXd stoch_col_norm_(const MatrixXd& W) {
MatrixXd res(W.rows(), W.cols());
VectorXd colsums = W.colwise().sum();
const double empt_col_val = 1.0 / W.cols();
const double zero_col = 0.00001;
#pragma omp parallel for
for (unsigned int i = 0; i < W.cols(); ++i)
{
if (colsums[i] <= zero_col)
res.col(i).fill(empt_col_val);
else
res.col(i) = W.col(i) / colsums(i);
}
MatrixXd stoch_col_norm_(const MatrixXd &W) {
return stoch_col_norm_t(W);
}

//' Column normalize a matrix, so that it is stochastic.
//'
//' @noRd
//' @param W the adjacency matrix to be normalized
//' @return returns the normalized matrix
// [[Rcpp::interfaces(r, cpp)]]
// [[Rcpp::export]]
SpMat stoch_col_norm_s(const SpMat &W) {
SpMat res = stoch_col_norm_t(W);
res.makeCompressed();
return res;
}

Expand Down Expand Up @@ -141,10 +165,11 @@ MatrixXd hub_normalize_t(const temp &W) {

#pragma omp parallel for
for (unsigned int i = 0; i < n; ++i) {
double mh;
for (unsigned int j = 0; j < n; ++j) {
if (W.coeff(i, j) != 0) {
// Equivlant with: double mh = fc(node_degrees[i] / node_degrees[j]);
double mh = node_degrees[i] / node_degrees[j];
mh = node_degrees[i] / node_degrees[j];
res.coeffRef(i, j) = min(1.0, mh) / node_degrees[i];
}
}
Expand Down
Loading

0 comments on commit 1c59be9

Please sign in to comment.