Merge pull request #52 from koheiw/dev-zapsmall

Dev zapsmall
koheiw · Apr 15, 2024 · 02d7ea0 · 02d7ea0
2 parents 2582ec6 + 3a68aba
commit 02d7ea0
Show file tree

Hide file tree

Showing 11 changed files with 75 additions and 49 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,9 @@
+# proxyC 0.4.2
+
+## New features and improvements
+
+- Reduce the overhead for dense similarity matrices by improving rounding numbers and conversion to Rcpp vectors.
+
 # proxyC 0.4.1
 
 ## Bug fixes

diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -1,8 +1,8 @@
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
-cpp_linear <- function(mt1, mt2, method, rank, limit = -1.0, symm = FALSE, drop0 = FALSE, use_nan = FALSE, thread = -1L) {
-    .Call('_proxyC_cpp_linear', PACKAGE = 'proxyC', mt1, mt2, method, rank, limit, symm, drop0, use_nan, thread)
+cpp_linear <- function(mt1, mt2, method, rank, limit = -1.0, symm = FALSE, drop0 = FALSE, use_nan = FALSE, digits = 14L, thread = -1L) {
+    .Call('_proxyC_cpp_linear', PACKAGE = 'proxyC', mt1, mt2, method, rank, limit, symm, drop0, use_nan, digits, thread)
 }
 
 cpp_sd <- function(mt) {
@@ -13,8 +13,8 @@ cpp_nz <- function(mt) {
     .Call('_proxyC_cpp_nz', PACKAGE = 'proxyC', mt)
 }
 
-cpp_pair <- function(mt1, mt2, method, rank, limit = -1.0, weight = 1.0, smooth = 0, symm = FALSE, diag = FALSE, drop0 = FALSE, use_nan = FALSE, thread = -1L) {
-    .Call('_proxyC_cpp_pair', PACKAGE = 'proxyC', mt1, mt2, method, rank, limit, weight, smooth, symm, diag, drop0, use_nan, thread)
+cpp_pair <- function(mt1, mt2, method, rank, limit = -1.0, weight = 1.0, smooth = 0, symm = FALSE, diag = FALSE, drop0 = FALSE, use_nan = FALSE, digits = 14L, thread = -1L) {
+    .Call('_proxyC_cpp_pair', PACKAGE = 'proxyC', mt1, mt2, method, rank, limit, weight, smooth, symm, diag, drop0, use_nan, digits, thread)
 }
 
 cpp_get_max_thread <- function() {

diff --git a/R/proxy.R b/R/proxy.R
@@ -30,11 +30,11 @@
 #'   situations as above. If `NULL`, will also return zero but also generate a
 #'   warning (default).
 #' @param digits determines rounding of small values towards zero. Use primarily
-#'   to correct rounding errors in C++. See \link{zapsmall}.
-#' @details
-#'   ## Available Methods
+#'   to correct floating point errors. Rounding is performed in C++ in a similar
+#'   way as \link{zapsmall}.
+#' @details ## Available Methods
 #'
-#'   Similarity:
+#' Similarity:
 #' \itemize{
 #'   \item `cosine`: cosine similarity
 #'   \item `correlation`: Pearson's correlation
@@ -47,7 +47,7 @@
 #'   \item `faith`: Faith similarity
 #'   \item `simple matching`: the percentage of common elements
 #' }
-#'   Distance:
+#' Distance:
 #' \itemize{
 #'   \item `euclidean`: Euclidean distance
 #'   \item `chisquared`: chi-squared distance
@@ -60,18 +60,18 @@
 #'   \item `minkowski`: Minkowski distance
 #'   \item `hamming`: Hamming distance
 #' }
-#'   See the vignette for how the similarity and distance are computed:
-#'   `vignette("measures", package = "proxyC")`
+#' See the vignette for how the similarity and distance are computed:
+#' `vignette("measures", package = "proxyC")`
 #'
-#'   ## Parallel Computing
+#' ## Parallel Computing
 #'
-#'   It performs parallel computing using Intel oneAPI Threads Building Blocks.
-#'   The number of threads for parallel computing should be specified via
-#'   `options(proxyC.threads)` before calling the functions. If the value is -1,
-#'   all the available threads will be used. Unless the option is used, the
-#'   number of threads will be limited by the environmental variables
-#'   (`OMP_THREAD_LIMIT` or `RCPP_PARALLEL_NUM_THREADS`) to comply with CRAN
-#'   policy and offer backward compatibility.
+#' It performs parallel computing using Intel oneAPI Threads Building Blocks.
+#' The number of threads for parallel computing should be specified via
+#' `options(proxyC.threads)` before calling the functions. If the value is -1,
+#' all the available threads will be used. Unless the option is used, the number
+#' of threads will be limited by the environmental variables (`OMP_THREAD_LIMIT`
+#' or `RCPP_PARALLEL_NUM_THREADS`) to comply with CRAN policy and offer backward
+#' compatibility.
 #'
 #' @import methods Matrix
 #' @seealso zapsmall
@@ -202,6 +202,7 @@ proxy <- function(x, y = NULL, margin = 1,
             symm = symm,
             drop0 = drop0,
             use_nan = use_nan,
+            digits = digits,
             thread = getThreads()
         )
     } else {
@@ -221,12 +222,12 @@ proxy <- function(x, y = NULL, margin = 1,
             diag = diag,
             drop0 = drop0,
             use_nan = use_nan,
+            digits = digits,
             thread = getThreads()
         )
     }
     if (diag)
         result <- as(as(result, "diagonalMatrix"), "ddiMatrix")
-    result@x <- zapsmall(result@x, digits)
     dimnames(result) <- list(colnames(x), colnames(y))
     return(result)
 }

diff --git a/man/images/unnamed-chunk-5-1.png b/man/images/unnamed-chunk-5-1.png
diff --git a/man/images/unnamed-chunk-6-1.png b/man/images/unnamed-chunk-6-1.png
diff --git a/man/images/unnamed-chunk-8-1.png b/man/images/unnamed-chunk-8-1.png
diff --git a/man/simil.Rd b/man/simil.Rd
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
@@ -12,8 +12,8 @@ Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
 #endif
 
 // cpp_linear
-S4 cpp_linear(arma::sp_mat& mt1, arma::sp_mat& mt2, const int method, unsigned int rank, const double limit, bool symm, const bool drop0, const bool use_nan, const int thread);
-RcppExport SEXP _proxyC_cpp_linear(SEXP mt1SEXP, SEXP mt2SEXP, SEXP methodSEXP, SEXP rankSEXP, SEXP limitSEXP, SEXP symmSEXP, SEXP drop0SEXP, SEXP use_nanSEXP, SEXP threadSEXP) {
+S4 cpp_linear(arma::sp_mat& mt1, arma::sp_mat& mt2, const int method, unsigned int rank, const double limit, bool symm, const bool drop0, const bool use_nan, const int digits, const int thread);
+RcppExport SEXP _proxyC_cpp_linear(SEXP mt1SEXP, SEXP mt2SEXP, SEXP methodSEXP, SEXP rankSEXP, SEXP limitSEXP, SEXP symmSEXP, SEXP drop0SEXP, SEXP use_nanSEXP, SEXP digitsSEXP, SEXP threadSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -25,8 +25,9 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< bool >::type symm(symmSEXP);
     Rcpp::traits::input_parameter< const bool >::type drop0(drop0SEXP);
     Rcpp::traits::input_parameter< const bool >::type use_nan(use_nanSEXP);
+    Rcpp::traits::input_parameter< const int >::type digits(digitsSEXP);
     Rcpp::traits::input_parameter< const int >::type thread(threadSEXP);
-    rcpp_result_gen = Rcpp::wrap(cpp_linear(mt1, mt2, method, rank, limit, symm, drop0, use_nan, thread));
+    rcpp_result_gen = Rcpp::wrap(cpp_linear(mt1, mt2, method, rank, limit, symm, drop0, use_nan, digits, thread));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -53,8 +54,8 @@ BEGIN_RCPP
 END_RCPP
 }
 // cpp_pair
-S4 cpp_pair(arma::sp_mat& mt1, arma::sp_mat& mt2, const int method, unsigned int rank, const double limit, const double weight, const double smooth, bool symm, const bool diag, const bool drop0, const bool use_nan, const int thread);
-RcppExport SEXP _proxyC_cpp_pair(SEXP mt1SEXP, SEXP mt2SEXP, SEXP methodSEXP, SEXP rankSEXP, SEXP limitSEXP, SEXP weightSEXP, SEXP smoothSEXP, SEXP symmSEXP, SEXP diagSEXP, SEXP drop0SEXP, SEXP use_nanSEXP, SEXP threadSEXP) {
+S4 cpp_pair(arma::sp_mat& mt1, arma::sp_mat& mt2, const int method, unsigned int rank, const double limit, const double weight, const double smooth, bool symm, const bool diag, const bool drop0, const bool use_nan, const int digits, const int thread);
+RcppExport SEXP _proxyC_cpp_pair(SEXP mt1SEXP, SEXP mt2SEXP, SEXP methodSEXP, SEXP rankSEXP, SEXP limitSEXP, SEXP weightSEXP, SEXP smoothSEXP, SEXP symmSEXP, SEXP diagSEXP, SEXP drop0SEXP, SEXP use_nanSEXP, SEXP digitsSEXP, SEXP threadSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -69,8 +70,9 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< const bool >::type diag(diagSEXP);
     Rcpp::traits::input_parameter< const bool >::type drop0(drop0SEXP);
     Rcpp::traits::input_parameter< const bool >::type use_nan(use_nanSEXP);
+    Rcpp::traits::input_parameter< const int >::type digits(digitsSEXP);
     Rcpp::traits::input_parameter< const int >::type thread(threadSEXP);
-    rcpp_result_gen = Rcpp::wrap(cpp_pair(mt1, mt2, method, rank, limit, weight, smooth, symm, diag, drop0, use_nan, thread));
+    rcpp_result_gen = Rcpp::wrap(cpp_pair(mt1, mt2, method, rank, limit, weight, smooth, symm, diag, drop0, use_nan, digits, thread));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -96,10 +98,10 @@ END_RCPP
 }
 
 static const R_CallMethodDef CallEntries[] = {
-    {"_proxyC_cpp_linear", (DL_FUNC) &_proxyC_cpp_linear, 9},
+    {"_proxyC_cpp_linear", (DL_FUNC) &_proxyC_cpp_linear, 10},
     {"_proxyC_cpp_sd", (DL_FUNC) &_proxyC_cpp_sd, 1},
     {"_proxyC_cpp_nz", (DL_FUNC) &_proxyC_cpp_nz, 1},
-    {"_proxyC_cpp_pair", (DL_FUNC) &_proxyC_cpp_pair, 12},
+    {"_proxyC_cpp_pair", (DL_FUNC) &_proxyC_cpp_pair, 13},
     {"_proxyC_cpp_get_max_thread", (DL_FUNC) &_proxyC_cpp_get_max_thread, 0},
     {"_proxyC_cpp_tbb_enabled", (DL_FUNC) &_proxyC_cpp_tbb_enabled, 0},
     {NULL, NULL, 0}

diff --git a/src/linear.cpp b/src/linear.cpp
@@ -35,8 +35,8 @@ void proxy_linear(const uword i,
                   const rowvec& square1, const rowvec& center1,
                   const rowvec& square2, const rowvec& center2,
                   const int method,
-                  const unsigned int rank, const double limit,
-                  const bool symm, const bool drop0, const bool use_nan) {
+                  const unsigned int rank, const double limit, const bool symm,
+                  const bool drop0, const bool use_nan, const int digits) {
 
     uword nrow = mt1t.n_rows;
     uword ncol = mt1t.n_cols;
@@ -58,12 +58,14 @@ void proxy_linear(const uword i,
             simils = to_vector(sqrt(trans(mt1t * mt2.col(i)) * -2 + square1 + square2[i]));
             break;
         }
+        simils = round(simils, digits);
         double l = get_limit(simils, rank, limit);
         for (std::size_t k = 0; k < simils.size(); k++) {
+            double s = simils[k];
             if (symm && k > i) continue;
             if (drop0 && simils[k] == 0) continue;
-            if (simils[k] >= l || (use_nan && std::isnan(simils[k])))
-                simil_tri.push_back(std::make_tuple(k, i, simils[k]));
+            if (s >= l || (use_nan && std::isnan(s)))
+                simil_tri.push_back(std::make_tuple(k, i, s));
         }
     //}
 }
@@ -77,6 +79,7 @@ S4 cpp_linear(arma::sp_mat& mt1,
               bool symm = false,
               const bool drop0 = false,
               const bool use_nan = false,
+              const int digits = 14,
               const int thread = -1) {
 
     if (mt1.n_rows != mt2.n_rows)
@@ -120,15 +123,15 @@ S4 cpp_linear(arma::sp_mat& mt1,
             for (int i = r.begin(); i < r.end(); i++) {
                 proxy_linear(i, mt1, mt2, simil_tri,
                              square1, center1, square2, center2,
-                             method, rank, limit, symm, drop0, use_nan);
+                             method, rank, limit, symm, drop0, use_nan, digits);
             }
         });
     });
 # else
     for (std::size_t i = 0; i < I; i++) {
         proxy_linear(i, mt1, mt2, simil_tri,
                      square1, center1, square2, center2,
-                     method, rank, limit, symm, drop0, use_nan);
+                     method, rank, limit, symm, drop0, use_nan, digits);
     }
 # endif
 

diff --git a/src/pair.cpp b/src/pair.cpp
@@ -136,7 +136,7 @@ void proxy_pair(const uword i,
                 const int method,
                 const unsigned int rank, const double limit, const bool symm,
                 const bool diag, const double weight, const double smooth,
-                const bool drop0, const bool use_nan) {
+                const bool drop0, const bool use_nan, const int digits) {
 
     arma::uword nrow = mt1.n_rows;
     arma::uword ncol = mt1.n_cols;
@@ -215,14 +215,16 @@ void proxy_pair(const uword i,
         //Rcout << "simil=" << simil << "\n";
         simils.push_back(simil);
     }
+    simils = round(simils, digits);
     double l = get_limit(simils, rank, limit);
     for (std::size_t k = 0; k < simils.size(); k++) {
-        if (drop0 && simils[k] == 0) continue;
-        if (simils[k] >= l || (use_nan && std::isnan(simils[k]))) {
+        double s = simils[k];
+        if (drop0 && s == 0) continue;
+        if (s >= l || (use_nan && std::isnan(s))) {
             if (diag) {
-                simil_tri.push_back(std::make_tuple(i, i, simils[k]));
+                simil_tri.push_back(std::make_tuple(i, i, s));
             } else {
-                simil_tri.push_back(std::make_tuple(k, i, simils[k]));
+                simil_tri.push_back(std::make_tuple(k, i, s));
             }
         }
     }
@@ -240,6 +242,7 @@ S4 cpp_pair(arma::sp_mat& mt1,
             const bool diag = false,
             const bool drop0 = false,
             const bool use_nan = false,
+            const int digits = 14,
             const int thread = -1) {
 
     if (mt1.n_rows != mt2.n_rows)
@@ -258,14 +261,14 @@ S4 cpp_pair(arma::sp_mat& mt1,
         tbb::parallel_for(tbb::blocked_range<int>(0, I), [&](tbb::blocked_range<int> r) {
             for (int i = r.begin(); i < r.end(); i++) {
                 proxy_pair(i, mt1, mt2, simil_tri, method, rank, limit, symm,
-                           diag, weight, smooth, drop0, use_nan);
+                           diag, weight, smooth, drop0, use_nan, digits);
             }
         });
     });
 #else
     for (std::size_t i = 0; i < I; i++) {
         proxy_pair(i, mt1, mt2, simil_tri, method, rank, limit, symm,
-                   diag, weight, smooth, drop0, use_nan);
+                   diag, weight, smooth, drop0, use_nan, digits);
     }
 # endif
 

diff --git a/src/proxyc.h b/src/proxyc.h
@@ -38,11 +38,12 @@ namespace proxyc{
         IntegerVector i_(l), j_(l);
         NumericVector x_(l);
 
-        for (std::size_t k = 0; k < tri.size(); k++) {
-            Triplet t = tri[k];
+        std::size_t k = 0;
+        for (Triplet t : tri) {
             i_[k] = std::get<0>(t);
             j_[k] = std::get<1>(t);
             x_[k] = std::get<2>(t);
+            k++;
         }
         if (symmetric) {
             S4 simil_("dsTMatrix");
@@ -62,7 +63,8 @@ namespace proxyc{
         }
     }
 
-    inline double get_limit(std::vector<double> simils, const unsigned int rank, double limit) {
+    inline double get_limit(std::vector<double> simils, const unsigned int rank,
+                            double limit) {
 
         if (simils.size() > rank) {
             std::nth_element(simils.begin(), simils.begin() + rank - 1, simils.end(),
@@ -73,6 +75,14 @@ namespace proxyc{
         return limit;
     }
 
+    inline std::vector<double> round(std::vector<double> simils, const int digits) {
+        double shift = std::pow(10, digits);
+        for (auto it = simils.begin() ; it != simils.end(); ++it) {
+            *it = std::round(*it * shift) / shift;
+        }
+        return simils;
+    }
+
     inline std::vector<double> replace_inf(std::vector<double> simils) {
         for (auto it = simils.begin() ; it != simils.end(); ++it) {
             if (std::isinf(*it)) {