Merge pull request #703 from SebKrantz/development

Development
SebKrantz · Jan 6, 2025 · e1090e3 · e1090e3
2 parents f319f91 + 50f3994
commit e1090e3
Show file tree

Hide file tree

Showing 8 changed files with 20 additions and 10 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: collapse
 Title: Advanced and Fast Data Transformation
-Version: 2.0.18.9000
-Date: 2024-12-12
+Version: 2.0.19
+Date: 2025-01-06
 Authors@R: c(
            person("Sebastian", "Krantz", role = c("aut", "cre"), 
                   email = "[email protected]", 

diff --git a/NEWS.md b/NEWS.md
@@ -1,9 +1,10 @@
-# collapse 2.0.18.9000
+# collapse 2.0.19
 
 * `fmatch(factor(NA), NA)` now gives `1` instead of `NA`. Thanks @NicChr (#675).
 
 * New developer focused vignette on [developing with *collapse*](https://sebkrantz.github.io/collapse/articles/developing_with_collapse.html).
 
+* Fixed minor CRAN issues (#676, #702). 
 
 # collapse 2.0.18
 

diff --git a/R/join.R b/R/join.R
@@ -293,8 +293,10 @@ join <- function(x, y,
                  anti = structure(alloc(1L, fnrow(res)), levels = x_name, class = c("factor", "na.included")))
     attr(mc, "on.cols") <- `names<-`(list(xon, `names<-`(on, NULL)), c(x_name, y_name))
     mc_name <- if(is.character(column)) column else ".join"
-    if(keep.col.order == 1L) res[[mc_name]] <- mc
-    else res <- c(res[ixon], `names<-`(list(mc), mc_name), res[-ixon])
+    if(keep.col.order == 1L) res[[mc_name]] <- mc else {
+      if(keep.col.order == 2L) ixon <- seq_along(ixon)
+      res <- c(res[ixon], `names<-`(list(mc), mc_name), res[-ixon])
+    }
   } else if(!keep.col.order) res <- c(res[ixon], res[-ixon])
 
   # Final steps

diff --git a/src/fbstats.cpp b/src/fbstats.cpp
@@ -480,9 +480,7 @@ SEXP fbstatsCpp(const NumericVector& x, bool ext = false, int ng = 0, const Inte
         result(_,2) = replaceC12(as<NumericMatrix>(fbstatstemp(within, ext, ng, g, w, false, stable_algo)), gnpids, true);
         if(setn) {
           Rf_dimgets(result, Dimension(ng, d, 3));
-          Rf_dimnamesgets(result, List::create(gn, (ext) ? CharacterVector::create("N/T","Mean","SD","Min","Max","Skew","Kurt") :
-                                                    CharacterVector::create("N/T","Mean","SD","Min","Max"),
-                                                    CharacterVector::create("Overall","Between","Within")));
+          Rf_dimnamesgets(result, List::create(gn, get_stats_names(d, true), CharacterVector::create("Overall","Between","Within")));
           Rf_classgets(result, CharacterVector::create("qsu","array","table"));
         }
         return(result);

diff --git a/tests/testthat/test-join.R b/tests/testthat/test-join.R
@@ -22,6 +22,11 @@ for (sort in c(FALSE, TRUE)) {
   expect_identical(join(df1, df2, how = "full", sort = sort), merge(df1, df2, all = TRUE))
 }
 
+expect_identical(names(join(df1, df2, on = "id2", how = "full", keep.col.order = FALSE, column = TRUE))[1:2], c("id2", ".join"))
+expect_identical(names(join(df1, df2, on = "id2", how = "full", keep.col.order = FALSE, column = TRUE, multiple = TRUE))[1:2], c("id2", ".join"))
+expect_identical(names(join(df1, df2, on = "id2", how = "right", keep.col.order = FALSE, column = TRUE))[1:2], c("id2", ".join"))
+expect_identical(names(join(df1, df2, on = "id2", how = "right", keep.col.order = FALSE, column = TRUE, multiple = TRUE))[1:2], c("id2", ".join"))
+
 # Different types of joins
 # https://github.com/SebKrantz/collapse/issues/503
 x1 = data.frame(

diff --git a/tests/testthat/test-miscellaneous-issues.R b/tests/testthat/test-miscellaneous-issues.R
@@ -469,6 +469,10 @@ test_that("Misc bugs", {
   expect_visible(qF(c(4L, 1L, NA), sort = FALSE))
   expect_equal(fmatch(factor(NA, exclude = NULL), NA), 1L) # #675
   expect_equal(fmatch(factor(NA), NA), 1L)
+  expect_visible(qsu(mtcars$mpg, mtcars$cyl, mtcars$vs, mtcars$wt))
+  expect_visible(qsu(mtcars$mpg, mtcars$cyl, mtcars$vs, mtcars$wt, higher = TRUE))
+  expect_visible(qsu(mtcars$mpg, mtcars$cyl, mtcars$vs, mtcars$wt, array = FALSE))
+  expect_visible(qsu(mtcars$mpg, mtcars$cyl, mtcars$vs, mtcars$wt, higher = TRUE, array = FALSE))
 })
 
 

diff --git a/vignettes/developing_with_collapse.Rmd b/vignettes/developing_with_collapse.Rmd
@@ -464,7 +464,7 @@ microbenchmark::microbenchmark(pwnobs_list(mtcNA), pwnobs_list_opt(mtcNA))
 #  pwnobs_list_opt(mtcNA)  27.429  31.1600  33.38507  32.964  35.137  45.387   100
 ```
 
-Evidently, the optimized function is 6x faster on this (small) dataset and we have changed nothing to the loops doing the computation. With larger data the difference is less stark, but you never know what's going on in methods you have not written and how they scale. My advice is: try to avoid them, use simple objects and take full control over your code. This also makes your code more robust and you can create class-agnostic code.
+Evidently, the optimized function is 6x faster on this (small) dataset and we have changed nothing in the loops doing the computation. With larger data the difference is less stark, but you never know what's going on in methods you have not written and how they scale. My advice is: try to avoid them, use simple objects and take full control over your code. This also makes your code more robust and you can create class-agnostic code. If the latter is your intent, the [vignette on *collapse*'s object handling](https://sebkrantz.github.io/collapse/articles/collapse_object_handling.html) will also be helpful.
 
 If you only use *collapse* functions this discussion is void - all *collapse* functions designed for data frames, including `join()`, `pivot()`, `fsubset()`, etc., internally handle your data as a list and are equally efficient on data frames and lists. However, if you want to use base R semantics (`[`, etc.) alongside *collapse* and other functions, it makes sense to unclass incoming data frame-like objects and reclass them at the end. 
 

diff --git a/vignettes/developing_with_collapse.Rmd.orig b/vignettes/developing_with_collapse.Rmd.orig
@@ -323,7 +323,7 @@ identical(pwnobs_list(mtcNA), pwnobs_list_opt(mtcNA))
 microbenchmark::microbenchmark(pwnobs_list(mtcNA), pwnobs_list_opt(mtcNA))
 ```
 
-Evidently, the optimized function is 6x faster on this (small) dataset and we have changed nothing to the loops doing the computation. With larger data the difference is less stark, but you never know what's going on in methods you have not written and how they scale. My advice is: try to avoid them, use simple objects and take full control over your code. This also makes your code more robust and you can create class-agnostic code.
+Evidently, the optimized function is 6x faster on this (small) dataset and we have changed nothing in the loops doing the computation. With larger data the difference is less stark, but you never know what's going on in methods you have not written and how they scale. My advice is: try to avoid them, use simple objects and take full control over your code. This also makes your code more robust and you can create class-agnostic code. If the latter is your intent, the [vignette on *collapse*'s object handling](https://sebkrantz.github.io/collapse/articles/collapse_object_handling.html) will also be helpful.
 
 If you only use *collapse* functions this discussion is void - all *collapse* functions designed for data frames, including `join()`, `pivot()`, `fsubset()`, etc., internally handle your data as a list and are equally efficient on data frames and lists. However, if you want to use base R semantics (`[`, etc.) alongside *collapse* and other functions, it makes sense to unclass incoming data frame-like objects and reclass them at the end.