From 1bfee02ee7c96a980900d8c74acef9d48e14eabb Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Sat, 19 Oct 2024 14:51:18 +0200 Subject: [PATCH 1/8] Fix URL. --- man/collapse-options.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/collapse-options.Rd b/man/collapse-options.Rd index ab50a8cf..2fbef5d2 100644 --- a/man/collapse-options.Rd +++ b/man/collapse-options.Rd @@ -83,7 +83,7 @@ Setting keywords "fast-fun", "fast-stat-fun", "fast-trfm-fun" or "all" with \cod \emph{Note} also that masking does not change documentation links, so you need to look up the f- version of a function to get the right documentation. -A safe way to set options affecting startup behavior is by using a \code{\link{.Rprofile}} file in your user or project directory (see also \href{https://www.statmethods.net/interface/customizing.html}{here}, the user-level file is located at \code{file.path(Sys.getenv("HOME"), ".Rprofile")} and can be edited using \code{file.edit(Sys.getenv("HOME"), ".Rprofile")}), or by using a \href{https://fastverse.github.io/fastverse/articles/fastverse_intro.html#custom-fastverse-configurations-for-projects}{\code{.fastverse}} configuration file in the project directory. +A safe way to set options affecting startup behavior is by using a \code{\link{.Rprofile}} file in your user or project directory (see also \href{https://www.datacamp.com/doc/r/customizing}{here}, the user-level file is located at \code{file.path(Sys.getenv("HOME"), ".Rprofile")} and can be edited using \code{file.edit(Sys.getenv("HOME"), ".Rprofile")}), or by using a \href{https://fastverse.github.io/fastverse/articles/fastverse_intro.html#custom-fastverse-configurations-for-projects}{\code{.fastverse}} configuration file in the project directory. \code{options("collapse_remove")} does in fact remove functions from the namespace and cannot be reversed by \code{set_collapse(remove = NULL)} once the package is loaded. It is only reversed by re-loading \emph{collapse}. } From eeaa49c7a66bab5a6a6ab7d4eb62d64cba0ad74e Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Sat, 26 Oct 2024 19:45:00 +0200 Subject: [PATCH 2/8] Update URL. --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 13dc53cc..56950f52 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,7 +32,7 @@ Description: A C/C++ based package for advanced data transformation and 'plm' (panel-series and data frames), and 'xts'/'zoo'. URL: https://sebkrantz.github.io/collapse/, https://github.com/SebKrantz/collapse, - https://twitter.com/collapse_R + https://x.com/collapse_R BugReports: https://github.com/SebKrantz/collapse/issues License: GPL (>= 2) | file LICENSE Encoding: UTF-8 From cea52fabc18b6c2ff8003b6f11ce458fcc50efea Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Sun, 27 Oct 2024 21:57:14 +0100 Subject: [PATCH 3/8] Return WeightSum column in qsu() (#650). --- src/fbstats.cpp | 421 +++++++++++++++++++++++++++++++----------------- 1 file changed, 275 insertions(+), 146 deletions(-) diff --git a/src/fbstats.cpp b/src/fbstats.cpp index 5e6b8fe8..41d5d85e 100644 --- a/src/fbstats.cpp +++ b/src/fbstats.cpp @@ -8,17 +8,30 @@ using namespace Rcpp; // return x != x; // } +CharacterVector get_stats_names(int n, bool panel = false) { + String N = panel ? "N/T" : "N"; + switch(n) { + case 5: return CharacterVector::create(N,"Mean","SD","Min","Max"); + case 6: return CharacterVector::create(N,"WeightSum","Mean","SD","Min","Max"); + case 7: return CharacterVector::create(N,"Mean","SD","Min","Max","Skew","Kurt"); + case 8: return CharacterVector::create(N,"WeightSum","Mean","SD","Min","Max","Skew","Kurt"); + default: stop("length of stats names needs to be between 5 and 8"); + } +} + // use constant references on the temp function also ? NumericVector fbstatstemp(NumericVector x, bool ext = false, int ng = 0, IntegerVector g = 0, SEXP w = R_NilValue, bool setn = true, bool stable_algo = true, SEXP gn = R_NilValue) { int l = x.size(); + bool weights = !Rf_isNull(w); if(!ext) { if(ng == 0) { // No groups if(l == 1) { // need this so that qsu(1) works properly - NumericVector result = NumericVector::create(1,x[0],NA_REAL,x[0],x[0]); + NumericVector result = weights ? NumericVector::create(1,Rf_asReal(w),x[0],NA_REAL,x[0],x[0]) : + NumericVector::create(1,x[0],NA_REAL,x[0],x[0]); if(setn) { - Rf_namesgets(result, CharacterVector::create("N","Mean","SD","Min","Max")); + Rf_namesgets(result, get_stats_names(5+weights)); Rf_classgets(result, CharacterVector::create("qsu","table")); } return result; @@ -27,7 +40,7 @@ NumericVector fbstatstemp(NumericVector x, bool ext = false, int ng = 0, Integer // double n = 0, min = R_PosInf, max = R_NegInf; // long double mean = 0, d1 = 0, M2 = 0; double n = 0, min = R_PosInf, max = R_NegInf, mean = 0, d1 = 0, M2 = 0; - if(Rf_isNull(w)) { // No weights + if(!weights) { // No weights while(std::isnan(x[j]) && j!=0) --j; if(j != 0) { // if(j == 0) stop("Not enough non-mising obs."); if(stable_algo) { @@ -59,6 +72,13 @@ NumericVector fbstatstemp(NumericVector x, bool ext = false, int ng = 0, Integer mean = (double)sum; } } else mean = M2 = min = max = NA_REAL; + if(std::isnan(M2)) M2 = NA_REAL; + NumericVector result = NumericVector::create(n,mean,M2,min,max); // NumericVector::create(n,(double)mean,(double)M2,min,max); + if(setn) { + Rf_namesgets(result, CharacterVector::create("N","Mean","SD","Min","Max")); + Rf_classgets(result, CharacterVector::create("qsu","table")); + } + return result; } else { // with weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); @@ -78,28 +98,27 @@ NumericVector fbstatstemp(NumericVector x, bool ext = false, int ng = 0, Integer } M2 = sqrt(M2/(sumw-1)); } else mean = M2 = min = max = NA_REAL; + if(std::isnan(M2)) M2 = NA_REAL; + NumericVector result = NumericVector::create(n,sumw,mean,M2,min,max); // NumericVector::create(n,(double)mean,(double)M2,min,max); + if(setn) { + Rf_namesgets(result, CharacterVector::create("N","WeightSum","Mean","SD","Min","Max")); + Rf_classgets(result, CharacterVector::create("qsu","table")); + } + return result; } - if(std::isnan(M2)) M2 = NA_REAL; - NumericVector result = NumericVector::create(n,mean,M2,min,max); // NumericVector::create(n,(double)mean,(double)M2,min,max); - if(setn) { - Rf_namesgets(result, CharacterVector::create("N","Mean","SD","Min","Max")); - Rf_classgets(result, CharacterVector::create("qsu","table")); - } - return result; - } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); // long double d1 = 0; double d1 = 0; int k = 0; - NumericMatrix result(ng, 5); // = no_init_matrix initializing is better -> valgrind + NumericMatrix result(ng, 5+weights); // = no_init_matrix initializing is better -> valgrind NumericMatrix::Column n = result( _ , 0); - NumericMatrix::Column mean = result( _ , 1); - NumericMatrix::Column M2 = result( _ , 2); - NumericMatrix::Column min = result( _ , 3); - NumericMatrix::Column max = result( _ , 4); + NumericMatrix::Column mean = result( _ , 1+weights); + NumericMatrix::Column M2 = result( _ , 2+weights); + NumericMatrix::Column min = result( _ , 3+weights); + NumericMatrix::Column max = result( _ , 4+weights); std::fill(M2.begin(), M2.end(), NA_REAL); - if(Rf_isNull(w)) { // No weights + if(!weights) { // No weights if(stable_algo) { for(int i = l; i--; ) { if(std::isnan(x[i])) continue; @@ -143,7 +162,8 @@ NumericVector fbstatstemp(NumericVector x, bool ext = false, int ng = 0, Integer } else { // with weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); - NumericVector sumw(ng); // = no_init_vector(ng); // better for valgrind + // NumericVector sumw(ng); // = no_init_vector(ng); // better for valgrind + NumericMatrix::Column sumw = result( _ , 1); for(int i = l; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; k = g[i]-1; @@ -165,7 +185,7 @@ NumericVector fbstatstemp(NumericVector x, bool ext = false, int ng = 0, Integer for(int i = ng; i--; ) if(!std::isnan(M2[i])) M2[i] = sqrt(M2[i]/(sumw[i]-1)); } if(setn) { - Rf_dimnamesgets(result, List::create(gn, CharacterVector::create("N","Mean","SD","Min","Max"))); + Rf_dimnamesgets(result, List::create(gn, get_stats_names(5+weights))); Rf_classgets(result, CharacterVector::create("qsu","matrix","table")); } return result; @@ -176,7 +196,7 @@ NumericVector fbstatstemp(NumericVector x, bool ext = false, int ng = 0, Integer // double n = 0, min = R_PosInf, max = R_NegInf; // long double mean = 0, d1 = 0, dn = 0, dn2 = 0, term1 = 0, M2 = 0, M3 = 0, M4 = 0; double n = 0, min = R_PosInf, max = R_NegInf, mean = 0, d1 = 0, dn = 0, dn2 = 0, term1 = 0, M2 = 0, M3 = 0, M4 = 0; - if(Rf_isNull(w)) { // No weights + if(!weights) { // No weights while(std::isnan(x[j]) && j!=0) --j; if(j != 0) { // if(j == 0) stop("Not enough non-mising obs."); for(int i = j+1; i--; ) { @@ -196,6 +216,12 @@ NumericVector fbstatstemp(NumericVector x, bool ext = false, int ng = 0, Integer M3 = (sqrt(n)*M3) / sqrt(pow(M2,3)); // Skewness M2 = sqrt(M2/(n-1)); // Standard Deviation } else mean = M2 = M3 = M4 = min = max = NA_REAL; + NumericVector result = NumericVector::create(n,mean,M2,min,max,M3,M4); // NumericVector::create(n,(double)mean,(double)M2,min,max,(double)M3,(double)M4); + if(setn) { + Rf_namesgets(result, CharacterVector::create("N","Mean","SD","Min","Max","Skew","Kurt")); + Rf_classgets(result, CharacterVector::create("qsu","table")); + } + return result; } else { // with weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); @@ -225,28 +251,27 @@ NumericVector fbstatstemp(NumericVector x, bool ext = false, int ng = 0, Integer M3 = (sqrt(sumw)*M3l) / sqrt(pow(M2l,3)); // Skewness M2 = sqrt(M2l/(sumw-1)); // Standard Deviation } else mean = M2 = M3 = M4 = min = max = NA_REAL; + NumericVector result = NumericVector::create(n,sumw,mean,M2,min,max,M3,M4); // NumericVector::create(n,(double)mean,(double)M2,min,max,(double)M3,(double)M4); + if(setn) { + Rf_namesgets(result, CharacterVector::create("N","WeightSum","Mean","SD","Min","Max","Skew","Kurt")); + Rf_classgets(result, CharacterVector::create("qsu","table")); + } + return result; } - NumericVector result = NumericVector::create(n,mean,M2,min,max,M3,M4); // NumericVector::create(n,(double)mean,(double)M2,min,max,(double)M3,(double)M4); - if(setn) { - Rf_namesgets(result, CharacterVector::create("N","Mean","SD","Min","Max","Skew","Kurt")); - Rf_classgets(result, CharacterVector::create("qsu","table")); - } - return result; - } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); double d1 = 0, dn = 0, dn2 = 0, term1 = 0; int k = 0; - NumericMatrix result(ng, 7); // = no_init_matrix // Initializing better -> valgrind + NumericMatrix result(ng, 7+weights); // = no_init_matrix // Initializing better -> valgrind NumericMatrix::Column n = result( _ , 0); - NumericMatrix::Column mean = result( _ , 1); - NumericMatrix::Column M2 = result( _ , 2); - NumericMatrix::Column min = result( _ , 3); - NumericMatrix::Column max = result( _ , 4); - NumericMatrix::Column M3 = result( _ , 5); - NumericMatrix::Column M4 = result( _ , 6); + NumericMatrix::Column mean = result( _ , 1+weights); + NumericMatrix::Column M2 = result( _ , 2+weights); + NumericMatrix::Column min = result( _ , 3+weights); + NumericMatrix::Column max = result( _ , 4+weights); + NumericMatrix::Column M3 = result( _ , 5+weights); + NumericMatrix::Column M4 = result( _ , 6+weights); std::fill(M2.begin(), M2.end(), NA_REAL); - if(Rf_isNull(w)) { // No weights + if(!weights) { // No weights for(int i = l; i--; ) { if(std::isnan(x[i])) continue; k = g[i]-1; @@ -275,7 +300,8 @@ NumericVector fbstatstemp(NumericVector x, bool ext = false, int ng = 0, Integer } else { // with weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); - NumericVector sumw(ng); // = no_init_vector(ng); // better for valgrind + // NumericVector sumw(ng); // = no_init_vector(ng); // better for valgrind + NumericMatrix::Column sumw = result( _ , 1); for(int i = l; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; k = g[i]-1; @@ -309,7 +335,7 @@ NumericVector fbstatstemp(NumericVector x, bool ext = false, int ng = 0, Integer } } if(setn) { - Rf_dimnamesgets(result, List::create(gn, CharacterVector::create("N","Mean","SD","Min","Max","Skew","Kurt"))); + Rf_dimnamesgets(result, List::create(gn, get_stats_names(7+weights))); Rf_classgets(result, CharacterVector::create("qsu","matrix","table")); } return result; @@ -325,12 +351,20 @@ NumericVector fbstatstemp(NumericVector x, bool ext = false, int ng = 0, Integer // } } -inline NumericVector replaceC1(NumericMatrix x, NumericVector y, bool div = false) { +inline NumericVector replaceC12(NumericMatrix x, NumericVector y, bool div = false) { + int nc = x.ncol(); if(div) { NumericMatrix::Column C1 = x(_, 0); // best ? C1 = C1 / y; + if(nc == 6 || nc == 8) { // WeightSum column + NumericMatrix::Column C2 = x(_, 1); + C2 = C2 / y; + } } else { x(_, 0) = y; // best way ? use NumericMatrix::Column ? + if(nc == 6 || nc == 8) { // WeightSum column + x(_, 1) = y; + } } return x; } @@ -340,7 +374,6 @@ SEXP fbstatsCpp(const NumericVector& x, bool ext = false, int ng = 0, const Inte int npg = 0, const IntegerVector& pg = 0, const SEXP& w = R_NilValue, bool stable_algo = true, bool array = true, bool setn = true, const SEXP& gn = R_NilValue) { - int l = x.size(), d = (ext) ? 7 : 5; if(npg == 0) { // No panel if(ng == 0) { // No groups @@ -349,13 +382,14 @@ SEXP fbstatsCpp(const NumericVector& x, bool ext = false, int ng = 0, const Inte return(fbstatstemp(x, ext, ng, g, w, setn, stable_algo, gn)); } } else { + int l = x.size(); if(pg.size() != l) stop("length(pid) must match nrow(X)"); bool weights = !Rf_isNull(w); + int d = d = ((ext) ? 7 : 5) + weights; NumericVector sum(npg, NA_REAL); NumericVector sumw((weights) ? npg : 1); // no_init_vector(npg) : no_init_vector(1); // better for valgrind double osum = 0; - if(!weights) { IntegerVector n(npg, 1); for(int i = l; i--; ) { @@ -409,10 +443,13 @@ SEXP fbstatsCpp(const NumericVector& x, bool ext = false, int ng = 0, const Inte result(1, _) = (weights) ? fbstatstemp(sum, ext, 0, 0, sumw, false, stable_algo) : fbstatstemp(sum, ext, 0, 0, w, false, stable_algo); result(2, _) = fbstatstemp(within, ext, 0, 0, w, false, stable_algo); result[2] /= result[1]; + if(weights) { + result[4] = result[1]; + result[5] /= result[1]; + } if(setn) { Rf_dimnamesgets(result, List::create(CharacterVector::create("Overall","Between","Within"), - (ext) ? CharacterVector::create("N/T","Mean","SD","Min","Max","Skew","Kurt") : - CharacterVector::create("N/T","Mean","SD","Min","Max"))); + get_stats_names(d, true))); Rf_classgets(result, CharacterVector::create("qsu","matrix","table")); } return(result); @@ -439,8 +476,8 @@ SEXP fbstatsCpp(const NumericVector& x, bool ext = false, int ng = 0, const Inte if(array) { NumericMatrix result = no_init_matrix(d*ng, 3); result(_,0) = fbstatstemp(x, ext, ng, g, w, false, stable_algo); - result(_,1) = replaceC1(as(fbstatstemp(between, ext, ng, g, w, false, stable_algo)), gnpids); // how to do this ? -> above best approach ? - result(_,2) = replaceC1(as(fbstatstemp(within, ext, ng, g, w, false, stable_algo)), gnpids, true); + result(_,1) = replaceC12(as(fbstatstemp(between, ext, ng, g, w, false, stable_algo)), gnpids); // how to do this ? -> above best approach ? + result(_,2) = replaceC12(as(fbstatstemp(within, ext, ng, g, w, false, stable_algo)), gnpids, true); if(setn) { Rf_dimgets(result, Dimension(ng, d, 3)); Rf_dimnamesgets(result, List::create(gn, (ext) ? CharacterVector::create("N/T","Mean","SD","Min","Max","Skew","Kurt") : @@ -452,8 +489,8 @@ SEXP fbstatsCpp(const NumericVector& x, bool ext = false, int ng = 0, const Inte } else { List result(3); // option array ? result[0] = fbstatstemp(x, ext, ng, g, w, true, stable_algo, gn); - result[1] = replaceC1(as(fbstatstemp(between, ext, ng, g, w, true, stable_algo, gn)), gnpids); // how to do this ? -> above best approach ? - result[2] = replaceC1(as(fbstatstemp(within, ext, ng, g, w, true, stable_algo, gn)), gnpids, true); + result[1] = replaceC12(as(fbstatstemp(between, ext, ng, g, w, true, stable_algo, gn)), gnpids); // how to do this ? -> above best approach ? + result[2] = replaceC12(as(fbstatstemp(within, ext, ng, g, w, true, stable_algo, gn)), gnpids, true); Rf_namesgets(result, CharacterVector::create("Overall","Between","Within")); return(result); } @@ -467,14 +504,14 @@ SEXP fbstatsmCpp(const NumericMatrix& x, bool ext = false, int ng = 0, const Int int npg = 0, const IntegerVector& pg = 0, const SEXP& w = R_NilValue, bool stable_algo = true, bool array = true, const SEXP& gn = R_NilValue) { - int col = x.ncol(), d = (ext) ? 7 : 5; // l = x.nrow(), + bool weights = !Rf_isNull(w); + int col = x.ncol(), d = ((ext) ? 7 : 5) + weights; // l = x.nrow(), if(npg == 0) { // No panel if(ng == 0) { // No groups NumericMatrix out = no_init_matrix(col, d); for(int j = col; j--; ) out(j, _) = fbstatstemp(x(_, j), ext, 0, 0, w, false, stable_algo); - Rf_dimnamesgets(out, List::create(colnames(x), (ext) ? CharacterVector::create("N","Mean","SD","Min","Max","Skew","Kurt") : - CharacterVector::create("N","Mean","SD","Min","Max"))); + Rf_dimnamesgets(out, List::create(colnames(x), get_stats_names(d))); Rf_classgets(out, CharacterVector::create("qsu","matrix","table")); return out; } else { @@ -483,8 +520,7 @@ SEXP fbstatsmCpp(const NumericMatrix& x, bool ext = false, int ng = 0, const Int NumericMatrix out = no_init_matrix(d*ng, col); for(int j = col; j--; ) out(_, j) = fbstatstemp(x(_, j), ext, ng, g, w, false, stable_algo); Rf_dimgets(out, Dimension(ng, d, col)); - Rf_dimnamesgets(out, List::create(gn, (ext) ? CharacterVector::create("N","Mean","SD","Min","Max","Skew","Kurt") : - CharacterVector::create("N","Mean","SD","Min","Max"), colnames(x))); + Rf_dimnamesgets(out, List::create(gn, get_stats_names(d), colnames(x))); Rf_classgets(out, CharacterVector::create("qsu","array","table")); return out; } else { @@ -501,8 +537,7 @@ SEXP fbstatsmCpp(const NumericMatrix& x, bool ext = false, int ng = 0, const Int for(int j = col; j--; ) out(_, j) = as(fbstatsCpp(x(_, j), ext, 0, 0, npg, pg, w, stable_algo, true, false)); // or Rf_coerce ? Rf_dimgets(out, Dimension(3, d, col)); Rf_dimnamesgets(out, List::create(CharacterVector::create("Overall","Between","Within"), - (ext) ? CharacterVector::create("N/T","Mean","SD","Min","Max","Skew","Kurt") : - CharacterVector::create("N/T","Mean","SD","Min","Max"), colnames(x))); + get_stats_names(d, true), colnames(x))); Rf_classgets(out, CharacterVector::create("qsu","array","table")); return out; } else { @@ -516,9 +551,8 @@ SEXP fbstatsmCpp(const NumericMatrix& x, bool ext = false, int ng = 0, const Int NumericMatrix out = no_init_matrix(d*3*ng, col); for(int j = col; j--; ) out(_, j) = as(fbstatsCpp(x(_, j), ext, ng, g, npg, pg, w, stable_algo, true, false)); // or Rf_coerce ? Rf_dimgets(out, IntegerVector::create(ng, d, 3, col)); - Rf_dimnamesgets(out, List::create(gn, (ext) ? CharacterVector::create("N/T","Mean","SD","Min","Max","Skew","Kurt") : - CharacterVector::create("N/T","Mean","SD","Min","Max"), - CharacterVector::create("Overall","Between","Within"), colnames(x))); + Rf_dimnamesgets(out, List::create(gn, get_stats_names(d, true), + CharacterVector::create("Overall","Between","Within"), colnames(x))); Rf_classgets(out, CharacterVector::create("qsu","array","table")); return out; } else { @@ -533,39 +567,76 @@ SEXP fbstatsmCpp(const NumericMatrix& x, bool ext = false, int ng = 0, const Int template -NumericVector fnobs5Impl(Vector x, bool ext = false, int ng = 0, IntegerVector g = 0, bool real = false, bool setn = false, SEXP gn = R_NilValue) { +NumericVector fnobs5Impl(Vector x, bool ext = false, int ng = 0, IntegerVector g = 0, SEXP w = R_NilValue, bool real = false, bool setn = false, SEXP gn = R_NilValue) { - int l = x.size(), d = (ext) ? 7 : 5; + bool weights = !Rf_isNull(w); + int l = x.size(), d = ((ext) ? 7 : 5) + weights; if(ng == 0) { int n = 0; - if(real) { - for(int i = 0; i != l; ++i) if(x[i] == x[i]) ++n; // This loop is faster + double wsum = 0.0; + NumericVector out(d, NA_REAL); + if(weights) { + NumericVector wg = w; + if(real) { + for(int i = 0; i != l; ++i) { + if(x[i] == x[i] && wg[i] == wg[i] && wg[i] != 0) { + wsum += wg[i]; ++n; + } + } + } else { + for(int i = 0; i != l; ++i) { + if(x[i] != Vector::get_na() && wg[i] == wg[i] && wg[i] != 0) { + wsum += wg[i]; ++n; + } + } + } + out[0] = (double)n; + out[1] = wsum; } else { - for(int i = 0; i != l; ++i) if(x[i] != Vector::get_na()) ++n; + if(real) { + for(int i = 0; i != l; ++i) if(x[i] == x[i]) ++n; // This loop is faster + } else { + for(int i = 0; i != l; ++i) if(x[i] != Vector::get_na()) ++n; + } + out[0] = (double)n; } - NumericVector out(d, NA_REAL); if(setn) { - Rf_namesgets(out, (ext) ? CharacterVector::create("N","Mean","SD","Min","Max","Skew","Kurt") : - CharacterVector::create("N","Mean","SD","Min","Max")); + Rf_namesgets(out, get_stats_names(d)); Rf_classgets(out, CharacterVector::create("qsu","table")); } - out[0] = (double)n; return out; } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); NumericMatrix out = no_init_matrix(ng, d); - std::fill_n(out.begin(), ng, 0.0); // works ?? -> yes - std::fill(out.begin()+ng, out.end(), NA_REAL); + std::fill_n(out.begin(), ng*(1+weights), 0.0); // works ?? -> yes + std::fill(out.begin()+ng*(1+weights), out.end(), NA_REAL); NumericMatrix::Column n = out(_, 0); - if(real) { - for(int i = 0; i != l; ++i) if(x[i] == x[i]) ++n[g[i]-1]; + if(weights) { + NumericVector wg = w; + NumericMatrix::Column wsum = out(_, 1); + if(real) { + for(int i = 0; i != l; ++i) { + if(x[i] == x[i] && wg[i] == wg[i] && wg[i] != 0) { + wsum[g[i]-1] += wg[i]; ++n[g[i]-1]; + } + } + } else { + for(int i = 0; i != l; ++i) { + if(x[i] != Vector::get_na() && wg[i] == wg[i] && wg[i] != 0) { + wsum[g[i]-1] += wg[i]; ++n[g[i]-1]; + } + } + } } else { - for(int i = 0; i != l; ++i) if(x[i] != Vector::get_na()) ++n[g[i]-1]; + if(real) { + for(int i = 0; i != l; ++i) if(x[i] == x[i]) ++n[g[i]-1]; + } else { + for(int i = 0; i != l; ++i) if(x[i] != Vector::get_na()) ++n[g[i]-1]; + } } if(setn) { - Rf_dimnamesgets(out, List::create(gn, (ext) ? CharacterVector::create("N","Mean","SD","Min","Max","Skew","Kurt") : - CharacterVector::create("N","Mean","SD","Min","Max"))); + Rf_dimnamesgets(out, List::create(gn, get_stats_names(d))); Rf_classgets(out, CharacterVector::create("qsu","matrix","table")); } return out; @@ -573,9 +644,10 @@ NumericVector fnobs5Impl(Vector x, bool ext = false, int ng = 0, IntegerV } template -NumericMatrix fnobs5pImpl(Vector x, bool ext = false, int ng = 0, IntegerVector g = 0, int npg = 0, IntegerVector pg = 0, bool real = false, bool array = true, SEXP gn = R_NilValue) { +NumericMatrix fnobs5pImpl(Vector x, bool ext = false, int ng = 0, IntegerVector g = 0, int npg = 0, IntegerVector pg = 0, SEXP w = R_NilValue, bool real = false, bool array = true, SEXP gn = R_NilValue) { - int l = x.size(), d = (ext) ? 7 : 5; + bool weights = !Rf_isNull(w); + int l = x.size(), d = ((ext) ? 7 : 5) + weights; if(pg.size() != l) stop("length(pid) must match nrow(X)"); if(ng == 0) { @@ -583,20 +655,46 @@ NumericMatrix fnobs5pImpl(Vector x, bool ext = false, int ng = 0, Integer // bool npgs[npg+1]; // memset(npgs, true, sizeof(bool)*(npg+1)); std::vector npgs(npg+1, true); - if(real) { - for(int i = 0; i != l; ++i) { - if(x[i] == x[i]) ++n; - if(npgs[pg[i]-1]) { - ++npgc; - npgs[pg[i]-1] = false; + double wsum = 0.0; + if(weights) { + NumericVector wg = w; + if(real) { + for(int i = 0; i != l; ++i) { + if(x[i] == x[i] && wg[i] == wg[i] && wg[i] != 0) { + wsum += wg[i]; ++n; + } + if(npgs[pg[i]-1]) { + ++npgc; + npgs[pg[i]-1] = false; + } + } + } else { + for(int i = 0; i != l; ++i) { + if(x[i] != Vector::get_na() && wg[i] == wg[i] && wg[i] != 0) { + wsum += wg[i]; ++n; + } + if(npgs[pg[i]-1]) { + ++npgc; + npgs[pg[i]-1] = false; + } } } } else { - for(int i = 0; i != l; ++i) { - if(x[i] != Vector::get_na()) ++n; - if(npgs[pg[i]-1]) { - ++npgc; - npgs[pg[i]-1] = false; + if(real) { + for(int i = 0; i != l; ++i) { + if(x[i] == x[i]) ++n; + if(npgs[pg[i]-1]) { + ++npgc; + npgs[pg[i]-1] = false; + } + } + } else { + for(int i = 0; i != l; ++i) { + if(x[i] != Vector::get_na()) ++n; + if(npgs[pg[i]-1]) { + ++npgc; + npgs[pg[i]-1] = false; + } } } } @@ -604,19 +702,22 @@ NumericMatrix fnobs5pImpl(Vector x, bool ext = false, int ng = 0, Integer out[0] = (double)n; out[1] = (double)npgc; out[2] = out[0]/out[1]; - std::fill(out.begin()+3, out.end(), NA_REAL); + if(weights) { + out[3] = (double)wsum; + out[4] = (double)npgc; + out[5] = out[3]/out[4]; + } + std::fill(out.begin()+3*(1+weights), out.end(), NA_REAL); if(!array) { - Rf_dimnamesgets(out, List::create(CharacterVector::create("Overall","Between","Within"), - (ext) ? CharacterVector::create("N/T","Mean","SD","Min","Max","Skew","Kurt") : - CharacterVector::create("N/T","Mean","SD","Min","Max"))); + Rf_dimnamesgets(out, List::create(CharacterVector::create("Overall","Between","Within"), get_stats_names(d, true))); Rf_classgets(out, CharacterVector::create("qsu","matrix","table")); } return out; } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); NumericMatrix out = no_init_matrix(ng*d, 3); - std::fill_n(out.begin(), ng, 0.0); // works ? -> yes - std::fill(out.begin()+ng, out.end(), NA_REAL); + std::fill_n(out.begin(), ng*(1+weights), 0.0); // works ? -> yes + std::fill(out.begin()+ng*(1+weights), out.end(), NA_REAL); NumericMatrix::Column n = out(_, 0); NumericMatrix::Column gnpids = out(_, 1); std::fill_n(gnpids.begin(), ng, 0.0); @@ -624,34 +725,65 @@ NumericMatrix fnobs5pImpl(Vector x, bool ext = false, int ng = 0, Integer // memset(groupids, true, sizeof(bool)*ng*npg); LogicalMatrix groupids = no_init_matrix(ng, npg); std::fill(groupids.begin(), groupids.end(), true); - if(real) { - for(int i = 0; i != l; ++i) { - if(x[i] == x[i]) { - ++n[g[i]-1]; - if(groupids(g[i]-1, pg[i]-1)) { - ++gnpids[g[i]-1]; - groupids(g[i]-1, pg[i]-1) = false; + if(weights) { + NumericVector wg = w; + if(real) { + for(int i = 0; i != l; ++i) { + if(x[i] == x[i] && wg[i] == wg[i] && wg[i] != 0) { + n[g[i]+ng-1] += wg[i]; ++n[g[i]-1]; + if(groupids(g[i]-1, pg[i]-1)) { + ++gnpids[g[i]-1]; + groupids(g[i]-1, pg[i]-1) = false; + } + } + } + } else { + for(int i = 0; i != l; ++i) { + if(x[i] != Vector::get_na() && wg[i] == wg[i] && wg[i] != 0) { + n[g[i]+ng-1] += wg[i]; ++n[g[i]-1]; + if(groupids(g[i]-1, pg[i]-1)) { + ++gnpids[g[i]-1]; + groupids(g[i]-1, pg[i]-1) = false; + } } } } } else { - for(int i = 0; i != l; ++i) { - if(x[i] != Vector::get_na()) { - ++n[g[i]-1]; - if(groupids(g[i]-1, pg[i]-1)) { - ++gnpids[g[i]-1]; - groupids(g[i]-1, pg[i]-1) = false; + if(real) { + for(int i = 0; i != l; ++i) { + if(x[i] == x[i]) { + ++n[g[i]-1]; + if(groupids(g[i]-1, pg[i]-1)) { + ++gnpids[g[i]-1]; + groupids(g[i]-1, pg[i]-1) = false; + } + } + } + } else { + for(int i = 0; i != l; ++i) { + if(x[i] != Vector::get_na()) { + ++n[g[i]-1]; + if(groupids(g[i]-1, pg[i]-1)) { + ++gnpids[g[i]-1]; + groupids(g[i]-1, pg[i]-1) = false; + } } } } } NumericMatrix::Column nt = out(_, 2); - for(int i = 0; i != ng; ++i) nt[i] = n[i] / gnpids[i]; + if(weights) { + for(int i = 0; i != ng; ++i) { + gnpids[ng+i] = gnpids[i]; + nt[i] = n[i] / gnpids[i]; + nt[ng+i] = n[ng+i] / gnpids[i]; + } + } else { + for(int i = 0; i != ng; ++i) nt[i] = n[i] / gnpids[i]; + } if(!array) { Rf_dimgets(out, Dimension(ng, d, 3)); - Rf_dimnamesgets(out, List::create(gn, (ext) ? CharacterVector::create("N/T","Mean","SD","Min","Max","Skew","Kurt") : - CharacterVector::create("N/T","Mean","SD","Min","Max"), - CharacterVector::create("Overall","Between","Within"))); + Rf_dimnamesgets(out, List::create(gn, get_stats_names(d, true), CharacterVector::create("Overall","Between","Within"))); Rf_classgets(out, CharacterVector::create("qsu","array","table")); } return out; @@ -664,7 +796,8 @@ SEXP fbstatslCpp(const List& x, bool ext = false, int ng = 0, const IntegerVecto int npg = 0, const IntegerVector& pg = 0, const SEXP& w = R_NilValue, bool stable_algo = true, bool array = true, const SEXP& gn = R_NilValue) { - int col = x.size(), d = (ext) ? 7 : 5; + bool weights = !Rf_isNull(w); + int col = x.size(), d = ((ext) ? 7 : 5) + weights; if(npg == 0) { // No panel if(ng == 0) { // No groups @@ -673,25 +806,24 @@ SEXP fbstatslCpp(const List& x, bool ext = false, int ng = 0, const IntegerVecto switch(TYPEOF(x[j])) { case REALSXP:{ NumericVector column = x[j]; - if(Rf_isObject(column)) out(j, _) = fnobs5Impl(column, ext, 0, 0, true); + if(Rf_isObject(column)) out(j, _) = fnobs5Impl(column, ext, 0, 0, w, true); else out(j, _) = fbstatstemp(column, ext, 0, 0, w, false, stable_algo); break; } case INTSXP: { IntegerVector column = x[j]; - if(Rf_isObject(column)) out(j, _) = fnobs5Impl(column, ext); + if(Rf_isObject(column)) out(j, _) = fnobs5Impl(column, ext, 0, 0, w); else out(j, _) = fbstatstemp(x[j], ext, 0, 0, w, false, stable_algo); break; } - case STRSXP: out(j, _) = fnobs5Impl(x[j], ext); + case STRSXP: out(j, _) = fnobs5Impl(x[j], ext, 0, 0, w); break; - case LGLSXP: out(j, _) = fnobs5Impl(x[j], ext); + case LGLSXP: out(j, _) = fnobs5Impl(x[j], ext, 0, 0, w); break; default: stop("Not supported SEXP type!"); } } - Rf_dimnamesgets(out, List::create(Rf_getAttrib(x, R_NamesSymbol), (ext) ? CharacterVector::create("N","Mean","SD","Min","Max","Skew","Kurt") : - CharacterVector::create("N","Mean","SD","Min","Max"))); + Rf_dimnamesgets(out, List::create(Rf_getAttrib(x, R_NamesSymbol), get_stats_names(d))); Rf_classgets(out, CharacterVector::create("qsu","matrix","table")); return out; } else { @@ -701,27 +833,26 @@ SEXP fbstatslCpp(const List& x, bool ext = false, int ng = 0, const IntegerVecto switch(TYPEOF(x[j])) { case REALSXP:{ NumericVector column = x[j]; - if(Rf_isObject(column)) out(_, j) = fnobs5Impl(column, ext, ng, g, true); + if(Rf_isObject(column)) out(_, j) = fnobs5Impl(column, ext, ng, g, w, true); else out(_, j) = fbstatstemp(column, ext, ng, g, w, false, stable_algo); break; } case INTSXP: { IntegerVector column = x[j]; - if(Rf_isObject(column)) out(_, j) = fnobs5Impl(column, ext, ng, g); + if(Rf_isObject(column)) out(_, j) = fnobs5Impl(column, ext, ng, g, w); else out(_, j) = fbstatstemp(x[j], ext, ng, g, w, false, stable_algo); break; } - case STRSXP: out(_, j) = fnobs5Impl(x[j], ext, ng, g); + case STRSXP: out(_, j) = fnobs5Impl(x[j], ext, ng, g, w); break; - case LGLSXP: out(_, j) = fnobs5Impl(x[j], ext, ng, g); + case LGLSXP: out(_, j) = fnobs5Impl(x[j], ext, ng, g, w); break; default: stop("Not supported SEXP type!"); } } Rf_dimgets(out, Dimension(ng, d, col)); - Rf_dimnamesgets(out, List::create(gn, (ext) ? CharacterVector::create("N","Mean","SD","Min","Max","Skew","Kurt") : - CharacterVector::create("N","Mean","SD","Min","Max"), Rf_getAttrib(x, R_NamesSymbol))); - Rf_classgets(out, CharacterVector::create("qsu","array","table")); + Rf_dimnamesgets(out, List::create(gn, get_stats_names(d), Rf_getAttrib(x, R_NamesSymbol))); + Rf_classgets(out, CharacterVector::create("qsu","array","table")); return out; } else { List out(col); @@ -729,19 +860,19 @@ SEXP fbstatslCpp(const List& x, bool ext = false, int ng = 0, const IntegerVecto switch(TYPEOF(x[j])) { case REALSXP:{ NumericVector column = x[j]; - if(Rf_isObject(column)) out[j] = fnobs5Impl(column, ext, ng, g, true, true, gn); + if(Rf_isObject(column)) out[j] = fnobs5Impl(column, ext, ng, g, w, true, true, gn); else out[j] = fbstatstemp(column, ext, ng, g, w, true, stable_algo, gn); break; } case INTSXP: { IntegerVector column = x[j]; - if(Rf_isObject(column)) out[j] = fnobs5Impl(column, ext, ng, g, false, true, gn); + if(Rf_isObject(column)) out[j] = fnobs5Impl(column, ext, ng, g, w, false, true, gn); else out[j] = fbstatstemp(x[j], ext, ng, g, w, true, stable_algo, gn); break; } - case STRSXP: out[j] = fnobs5Impl(x[j], ext, ng, g, false, true, gn); + case STRSXP: out[j] = fnobs5Impl(x[j], ext, ng, g, w, false, true, gn); break; - case LGLSXP: out[j] = fnobs5Impl(x[j], ext, ng, g, false, true, gn); + case LGLSXP: out[j] = fnobs5Impl(x[j], ext, ng, g, w, false, true, gn); break; default: stop("Not supported SEXP type!"); } @@ -758,27 +889,26 @@ SEXP fbstatslCpp(const List& x, bool ext = false, int ng = 0, const IntegerVecto switch(TYPEOF(x[j])) { case REALSXP:{ NumericVector column = x[j]; - if(Rf_isObject(column)) out(_, j) = fnobs5pImpl(column, ext, 0, 0, npg, pg, true); + if(Rf_isObject(column)) out(_, j) = fnobs5pImpl(column, ext, 0, 0, npg, pg, w, true); else out(_, j) = as(fbstatsCpp(column, ext, 0, 0, npg, pg, w, stable_algo, true, false)); // or Rf_coerce ? break; } case INTSXP: { IntegerVector column = x[j]; - if(Rf_isObject(column)) out(_, j) = fnobs5pImpl(column, ext, 0, 0, npg, pg); + if(Rf_isObject(column)) out(_, j) = fnobs5pImpl(column, ext, 0, 0, npg, pg, w); else out(_, j) = as(fbstatsCpp(x[j], ext, 0, 0, npg, pg, w, stable_algo, true, false)); break; } - case STRSXP: out(_, j) = fnobs5pImpl(x[j], ext, 0, 0, npg, pg); + case STRSXP: out(_, j) = fnobs5pImpl(x[j], ext, 0, 0, npg, pg, w); break; - case LGLSXP: out(_, j) = fnobs5pImpl(x[j], ext, 0, 0, npg, pg); + case LGLSXP: out(_, j) = fnobs5pImpl(x[j], ext, 0, 0, npg, pg, w); break; default: stop("Not supported SEXP type!"); } } Rf_dimgets(out, Dimension(3, d, col)); Rf_dimnamesgets(out, List::create(CharacterVector::create("Overall","Between","Within"), - (ext) ? CharacterVector::create("N/T","Mean","SD","Min","Max","Skew","Kurt") : - CharacterVector::create("N/T","Mean","SD","Min","Max"), Rf_getAttrib(x, R_NamesSymbol))); + get_stats_names(d, true), Rf_getAttrib(x, R_NamesSymbol))); Rf_classgets(out, CharacterVector::create("qsu","array","table")); return out; } else { @@ -787,19 +917,19 @@ SEXP fbstatslCpp(const List& x, bool ext = false, int ng = 0, const IntegerVecto switch(TYPEOF(x[j])) { case REALSXP:{ NumericVector column = x[j]; - if(Rf_isObject(column)) out[j] = fnobs5pImpl(column, ext, 0, 0, npg, pg, true, false, gn); + if(Rf_isObject(column)) out[j] = fnobs5pImpl(column, ext, 0, 0, npg, pg, w, true, false, gn); else out[j] = fbstatsCpp(column, ext, 0, 0, npg, pg, w, stable_algo, false, true, gn); break; } case INTSXP: { IntegerVector column = x[j]; - if(Rf_isObject(column)) out[j] = fnobs5pImpl(column, ext, 0, 0, npg, pg, false, false, gn); + if(Rf_isObject(column)) out[j] = fnobs5pImpl(column, ext, 0, 0, npg, pg, w, false, false, gn); else out[j] = fbstatsCpp(x[j], ext, 0, 0, npg, pg, w, stable_algo, false, true, gn); break; } - case STRSXP: out[j] = fnobs5pImpl(x[j], ext, 0, 0, npg, pg, false, false, gn); + case STRSXP: out[j] = fnobs5pImpl(x[j], ext, 0, 0, npg, pg, w, false, false, gn); break; - case LGLSXP: out[j] = fnobs5pImpl(x[j], ext, 0, 0, npg, pg, false, false, gn); + case LGLSXP: out[j] = fnobs5pImpl(x[j], ext, 0, 0, npg, pg, w, false, false, gn); break; default: stop("Not supported SEXP type!"); } @@ -814,26 +944,25 @@ SEXP fbstatslCpp(const List& x, bool ext = false, int ng = 0, const IntegerVecto switch(TYPEOF(x[j])) { case REALSXP:{ NumericVector column = x[j]; - if(Rf_isObject(column)) out(_, j) = fnobs5pImpl(column, ext, ng, g, npg, pg, true); + if(Rf_isObject(column)) out(_, j) = fnobs5pImpl(column, ext, ng, g, npg, pg, w, true); else out(_, j) = as(fbstatsCpp(column, ext, ng, g, npg, pg, w, stable_algo, true, false)); // or Rf_coerce ? break; } case INTSXP: { IntegerVector column = x[j]; - if(Rf_isObject(column)) out(_, j) = fnobs5pImpl(column, ext, ng, g, npg, pg); + if(Rf_isObject(column)) out(_, j) = fnobs5pImpl(column, ext, ng, g, npg, pg, w); else out(_, j) = as(fbstatsCpp(x[j], ext, ng, g, npg, pg, w, stable_algo, true, false)); break; } - case STRSXP: out(_, j) = fnobs5pImpl(x[j], ext, ng, g, npg, pg); + case STRSXP: out(_, j) = fnobs5pImpl(x[j], ext, ng, g, npg, pg, w); break; - case LGLSXP: out(_, j) = fnobs5pImpl(x[j], ext, ng, g, npg, pg); + case LGLSXP: out(_, j) = fnobs5pImpl(x[j], ext, ng, g, npg, pg, w); break; default: stop("Not supported SEXP type!"); } } Rf_dimgets(out, IntegerVector::create(ng, d, 3, col)); - Rf_dimnamesgets(out, List::create(gn, (ext) ? CharacterVector::create("N/T","Mean","SD","Min","Max","Skew","Kurt") : - CharacterVector::create("N/T","Mean","SD","Min","Max"), + Rf_dimnamesgets(out, List::create(gn, get_stats_names(d, true), CharacterVector::create("Overall","Between","Within"), Rf_getAttrib(x, R_NamesSymbol))); Rf_classgets(out, CharacterVector::create("qsu","array","table")); return out; @@ -843,19 +972,19 @@ SEXP fbstatslCpp(const List& x, bool ext = false, int ng = 0, const IntegerVecto switch(TYPEOF(x[j])) { case REALSXP:{ NumericVector column = x[j]; - if(Rf_isObject(column)) out[j] = fnobs5pImpl(column, ext, ng, g, npg, pg, true, false, gn); + if(Rf_isObject(column)) out[j] = fnobs5pImpl(column, ext, ng, g, npg, pg, w, true, false, gn); else out[j] = fbstatsCpp(column, ext, ng, g, npg, pg, w, stable_algo, false, true, gn); break; } case INTSXP: { IntegerVector column = x[j]; - if(Rf_isObject(column)) out[j] = fnobs5pImpl(column, ext, ng, g, npg, pg, false, false, gn); + if(Rf_isObject(column)) out[j] = fnobs5pImpl(column, ext, ng, g, npg, pg, w, false, false, gn); else out[j] = fbstatsCpp(x[j], ext, ng, g, npg, pg, w, stable_algo, false, true, gn); break; } - case STRSXP: out[j] = fnobs5pImpl(x[j], ext, ng, g, npg, pg, false, false, gn); + case STRSXP: out[j] = fnobs5pImpl(x[j], ext, ng, g, npg, pg, w, false, false, gn); break; - case LGLSXP: out[j] = fnobs5pImpl(x[j], ext, ng, g, npg, pg, false, false, gn); + case LGLSXP: out[j] = fnobs5pImpl(x[j], ext, ng, g, npg, pg, w, false, false, gn); break; default: stop("Not supported SEXP type!"); } From 1be19017636344c2f97946c82e79faa7837ae69a Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Sun, 27 Oct 2024 22:10:43 +0100 Subject: [PATCH 4/8] Adjust docs and NEWS for #650. --- NEWS.md | 3 ++- man/qsu.Rd | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index a4d428cd..31dae5a2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,11 +3,12 @@ * In `GRP.default()`, the `"group.starts"` attribute is always returned, even if there is only one group or every observation is its own group. Thanks @JamesThompsonC (#631). * Fixed a bug in `pivot()` if `na.rm = TRUE` and `how = "wider"|"recast"` and there are multiple `value` columns with different missingness patterns. In this case `na_omit(values)` was applied with default settings to the original (long) value columns, implying potential loss of information. The fix applies `na_omit(values, prop = 1)`, i.e., only removes completely missing rows. - * `qDF()/qDT()/qTBL()` now allow a length-2 vector of names to `row.names.col` if `X` is a named atomic vector, e.g., `qDF(fmean(mtcars), c("cars", "mean"))` gives the same as `pivot(fmean(mtcars, drop = FALSE), names = list("car", "mean"))`. * Added a subsection on using internal (ad-hoc) grouping to the *collapse* for *tidyverse* users vignette. +* `qsu()` now adds a `WeightSum` column giving the sum of (non-zero or missing) weights if the `w` argument is used. Thanks @mayer79 for suggesting (#650). For panel data (`pid`) the 'Between' sum of weights is also simply the number of groups, and the 'Within' sum of weights is the 'Overall' sum of weights divided by the number of groups. + # collapse 2.0.16 * Fixes an installation bug on some Linux systems (conflicting types) (#613). diff --git a/man/qsu.Rd b/man/qsu.Rd index 68870ae6..9fb881dd 100644 --- a/man/qsu.Rd +++ b/man/qsu.Rd @@ -58,7 +58,7 @@ qsu(x, \dots) \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{by}{\emph{(p)data.frame method}: Same as \code{g}, but also allows one- or two-sided formulas i.e. \code{~ group1 + group2} or \code{var1 + var2 ~ group1 + group2}. See Examples.} \item{pid}{same input as \code{g/by}: Specify a panel-identifier to also compute statistics on between- and within- transformed data. Data frame method also supports one- or two-sided formulas, grouped_df method supports expressions evaluated in the data environment. Transformations are taken independently from grouping with \code{g/by} (grouped statistics are computed on the transformed data if \code{g/by} is also used). However, passing any LHS variables to \code{pid} will overwrite any \code{LHS} variables passed to \code{by}.} - \item{w}{a vector of (non-negative) weights. Adding weights will compute the weighted mean, sd, skewness and kurtosis, and transform the data using weighted individual means if \code{pid} is used. Data frame method supports formula, grouped_df method supports expression.} + \item{w}{a vector of (non-negative) weights. Adding weights will compute the weighted mean, sd, skewness and kurtosis, and transform the data using weighted individual means if \code{pid} is used. A \code{"WeightSum"} column will be added giving the sume of weights (See Details). Data frame method supports formula, grouped_df method supports expression.} \item{cols}{select columns to summarize using column names, indices, a logical vector or a function (e.g. \code{is.numeric}). Two-sided formulas passed to \code{by} or \code{pid} overwrite \code{cols}.} \item{higher}{logical. Add higher moments (skewness and kurtosis).} @@ -84,7 +84,7 @@ If \code{pid} is used, \code{qsu} performs a panel-decomposition of each variabl More formally, let \bold{\code{x}} (bold) be a panel vector of data for \code{N} individuals indexed by \code{i}, recorded for \code{T} periods, indexed by \code{t}. \code{xit} then denotes a single data-point belonging to individual \code{i} in time-period \code{t} (\code{t/T} must not represent time). Then \code{xi.} denotes the average of all values for individual \code{i} (averaged over \code{t}), and by extension \bold{\code{xN.}} is the vector (length \code{N}) of such averages for all individuals. If no groups are supplied to \code{g/by}, the 'Between' statistics are computed on \bold{\code{xN.}}, the vector of individual averages. (This means that for a non-balanced panel or in the presence of missing values, the 'Overall' mean computed on \bold{\code{x}} can be slightly different than the 'Between' mean computed on \bold{\code{xN.}}, and the variance decomposition is not exact). If groups are supplied to \code{g/by}, \bold{\code{xN.}} is expanded to the vector \bold{\code{xi.}} (length \code{N x T}) by replacing each value \code{xit} in \bold{\code{x}} with \code{xi.}, while preserving missing values in \bold{\code{x}}. Grouped Between-statistics are then computed on \bold{\code{xi.}}, with the only difference that the number of observations ('Between-N') reported for each group is the number of distinct non-missing values of \bold{\code{xi.}} in each group (not the total number of non-missing values of \bold{\code{xi.}} in each group, which is already reported in 'Overall-N'). See Examples. -'Within' statistics are always computed on the vector \bold{\code{x - xi. + x..}}, where \bold{\code{x..}} is simply the 'Overall' mean computed from \bold{\code{x}}, which is added back to preserve the level of the data. The 'Within' mean computed on this data will always be identical to the 'Overall' mean. In the summary output, \code{qsu} reports not 'N', which would be identical to the 'Overall-N', but 'T', the average number of time-periods of data available for each individual obtained as 'T' = 'Overall-N / 'Between-N'. See Examples. +'Within' statistics are always computed on the vector \bold{\code{x - xi. + x..}}, where \bold{\code{x..}} is simply the 'Overall' mean computed from \bold{\code{x}}, which is added back to preserve the level of the data. The 'Within' mean computed on this data will always be identical to the 'Overall' mean. In the summary output, \code{qsu} reports not 'N', which would be identical to the 'Overall-N', but 'T', the average number of time-periods of data available for each individual obtained as 'T' = 'Overall-N / 'Between-N'. When using weights (\code{w}) with panel data (\code{pid}), the 'Between' sum of weights is also simply the number of groups, and the 'Within' sum of weights is the 'Overall' sum of weights divided by the number of groups. See Examples. Apart from 'N/T' and the extrema, the standard-deviations ('SD') computed on between- and within- transformed data are extremely valuable because they indicate how much of the variation in a panel-variable is between-individuals and how much of the variation is within-individuals (over time). At the extremes, variables that have common values across individuals (such as the time-variable(s) 't' in a balanced panel), can readily be identified as individual-invariant because the 'Between-SD' on this variable is 0 and the 'Within-SD' is equal to the 'Overall-SD'. Analogous, time-invariant individual characteristics (such as the individual-id 'i') have a 0 'Within-SD' and a 'Between-SD' equal to the 'Overall-SD'. See Examples. From f7b87a9af0fc9063249ae78099ed8b7cdcee26cc Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Sun, 27 Oct 2024 22:59:17 +0100 Subject: [PATCH 5/8] Computing percentages in terms of weighs, thanks to (#650). --- R/descr.R | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/R/descr.R b/R/descr.R index a02ff00c..840f5cea 100644 --- a/R/descr.R +++ b/R/descr.R @@ -371,7 +371,14 @@ print_descr_grouped <- function(x, n = 14, perc = TRUE, digits = 2, t.table = TR ncol = 2, dimnames = list(NULL, c("Min", "Max")))), quote = FALSE, right = TRUE, print.gap = 2) } else { - if(perc) stat <- cbind(stat[, 1L, drop = FALSE], Perc = stat[, 1L]/bsum(stat[, 1L])*100, stat[, -1L, drop = FALSE]) + if(perc) { + if(wsuml && ncol(stat) > 4L) { # If weights and non-character + ncolf <- 1:(2L + (dimnames(stat)[[2L]][2L] == "Ndist")) + stat <- if(wsuml) cbind(stat[, ncolf, drop = FALSE], Perc = stat[, "WeightSum"]/bsum(stat[, "WeightSum"])*100, stat[, -ncolf, drop = FALSE]) + } else { + stat <- cbind(stat[, 1L, drop = FALSE], Perc = stat[, 1L]/bsum(stat[, 1L])*100, stat[, -1L, drop = FALSE]) + } + } print.qsu(stat, digits) } if(length(xi) > 3L) { # Table or quantiles From bfd91dcad225d76af931f13b712754d5486df435 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Sun, 27 Oct 2024 23:20:07 +0100 Subject: [PATCH 6/8] Adjust tests (#650). --- tests/testthat/test-fscale-STD.R | 4 +- tests/testthat/test-miscellaneous-issues.R | 8 +-- tests/testthat/test-qsu.R | 74 +++++++++++----------- 3 files changed, 43 insertions(+), 43 deletions(-) diff --git a/tests/testthat/test-fscale-STD.R b/tests/testthat/test-fscale-STD.R index 3788b5dd..9f262e12 100644 --- a/tests/testthat/test-fscale-STD.R +++ b/tests/testthat/test-fscale-STD.R @@ -308,8 +308,8 @@ test_that("fscale with weights performs like wbscale (defined above)", { expect_equal(replace_Inf(fscale(mtcNA, g, wdatNA), NA), BY(mtcNA, g, wbscale, wdatNA, na.rm = TRUE)) }) -wsu <- function(x, w) if(is.null(dim(x))) `attributes<-`(qsu.default(x, w = w)[2:3], NULL) else `attributes<-`(qsu(x, w = w)[,2:3], NULL) -wsuby <- function(x, f, w) if(is.null(dim(x))) `attributes<-`(qsu.default(x, f, w = w)[, 2:3], NULL) else `attributes<-`(qsu(x, f, w = w)[,2:3,], NULL) +wsu <- function(x, w) if(is.null(dim(x))) `attributes<-`(qsu.default(x, w = w)[3:4], NULL) else `attributes<-`(qsu(x, w = w)[,3:4], NULL) +wsuby <- function(x, f, w) if(is.null(dim(x))) `attributes<-`(qsu.default(x, f, w = w)[, 3:4], NULL) else `attributes<-`(qsu(x, f, w = w)[,3:4,], NULL) test_that("Weighted customized scaling works as intended", { expect_equal(wsu(fscale(x, w = w, mean = 5.1, sd = 3.9), w = w), c(5.1, 3.9)) diff --git a/tests/testthat/test-miscellaneous-issues.R b/tests/testthat/test-miscellaneous-issues.R index 5f719485..4a05d179 100644 --- a/tests/testthat/test-miscellaneous-issues.R +++ b/tests/testthat/test-miscellaneous-issues.R @@ -322,10 +322,10 @@ test_that("functions using welfords method properly deal with zero weights", { expect_equal(unattrib(fsd(x = c(2, 1, 3), g = g, w = c(0, 1, 1), na.rm = FALSE)), sqrt(2)) expect_equal(unattrib(fscale(x = c(2, 1, 0), g = g, w = c(1, 1, 0), na.rm = TRUE)), (c(2, 1, 0)-1.5)/sqrt(0.5)) expect_equal(unattrib(fscale(x = c(2, 1, 3), g = g, w = c(0, 1, 1), na.rm = FALSE)), (c(2, 1, 3)-2)/sqrt(2)) - expect_equal(unattrib(qsu(x = c(2, 1, 0), g = g, w = c(1, 1, 0))), c(2, 1.5, sqrt(0.5), 1, 2)) - expect_equal(unattrib(qsu(x = c(2, 1, 3), g = g, w = c(0, 1, 1))), c(2, 2, sqrt(2), 1, 3)) - expect_equal(unattrib(qsu(x = c(2, 1, 0), g = g, w = c(1, 1, 0), higher = TRUE))[1:5], c(2, 1.5, sqrt(0.5), 1, 2)) - expect_equal(unattrib(qsu(x = c(2, 1, 3), g = g, w = c(0, 1, 1), higher = TRUE))[1:5], c(2, 2, sqrt(2), 1, 3)) + expect_equal(unattrib(qsu(x = c(2, 1, 0), g = g, w = c(1, 1, 0)))[-2L], c(2, 1.5, sqrt(0.5), 1, 2)) + expect_equal(unattrib(qsu(x = c(2, 1, 3), g = g, w = c(0, 1, 1)))[-2L], c(2, 2, sqrt(2), 1, 3)) + expect_equal(unattrib(qsu(x = c(2, 1, 0), g = g, w = c(1, 1, 0), higher = TRUE))[c(1L, 3:6)], c(2, 1.5, sqrt(0.5), 1, 2)) + expect_equal(unattrib(qsu(x = c(2, 1, 3), g = g, w = c(0, 1, 1), higher = TRUE))[c(1L, 3:6)], c(2, 2, sqrt(2), 1, 3)) } }) diff --git a/tests/testthat/test-qsu.R b/tests/testthat/test-qsu.R index 6049a0b5..c2a39653 100644 --- a/tests/testthat/test-qsu.R +++ b/tests/testthat/test-qsu.R @@ -39,25 +39,25 @@ test_that("qsu works properly for simple cases (including unit groups and weight expect_equal(qsu(wldNA), base_qsu(wldNA)) expect_equal(qsu(GGDC10S), base_qsu(GGDC10S)) - expect_equal(qsu(1:10, w = rep(1, 10)), base_qsu(1:10)) - expect_equal(qsu(10:1, w = rep(1, 10)), base_qsu(10:1)) - expect_equal(qsu(xNA, w = rep(1, 100)), base_qsu(xNA)) - expect_equal(qsu(wlddev, w = ones), base_qsu(wlddev)) - expect_equal(qsu(wldNA, w = ones), base_qsu(wldNA)) - expect_equal(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S))), base_qsu(GGDC10S)) + expect_equal(qsu(1:10, w = rep(1, 10))[-2L], base_qsu(1:10)) + expect_equal(qsu(10:1, w = rep(1, 10))[-2L], base_qsu(10:1)) + expect_equal(qsu(xNA, w = rep(1, 100))[-2L], base_qsu(xNA)) + expect_equal(qsu(wlddev, w = ones)[,-2L], base_qsu(wlddev)) + expect_equal(qsu(wldNA, w = ones)[,-2L], base_qsu(wldNA)) + expect_equal(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)))[,-2L], base_qsu(GGDC10S)) expect_equal(unattrib(qsu(1:10, g = rep(1, 10))), unattrib(base_qsu(1:10))) expect_equal(unattrib(qsu(10:1, g = rep(1, 10))), unattrib(base_qsu(10:1))) expect_equal(unattrib(qsu(xNA, g = rep(1, 100))), unattrib(base_qsu(xNA))) expect_equal(unattrib(qsu(wlddev, by = ones)), unattrib(t(base_qsu(wlddev)))) # This should be an array... or oriented the other way around... - expect_equal(unattrib(qsu(1:10, g = rep(1, 10), w = rep(1, 10))), unattrib(base_qsu(1:10))) - expect_equal(unattrib(qsu(10:1, g = rep(1, 10), w = rep(1, 10))), unattrib(base_qsu(10:1))) - expect_equal(unattrib(qsu(xNA, g = rep(1, 100), w = rep(1, 100))), unattrib(base_qsu(xNA))) - expect_equal(qsu(wldNA, w = ones), base_qsu(wldNA)) - expect_equal(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S))), base_qsu(GGDC10S)) - expect_equal(t(unclass(qsu(wldNA, w = ones, by = ones))), unclass(base_qsu(wldNA))) - expect_equal(t(unclass(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)), by = rep(1, fnrow(GGDC10S))))), unclass(base_qsu(GGDC10S))) + expect_equal(unattrib(qsu(1:10, g = rep(1, 10), w = rep(1, 10)))[-2L], unattrib(base_qsu(1:10))) + expect_equal(unattrib(qsu(10:1, g = rep(1, 10), w = rep(1, 10)))[-2L], unattrib(base_qsu(10:1))) + expect_equal(unattrib(qsu(xNA, g = rep(1, 100), w = rep(1, 100)))[-2L], unattrib(base_qsu(xNA))) + expect_equal(qsu(wldNA, w = ones)[,-2L], base_qsu(wldNA)) + expect_equal(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)))[,-2L], base_qsu(GGDC10S)) + expect_equal(t(unclass(qsu(wldNA, w = ones, by = ones)))[,-2L], unclass(base_qsu(wldNA))) + expect_equal(t(unclass(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)), by = rep(1, fnrow(GGDC10S)))))[,-2L], unclass(base_qsu(GGDC10S))) }) @@ -73,25 +73,25 @@ test_that("qsu works properly for simple cases with higher-order statistics (inc expect_equal(qsu(wldNA, higher = TRUE)[,1:5], base_qsu(wldNA)) expect_equal(qsu(GGDC10S, higher = TRUE)[,1:5], base_qsu(GGDC10S)) - expect_equal(qsu(1:10, w = rep(1, 10), higher = TRUE)[1:5], base_qsu(1:10)) - expect_equal(qsu(10:1, w = rep(1, 10), higher = TRUE)[1:5], base_qsu(10:1)) - expect_equal(qsu(xNA, w = rep(1, 100), higher = TRUE)[1:5], base_qsu(xNA)) - expect_equal(qsu(wlddev, w = ones, higher = TRUE)[,1:5], base_qsu(wlddev)) - expect_equal(qsu(wldNA, w = ones, higher = TRUE)[,1:5], base_qsu(wldNA)) - expect_equal(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)), higher = TRUE)[,1:5], base_qsu(GGDC10S)) + expect_equal(qsu(1:10, w = rep(1, 10), higher = TRUE)[c(1L, 3:6)], base_qsu(1:10)) + expect_equal(qsu(10:1, w = rep(1, 10), higher = TRUE)[c(1L, 3:6)], base_qsu(10:1)) + expect_equal(qsu(xNA, w = rep(1, 100), higher = TRUE)[c(1L, 3:6)], base_qsu(xNA)) + expect_equal(qsu(wlddev, w = ones, higher = TRUE)[,c(1L, 3:6)], base_qsu(wlddev)) + expect_equal(qsu(wldNA, w = ones, higher = TRUE)[,c(1L, 3:6)], base_qsu(wldNA)) + expect_equal(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)), higher = TRUE)[,c(1L, 3:6)], base_qsu(GGDC10S)) expect_equal(unattrib(qsu(1:10, g = rep(1, 10), higher = TRUE)[1:5]), unattrib(base_qsu(1:10))) expect_equal(unattrib(qsu(10:1, g = rep(1, 10), higher = TRUE)[1:5]), unattrib(base_qsu(10:1))) expect_equal(unattrib(qsu(xNA, g = rep(1, 100), higher = TRUE)[1:5]), unattrib(base_qsu(xNA))) expect_equal(unattrib(qsu(wlddev, by = ones, higher = TRUE)[1:5, ]), unattrib(t(base_qsu(wlddev)))) # This should be an array... or oriented the other way around... - expect_equal(unattrib(qsu(1:10, g = rep(1, 10), w = rep(1, 10), higher = TRUE)[1:5]), unattrib(base_qsu(1:10))) - expect_equal(unattrib(qsu(10:1, g = rep(1, 10), w = rep(1, 10), higher = TRUE)[1:5]), unattrib(base_qsu(10:1))) - expect_equal(unattrib(qsu(xNA, g = rep(1, 100), w = rep(1, 100), higher = TRUE)[1:5]), unattrib(base_qsu(xNA))) - expect_equal(qsu(wldNA, w = ones, higher = TRUE)[,1:5], base_qsu(wldNA)) - expect_equal(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)), higher = TRUE)[,1:5], base_qsu(GGDC10S)) - expect_equal(t(unclass(qsu(wldNA, w = ones, by = ones, higher = TRUE)[1:5,])), unclass(base_qsu(wldNA))) - expect_equal(t(unclass(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)), by = rep(1, fnrow(GGDC10S)), higher = TRUE)))[,1:5], unclass(base_qsu(GGDC10S))) + expect_equal(unattrib(qsu(1:10, g = rep(1, 10), w = rep(1, 10), higher = TRUE)[c(1L, 3:6)]), unattrib(base_qsu(1:10))) + expect_equal(unattrib(qsu(10:1, g = rep(1, 10), w = rep(1, 10), higher = TRUE)[c(1L, 3:6)]), unattrib(base_qsu(10:1))) + expect_equal(unattrib(qsu(xNA, g = rep(1, 100), w = rep(1, 100), higher = TRUE)[c(1L, 3:6)]), unattrib(base_qsu(xNA))) + expect_equal(qsu(wldNA, w = ones, higher = TRUE)[,c(1L, 3:6)], base_qsu(wldNA)) + expect_equal(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)), higher = TRUE)[,c(1L, 3:6)], base_qsu(GGDC10S)) + expect_equal(t(unclass(qsu(wldNA, w = ones, by = ones, higher = TRUE)[c(1L, 3:6),])), unclass(base_qsu(wldNA))) + expect_equal(t(unclass(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)), by = rep(1, fnrow(GGDC10S)), higher = TRUE)))[,c(1L, 3:6)], unclass(base_qsu(GGDC10S))) }) @@ -118,9 +118,9 @@ test_that("Proper performance of weighted statsistics", { w <- ceiling(mtcars$wt*10) wx <- rep(x, w) expect_equal(base_w_qsu(x, w)[-1L], qsu(wx, higher = TRUE)[-1L]) - expect_equal(qsu(wx)[-1L], qsu(x, w = w)[-1L]) - expect_equal(qsu(wx, higher = TRUE)[-1L], qsu(x, w = w, higher = TRUE)[-1L]) - expect_equal(drop(qsu(wx, g = rep(1L, length(wx)), higher = TRUE))[-1L], drop(qsu(x, g = rep(1L, length(x)), w = w, higher = TRUE))[-1L]) + expect_equal(qsu(wx)[-1L], qsu(x, w = w)[-(1:2)]) + expect_equal(qsu(wx, higher = TRUE)[-1L], qsu(x, w = w, higher = TRUE)[-(1:2)]) + expect_equal(drop(qsu(wx, g = rep(1L, length(wx)), higher = TRUE))[-1L], drop(qsu(x, g = rep(1L, length(x)), w = w, higher = TRUE))[-(1:2)]) }) g <- GRP(wlddev, ~ income) @@ -136,15 +136,15 @@ test_that("qsu works properly for grouped and panel data computations", { expect_equal(qsu(wldNA, g), base_qsu(wldNA, g)) expect_equal(qsu(GGDC10S, GGDC10S$Variable), base_qsu(GGDC10S, GGDC10S$Variable)) # Grouped and Weighted Statistics - expect_equal(qsu(wldNA, g, w = ones), base_qsu(wldNA, g)) - expect_equal(qsu(GGDC10S, GGDC10S$Variable, w = rep(1, fnrow(GGDC10S))), base_qsu(GGDC10S, GGDC10S$Variable)) + expect_equal(qsu(wldNA, g, w = ones)[,-2L,], base_qsu(wldNA, g)) + expect_equal(qsu(GGDC10S, GGDC10S$Variable, w = rep(1, fnrow(GGDC10S)))[,-2L,], base_qsu(GGDC10S, GGDC10S$Variable)) # Panel Data Statistics ps <- qsu(wldNA, pid = p, cols = is.numeric) expect_equal(unattrib(t(ps["Overall",,])), unattrib(base_qsu(nv(wldNA)))) expect_equal(unattrib(t(ps["Between",,])), unattrib(base_qsu(fmean(nv(wldNA), p)))) expect_equal(unattrib(t(ps["Within", -1,])), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"))[, -1])) # Weighted Panel Data Statistics - ps <- qsu(wldNA, pid = p, w = ones, cols = is.numeric) + ps <- qsu(wldNA, pid = p, w = ones, cols = is.numeric)[,-2L,] expect_equal(unattrib(t(ps["Overall",,])), unattrib(base_qsu(nv(wldNA)))) expect_equal(unattrib(t(ps["Between",-1,])), unattrib(base_qsu(fbetween(nv(wldNA), p))[,-1])) expect_equal(unattrib(t(ps["Within", -1,])), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"))[, -1])) @@ -154,7 +154,7 @@ test_that("qsu works properly for grouped and panel data computations", { expect_equal(unattrib(ps[,-1,"Between",]), unattrib(base_qsu(fbetween(nv(wldNA), p), g)[,-1,])) expect_equal(unattrib(ps[,-1,"Within",]), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"), g)[,-1,])) # Grouped and Weighted Panel Data Statistics - ps <- qsu(wldNA, by = g, pid = p, w = ones, cols = is.numeric) + ps <- qsu(wldNA, by = g, pid = p, w = ones, cols = is.numeric)[,-2L,,] expect_equal(unattrib(ps[,,"Overall",]), unattrib(base_qsu(nv(wldNA), g))) expect_equal(unattrib(ps[,-1,"Between",]), unattrib(base_qsu(fbetween(nv(wldNA), p), g)[,-1,])) expect_equal(unattrib(ps[,-1,"Within",]), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"), g)[,-1,])) @@ -170,15 +170,15 @@ test_that("qsu works properly for grouped and panel data computations with highe expect_equal(qsu(wldNA, g, higher = TRUE)[,1:5,], base_qsu(wldNA, g)) expect_equal(qsu(GGDC10S, GGDC10S$Variable, higher = TRUE)[,1:5,], base_qsu(GGDC10S, GGDC10S$Variable)) # Grouped and Weighted Statistics - expect_equal(qsu(wldNA, g, w = ones, higher = TRUE)[,1:5,], base_qsu(wldNA, g)) - expect_equal(qsu(GGDC10S, GGDC10S$Variable, w = rep(1, fnrow(GGDC10S)), higher = TRUE)[,1:5,], base_qsu(GGDC10S, GGDC10S$Variable)) + expect_equal(qsu(wldNA, g, w = ones, higher = TRUE)[,c(1L, 3:6),], base_qsu(wldNA, g)) + expect_equal(qsu(GGDC10S, GGDC10S$Variable, w = rep(1, fnrow(GGDC10S)), higher = TRUE)[,c(1L, 3:6),], base_qsu(GGDC10S, GGDC10S$Variable)) # Panel Data Statistics ps <- qsu(wldNA, pid = p, cols = is.numeric, higher = TRUE)[,1:5,] expect_equal(unattrib(t(ps["Overall",,])), unattrib(base_qsu(nv(wldNA)))) expect_equal(unattrib(t(ps["Between",,])), unattrib(base_qsu(fmean(nv(wldNA), p)))) expect_equal(unattrib(t(ps["Within", -1,])), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"))[, -1])) # Weighted Panel Data Statistics - ps <- qsu(wldNA, pid = p, w = ones, cols = is.numeric, higher = TRUE)[,1:5,] + ps <- qsu(wldNA, pid = p, w = ones, cols = is.numeric, higher = TRUE)[,c(1L, 3:6),] expect_equal(unattrib(t(ps["Overall",,])), unattrib(base_qsu(nv(wldNA)))) # TODO: Figure out why this test fails !!!!!! # expect_equal(unattrib(t(ps["Between",-1,])), unattrib(base_qsu(fbetween(nv(wldNA), p))[,-1])) @@ -189,7 +189,7 @@ test_that("qsu works properly for grouped and panel data computations with highe expect_equal(unattrib(ps[,-1,"Between",]), unattrib(base_qsu(fbetween(nv(wldNA), p), g)[,-1,])) expect_equal(unattrib(ps[,-1,"Within",]), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"), g)[,-1,])) # Grouped and Weighted Panel Data Statistics - ps <- qsu(wldNA, by = g, pid = p, w = ones, cols = is.numeric, higher = TRUE)[,1:5,,] + ps <- qsu(wldNA, by = g, pid = p, w = ones, cols = is.numeric, higher = TRUE)[,c(1L, 3:6),,] expect_equal(unattrib(ps[,,"Overall",]), unattrib(base_qsu(nv(wldNA), g))) expect_equal(unattrib(ps[,-1,"Between",]), unattrib(base_qsu(fbetween(nv(wldNA), p), g)[,-1,])) expect_equal(unattrib(ps[,-1,"Within",]), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"), g)[,-1,])) From 3be9b2185b1e86b37c4567c952b88576c54e884c Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Sun, 27 Oct 2024 23:37:59 +0100 Subject: [PATCH 7/8] Minors. --- src/fbstats.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fbstats.cpp b/src/fbstats.cpp index 41d5d85e..898bd980 100644 --- a/src/fbstats.cpp +++ b/src/fbstats.cpp @@ -385,7 +385,7 @@ SEXP fbstatsCpp(const NumericVector& x, bool ext = false, int ng = 0, const Inte int l = x.size(); if(pg.size() != l) stop("length(pid) must match nrow(X)"); bool weights = !Rf_isNull(w); - int d = d = ((ext) ? 7 : 5) + weights; + int d = ((ext) ? 7 : 5) + weights; NumericVector sum(npg, NA_REAL); NumericVector sumw((weights) ? npg : 1); // no_init_vector(npg) : no_init_vector(1); // better for valgrind double osum = 0; From 552bc29fd1d5dacea5780abae531f11917f9d9e0 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Sun, 27 Oct 2024 23:38:19 +0100 Subject: [PATCH 8/8] Update date. --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 56950f52..67d94373 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: collapse Title: Advanced and Fast Data Transformation Version: 2.0.17 -Date: 2024-10-19 +Date: 2024-10-27 Authors@R: c( person("Sebastian", "Krantz", role = c("aut", "cre"), email = "sebastian.krantz@graduateinstitute.ch",