From 2a35d96073c8911ed25b30c6b9dd504bcf7eb919 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Sat, 15 Feb 2014 12:13:57 -0500 Subject: [PATCH 01/31] added minMax arg to print.ContTable --- R/modules.R | 50 ++++++++++++++++++++++++++++++----- R/print.ContTable.R | 63 +++++++++++++++++---------------------------- 2 files changed, 67 insertions(+), 46 deletions(-) diff --git a/R/modules.R b/R/modules.R index 906d9b1..cef999d 100644 --- a/R/modules.R +++ b/R/modules.R @@ -15,10 +15,10 @@ ModuleStopIfNotDataFrame <- function(data) { } ## Extract variables that exist in the data frame ModuleReturnVarsExist <- function(vars, data) { - + ## Check if variables exist. Drop them if not. varsNotInData <- setdiff(vars, names(data)) - + if (length(varsNotInData) > 0) { warning("The data frame does not have ", paste0(varsNotInData, sep = " "), " Dropped") @@ -43,7 +43,7 @@ ModuleReturnFalseIfNoStrata <- function(strata, test) { # Give strata variable n } ## Check statra variables and conditionally create ModuleReturnStrata <- function(strata, data, dat) { # Give strata variable names - + if(missing(strata)) { ## If there is no strata, give "Overall" to every subject strata <- rep("Overall", dim(dat)[1]) # Check if dim(dat)[[1]] is correct. @@ -105,7 +105,7 @@ ModuleCreateStrataVarName <- function(obj) { ## Try catch function # Taken from demo(error.catching) ## Used to define non-failing functions, that return NA when there is an error -tryCatch.W.E <- function(expr) { +tryCatch.W.E <- function(expr) { W <- NULL w.handler <- function(w) { # warning handler W <<- w @@ -142,7 +142,43 @@ sasKurtosis <- function(x) { ### Modules intented for the print methods ################################################################################ -### ModuleQuoteAndPrintMat() +## Define a function to format a normal variable +ModuleConvertNormal <- function(rowMat, digits) { + + ## Format for SD + fmt <- paste0(" (%.", digits,"f",")") + + ## Create a DF with numeric mean column and character (SD) column + data.frame(col1 = rowMat[,"mean"], + col2 = sprintf(fmt = fmt, rowMat[,"sd"]), + stringsAsFactors = FALSE) +} + +## Define a function to format a nonnormal variable +ModuleConvertNonNormal <- function(rowMat, digits, minMax = FALSE) { + + ## Format for [p25, p75] + fmt <- paste0(" [%.", digits,"f, %.",digits,"f]") + + if (minMax == FALSE) { + ## Create a DF with numeric median column and character [p25, p75] column + out <- data.frame(col1 = rowMat[,"median"], + col2 = sprintf(fmt = fmt, rowMat[,"p25"], rowMat[,"p75"]), + stringsAsFactors = FALSE) + } else if (minMax == TRUE) { + ## Create a DF with numeric median column and character [p25, p75] column + out <- data.frame(col1 = rowMat[,"median"], + col2 = sprintf(fmt = fmt, rowMat[,"min"], rowMat[,"max"]), + stringsAsFactors = FALSE) + } else { + stop("minMax must be a logical vector of one: FALSE or TRUE") + } + + return(out) +} + +### Modules by both print and summary methods +## ModuleQuoteAndPrintMat() ## Takes an matrix object format, print, and (invisibly) return it ## Requires quote and printToggle argument in the printToggle method ModuleQuoteAndPrintMat <- function(matObj, quote = FALSE, printToggle = TRUE) { @@ -161,10 +197,10 @@ ModuleQuoteAndPrintMat <- function(matObj, quote = FALSE, printToggle = TRUE) { ## print if required and return if (printToggle) { - + print(matObj, quote = quote) return(matObj) - + } else if (!printToggle) { return(matObj) diff --git a/R/print.ContTable.R b/R/print.ContTable.R index 3fc39ad..5aaaa84 100644 --- a/R/print.ContTable.R +++ b/R/print.ContTable.R @@ -3,26 +3,17 @@ ##' This is the \code{print} method for the \code{ContTable} class objects created by \code{\link{CreateContTable}} function. ##' ##' @param x The result of a call to the \code{\link{CreateContTable}} function. -##' @param missing Whether to show missing data information (not implemented -##' yet, placeholder) +##' @param missing Whether to show missing data information (not implemented yet, placeholder) ##' @param digits Number of digits to print in the table. -##' @param nonnormal A character vector to specify the variables for which the -##' p-values should be those of nonparametric tests. By default all p-values -##' are from normal assumption-based tests (oneway.test). -##' @param quote Whether to show everything in quotes. The default is FALSE. If -##' TRUE, everything including the row and column names are quoted so that you -##' can copy it to Excel easily. -##' @param test Whether to show the p-values. TRUE by default. If FALSE, only -##' the numerical summaries are shown. +##' @param nonnormal A character vector to specify the variables for which the p-values should be those of nonparametric tests. By default all p-values are from normal assumption-based tests (oneway.test). +##' @param minMax Whether to use [min,max] instead of [p25,p75] for nonnormal variables. The default is FALSE. +##' @param quote Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily. +##' @param test Whether to show the p-values. TRUE by default. If FALSE, only the numerical summaries are shown. ##' @param pDigits Number of digits to print for p-values. -##' @param explain Whether to add explanation to the variable names, i.e., -##' (mean (sd) or median [IQR]) is added to the variable names. -##' @param printToggle Whether to print the output. If FLASE, no output is -##' created, and a matrix is invisibly returned. +##' @param explain Whether to add explanation to the variable names, i.e., (mean (sd) or median [IQR]) is added to the variable names. +##' @param printToggle Whether to print the output. If FLASE, no output is created, and a matrix is invisibly returned. ##' @param ... For compatibility with generic. Ignored. -##' @return It is mainly for printing the result. But this function does return -##' a matrix containing what you see in the output invisibly. You can assign it -##' to an object to save it. +##' @return It is mainly for printing the result. But this function does return a matrix containing what you see in the output invisibly. You can assign it to an object to save it. ##' @author Kazuki Yoshida ##' @seealso ##' \code{\link{CreateCatTable}}, \code{\link{print.CatTable}}, \code{\link{summary.CatTable}}, @@ -81,7 +72,7 @@ ##' ##' @export print.ContTable <- function(x, missing = FALSE, - digits = 2, nonnormal = NULL, quote = FALSE, + digits = 2, nonnormal = NULL, minMax = FALSE, quote = FALSE, test = TRUE, pDigits = 3, explain = TRUE, printToggle = TRUE, @@ -160,30 +151,17 @@ print.ContTable <- function(x, missing = FALSE, ### Conversion of data for printing - ## These may want to be moved to separate files later. - ## Define a function to print a normal variable + ## Define the nonnormal formatter depending on the minMax status ConvertNormal <- function(rowMat) { - - ## Format for SD - fmt <- paste0(" (%.", digits,"f",")") - - ## Create a DF with numeric mean column and character (SD) column - data.frame(col1 = rowMat[,"mean"], - col2 = sprintf(fmt = fmt, rowMat[,"sd"]), - stringsAsFactors = FALSE) + ## Take minMax value from outside (NOT A STANDALONE FUNCTION!!) + ModuleConvertNormal(rowMat, digits) } - ## Define a function to print a nonnormal variable + ## Define the nonnormal formatter depending on the minMax status ConvertNonNormal <- function(rowMat) { - ## Format for [p25, p75] - fmt <- paste0(" [%.", digits,"f, %.",digits,"f]") - - ## Create a DF with numeric median column and character [p25, p75] column - data.frame(col1 = rowMat[,"median"], - col2 = sprintf(fmt = fmt, rowMat[,"p25"], rowMat[,"p75"]), - stringsAsFactors = FALSE) + ## Take minMax value from outside (NOT A STANDALONE FUNCTION!!) + ModuleConvertNonNormal(rowMat, digits, minMax = minMax) } - ## Create a list of these two functions listOfFunctions <- list(normal = ConvertNormal, nonnormal = ConvertNonNormal) @@ -322,9 +300,16 @@ print.ContTable <- function(x, missing = FALSE, } - ## Add mean (sd) or median [IQR] explanation if requested + ## Add mean (sd) or median [IQR]/median [range] explanation if requested if (explain) { - what <- c(" (mean (sd))"," (median [IQR])")[nonnormal] + + ## Create a vector of explanations to be pasted + if (minMax == FALSE) { + what <- c(" (mean (sd))"," (median [IQR])")[nonnormal] + } else if (minMax == TRUE) { + what <- c(" (mean (sd))"," (median [range])")[nonnormal] + } + ## Paste to the rownames rownames(out) <- paste0(rownames(out), what) } From ff24f9c15c7dd41eef8585e9005a6955989f1c44 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Sat, 15 Feb 2014 12:26:04 -0500 Subject: [PATCH 02/31] doc fix unrelated to minMax (should be cherry picked) --- R/CreateCatTable.R | 2 +- R/CreateContTable.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/CreateCatTable.R b/R/CreateCatTable.R index d22fd05..a082d76 100644 --- a/R/CreateCatTable.R +++ b/R/CreateCatTable.R @@ -72,7 +72,7 @@ ##' ## (3 by default). It does <0.001 for you. ##' catTableBySexTrt ##' -##' ## The exact argument will toggle the p-values to the example test result from +##' ## The exact argument toggles the p-values to the exact test result from ##' ## fisher.test. It will show which ones are from exact tests. ##' print(catTableBySexTrt, exact = "ascites") ##' diff --git a/R/CreateContTable.R b/R/CreateContTable.R index 9564427..d9d81a7 100644 --- a/R/CreateContTable.R +++ b/R/CreateContTable.R @@ -77,7 +77,7 @@ ##' ## by the pDigits argument (3 by default). It does <0.001 for you. ##' contTableBySexTrt ##' -##' ## The nonnormal argument will toggle the p-values to the nonparametric result from +##' ## The nonnormal argument toggles the p-values to the nonparametric result from ##' ## kruskal.test (wilcox.test equivalent for the two group case). ##' print(contTableBySexTrt, nonnormal = nonNormalVars) ##' From 1e95e20076b01aed5f0aa07b8328f5c2a2ed2ab0 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Sat, 15 Feb 2014 12:28:49 -0500 Subject: [PATCH 03/31] minMax implemented in print.TableOne --- R/print.CatTable.R | 2 +- R/print.ContTable.R | 5 ++++- R/print.TableOne.R | 39 ++++++++++++++------------------------- 3 files changed, 19 insertions(+), 27 deletions(-) diff --git a/R/print.CatTable.R b/R/print.CatTable.R index 22de2ef..84dab65 100644 --- a/R/print.CatTable.R +++ b/R/print.CatTable.R @@ -77,7 +77,7 @@ ##' ## (3 by default). It does <0.001 for you. ##' catTableBySexTrt ##' -##' ## The exact argument will toggle the p-values to the example test result from +##' ## The exact argument toggles the p-values to the exact test result from ##' ## fisher.test. It will show which ones are from exact tests. ##' print(catTableBySexTrt, exact = "ascites") ##' diff --git a/R/print.ContTable.R b/R/print.ContTable.R index 5aaaa84..d2fe7ad 100644 --- a/R/print.ContTable.R +++ b/R/print.ContTable.R @@ -58,10 +58,13 @@ ##' ## by the pDigits argument (3 by default). It does <0.001 for you. ##' contTableBySexTrt ##' -##' ## The nonnormal argument will toggle the p-values to the nonparametric result from +##' ## The nonnormal argument toggles the p-values to the nonparametric result from ##' ## kruskal.test (wilcox.test equivalent for the two group case). ##' print(contTableBySexTrt, nonnormal = nonNormalVars) ##' +##' ## The minMax argument toggles whether to show median [range] +##' print(contTableBySexTrt, nonnormal = nonNormalVars, minMax = TRUE) +##' ##' ## summary now includes both types of p-values ##' summary(contTableBySexTrt) ##' diff --git a/R/print.TableOne.R b/R/print.TableOne.R index 111183c..2f43263 100644 --- a/R/print.TableOne.R +++ b/R/print.TableOne.R @@ -3,33 +3,20 @@ ##' This is the \code{print} method for the \code{TableOne} class objects created by \code{\link{CreateTableOne}} function. ##' ##' @param x The result of a call to the \code{\link{CreateTableOne}} function. -##' @param missing Whether to show missing data information (not implemented -##' yet, placeholder) -##' @param quote Whether to show everything in quotes. The default is FALSE. If -##' TRUE, everything including the row and column names are quoted so that you -##' can copy it to Excel easily. -##' @param test Whether to show the p-values. TRUE by default. If FALSE, only -##' the numerical summaries are shown. +##' @param missing Whether to show missing data information (not implemented yet, placeholder) +##' @param quote Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily. +##' @param test Whether to show the p-values. TRUE by default. If FALSE, only the numerical summaries are shown. ##' @param catDigits Number of digits to print for proportions. Default 1. ##' @param contDigits Number of digits to print for continuous variables. Default 2. ##' @param pDigits Number of digits to print for p-values. Default 3. -##' @param format The default is "fp" frequency (percentage). You can also -##' choose from "f" frequency only, "p" percentage only, and "pf" percentage -##' (frequency). -##' @param exact A character vector to specify the variables for which the -##' p-values should be those of exact tests. By default all p-values are from -##' large sample approximation tests (chisq.test). -##' @param nonnormal A character vector to specify the variables for which the -##' p-values should be those of nonparametric tests. By default all p-values -##' are from normal assumption-based tests (oneway.test). -##' @param explain Whether to add explanation to the variable names, i.e., (\%) -##' is added to the variable names when percentage is shown. -##' @param printToggle Whether to print the output. If FLASE, no output is -##' created, and a matrix is invisibly returned. +##' @param format The default is "fp" frequency (percentage). You can also choose from "f" frequency only, "p" percentage only, and "pf" percentage (frequency). +##' @param exact A character vector to specify the variables for which the p-values should be those of exact tests. By default all p-values are from large sample approximation tests (chisq.test). +##' @param nonnormal A character vector to specify the variables for which the p-values should be those of nonparametric tests. By default all p-values are from normal assumption-based tests (oneway.test). +##' @param minMax Whether to use [min,max] instead of [p25,p75] for nonnormal variables. The default is FALSE. +##' @param explain Whether to add explanation to the variable names, i.e., (\%) is added to the variable names when percentage is shown. +##' @param printToggle Whether to print the output. If FLASE, no output is created, and a matrix is invisibly returned. ##' @param ... For compatibility with generic. Ignored. -##' @return It is mainly for printing the result. But this function does return -##' a matrix containing what you see in the output invisibly. You can assign it -##' to an object to save it. +##' @return It is mainly for printing the result. But this function does return a matrix containing what you see in the output invisibly. You can assign it to an object to save it. ##' @author Kazuki Yoshida, Justin Bohn ##' @seealso ##' \code{\link{CreateTableOne}}, \code{\link{print.TableOne}}, \code{\link{summary.TableOne}}, @@ -88,6 +75,7 @@ print.TableOne <- function(x, missing = FALSE, ## Continuous options nonnormal = NULL, + minMax = FALSE, ## Common options explain = TRUE, @@ -109,9 +97,10 @@ print.TableOne <- function(x, missing = FALSE, print(TableOne[[i]], printToggle = FALSE, test = test, explain = explain, digits = digits[i], ## print.CatTable arguments - format = format, exact = exact, showAllLevels = FALSE, + format = format, exact = exact, + showAllLevels = FALSE, # must be FALSE to get same column counts ## print.ContTable argument - nonnormal = nonnormal + nonnormal = nonnormal, minMax = minMax ) # Method dispatch at work }, simplify = FALSE) From 9d01c88168ba5915042af03ea0ca4534ec9cf91d Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Sat, 15 Feb 2014 15:12:00 -0500 Subject: [PATCH 04/31] cramming two levels into one row. CatTable messed up. TableOne ok. --- R/print.CatTable.R | 56 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/R/print.CatTable.R b/R/print.CatTable.R index 84dab65..2205cf6 100644 --- a/R/print.CatTable.R +++ b/R/print.CatTable.R @@ -20,6 +20,7 @@ ##' @param pDigits Number of digits to print for p-values. ##' @param showAllLevels Whether to show all levels. FALSE by default, i.e., ##' for 2-level categorical variables, only the higher level is shown to avoid +##' @param cramLevels For two level factors, cram them together in one row. ##' @param explain Whether to add explanation to the variable names, i.e., (\%) ##' is added to the variable names when percentage is shown. ##' @param CrossTable Whether to show the cross table objects held internally @@ -95,6 +96,7 @@ print.CatTable <- function(x, missing = FALSE, digits = 1, exact = NULL, quote = FALSE, test = TRUE, pDigits = 3, showAllLevels = FALSE, + cramLevels = TRUE, # TRUE for testing explain = TRUE, CrossTable = FALSE, printToggle = TRUE, @@ -214,6 +216,8 @@ print.CatTable <- function(x, missing = FALSE, ## Add first row indicator column DF$firstRowInd <- "" + ## Add crammed row indicator column + DF$crammedRowInd <- "" ## Format based on the number of levels if (!showAllLevels & nRow == 1) { @@ -224,12 +228,29 @@ print.CatTable <- function(x, missing = FALSE, DF[1,"firstRowInd"] <- "first" } else if (!showAllLevels & nRow == 2) { - ## If showAllLevels is FALSE AND there are only TWO levels, - ## change variable name, and delete the first level. - DF$var <- with(DF, paste0(var, " = ", level)) - DF <- DF[-1, , drop = FALSE] - ## Add first row indicator (used to add (%)) - DF[1,"firstRowInd"] <- "first" + + if (cramLevels) { + ## If cramLevels is true. Cram in one line + ## Cram two freq and count with / in between + DF$freq <- paste0(DF$freq, collapse = "/") + DF$percent <- paste0(DF$percent, collapse = "/") + DF$var <- paste0(DF$var, " = ", + paste0(DF$level, collapse = "/")) + ## Delete the first row + DF <- DF[-1, , drop = FALSE] + ## Add first row indicator (used to add (%)) + DF[1,"firstRowInd"] <- "first" + DF[1,"crammedRowInd"] <- "crammed" + + } else { + ## Otherwise, keep the second level only + ## If showAllLevels is FALSE AND there are only TWO levels, + ## change variable name, and delete the first level. + DF$var <- with(DF, paste0(var, " = ", level)) + DF <- DF[-1, , drop = FALSE] + ## Add first row indicator (used to add (%)) + DF[1,"firstRowInd"] <- "first" + } } else if (!showAllLevels & nRow > 2) { ## If showAllLevels is FALSE AND there are MORE THAN two levels, @@ -266,15 +287,22 @@ print.CatTable <- function(x, missing = FALSE, ## Check non-empty rows posNonEmptyRows <- DF$freq != "" - ## Right justify frequency - DF$freq <- format(DF$freq, justify = "right") - ## Obtain the width of characters - nCharFreq <- nchar(DF$freq[1]) + ## Right justify frequency (crammed and non crammed separately) + DF[DF$crammedRowInd == "crammed","freq"] <- + format(DF[DF$crammedRowInd == "crammed","freq"], justify = "right") + DF[DF$crammedRowInd == "","freq"] <- + format(DF[DF$crammedRowInd == "","freq"], justify = "right") + + ## Obtain the max width of characters + nCharFreq <- max(nchar(DF$freq)) ## Right justify percent - DF$percent <- format(DF$percent, justify = "right") - ## Obtain the width of characters - nCharPercent <- nchar(DF$percent[1]) + DF[DF$crammedRowInd == "crammed","percent"] <- + format(DF[DF$crammedRowInd == "crammed","percent"], justify = "right") + DF[DF$crammedRowInd == "","percent"] <- + format(DF[DF$crammedRowInd == "","percent"], justify = "right") + ## Obtain the max width of characters + nCharPercent <- max(nchar(DF$percent)) ## Add freq (percent) column (only in non-empty rows) DF$freqPer <- "" @@ -409,7 +437,7 @@ print.CatTable <- function(x, missing = FALSE, ## If exact test is used at least onece, add a test type indicator. ## if (any(exact == 2)) { - if (TRUE) { + if (TRUE) { ## Create an empty test type column out <- cbind(out, test = rep("", nrow(out))) # Column for test types From c644f9cc981e03a5c23d1e1f252ceb2f61b6ae1b Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Sun, 16 Feb 2014 01:40:38 -0500 Subject: [PATCH 05/31] SugarSync error reverting to previous state? --- DESCRIPTION | 12 +++---- NEWS | 24 +++++++------- R/CreateCatTable.R | 4 +-- R/CreateContTable.R | 12 ++++--- R/CreateTableOne.R | 26 ++++----------- R/ShowRegTable.R | 24 ++++++++++---- R/modules.R | 50 ++++------------------------ R/print.CatTable.R | 6 ++-- R/print.ContTable.R | 72 ++++++++++++++++++++++++---------------- R/print.TableOne.R | 54 ++++++++++++++++-------------- R/summary.CatTable.R | 6 +++- R/summary.ContTable.R | 6 +++- R/summary.TableOne.R | 5 +++ R/tableone-package.R | 15 ++------- man/CreateCatTable.Rd | 2 +- man/CreateContTable.Rd | 11 +++--- man/CreateTableOne.Rd | 37 +++++++-------------- man/ShowRegTable.Rd | 12 +++++-- man/print.CatTable.Rd | 5 ++- man/print.ContTable.Rd | 5 ++- man/print.TableOne.Rd | 16 ++------- man/summary.CatTable.Rd | 9 +++-- man/summary.ContTable.Rd | 7 ++-- man/summary.TableOne.Rd | 5 +++ man/tableone-package.Rd | 15 +-------- 25 files changed, 199 insertions(+), 241 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9797ef6..77904e9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,17 +1,17 @@ Package: tableone Type: Package Title: Create "Table 1" to describe baseline characteristics -Version: 0.2.1 -Date: 2014-02-15 +Version: 0.2.0 +Date: 2014-02-13 Author: Kazuki Yoshida, Justin Bohn Maintainer: Kazuki Yoshida Description: This package creates "Table 1", i.e., description of baseline patient characteristics, which is essential every medical research. This package provides functions to create such summaries for continuous and - categorical variables, optionally with subgroups comparisons. The package - was insipired by and based on descriptive statistics functions in Deducer, - a Java-based GUI package by Ian Fellows. This package does not require GUI - or Java, and intended for CUI users. + categorical variables, optionally with subgroups and groupwise comparison. + The package was insipired by and based on descriptive statistics functions + in Deducer, a Java-based GUI package by Ian Fellows. This package does not + require GUI or Java, and intended for CUI users. License: GPL-2 Depends: e1071, diff --git a/NEWS b/NEWS index 284c9ff..dacfcd9 100644 --- a/NEWS +++ b/NEWS @@ -1,27 +1,27 @@ -tableone 0.2.1 (2014-02-15) ----------------------------------------------------------------- -BUG FIXES - -* Documentations were fixed to represent the current version. - - -tableone 0.2.0 (2014-02-14) +tableone 0.2.0 ---------------------------------------------------------------- NEW FEATURES * CreateTableOne and related print/summary methods were added. * CreateTableOne can crate a table with both categorical and - continuous variables. + continuous variables. + + +tableone 0.1.3 +---------------------------------------------------------------- +NEW FEATURES * The print method can suppress printing by printToggle option. +* (CreateTableOne and related print and summary methods are introduced). + BUG FIXES * In the Roxygen part of the code, only at_export followed by nothing is used. -tableone 0.1.2 (2014-02-09) +tableone 0.1.2 ---------------------------------------------------------------- BUG FIXES @@ -33,7 +33,7 @@ BUG FIXES * Passed all the default tests by R CMD check file.tar.gz -tableone 0.1.1 (2014-02-09) +tableone 0.1.1 ---------------------------------------------------------------- BUG FIXES @@ -42,7 +42,7 @@ BUG FIXES * Documents are now included in -tableone 0.1.0 (2014-02-08) +tableone 0.1.0 ---------------------------------------------------------------- FIRST DEVELOPMENTAL VERSION diff --git a/R/CreateCatTable.R b/R/CreateCatTable.R index a082d76..1904018 100644 --- a/R/CreateCatTable.R +++ b/R/CreateCatTable.R @@ -26,7 +26,7 @@ ##' @return An object of class \code{CatTable}, which really is a \code{\link{by}} object with ##' additional attributes. Each element of the \code{\link{by}} part is a matrix with rows ##' representing variables, and columns representing summary statistics. -##' @author Kazuki Yoshida (based on \code{Deducer::frequencies()}) +##' @author Kazuki Yoshida ##' @seealso ##' \code{\link{CreateCatTable}}, \code{\link{print.CatTable}}, \code{\link{summary.CatTable}}, ##' \code{\link{CreateContTable}}, \code{\link{print.ContTable}}, \code{\link{summary.ContTable}}, @@ -72,7 +72,7 @@ ##' ## (3 by default). It does <0.001 for you. ##' catTableBySexTrt ##' -##' ## The exact argument toggles the p-values to the exact test result from +##' ## The exact argument will toggle the p-values to the example test result from ##' ## fisher.test. It will show which ones are from exact tests. ##' print(catTableBySexTrt, exact = "ascites") ##' diff --git a/R/CreateContTable.R b/R/CreateContTable.R index d9d81a7..008f0b1 100644 --- a/R/CreateContTable.R +++ b/R/CreateContTable.R @@ -16,8 +16,9 @@ ##' kurtosis (same definition as in SAS). All of them can be seen in the ##' summary method output. The print method uses subset of these. You can ##' choose subset of them or reorder them. They are all configure to omit NA -##' values (\code{na.rm = TRUE}). -##' @param func.additional Additional functions can be given as a named list. For example, \code{list(sum = sum)}. +##' values (na.rm = TRUE). +##' @param func.additional Additional functions can be given as a named list. +##' For example, list(sum = sum). ##' @param test If TRUE, as in the default and there are more than two groups, ##' groupwise comparisons are performed. Both tests that assume normality and ##' tests that do not are performed. Either one of the result can be obtained @@ -26,14 +27,14 @@ ##' tests. The default is \code{\link{oneway.test}}. This is equivalent of the t-test when there are only two groups. ##' @param argsNormal A named list of arguments passed to the function specified in \code{testNormal}. The default is \code{list(var.equal = TRUE)}, which makes it the ordinary ANOVA that assumes equal variance across groups. ##' @param testNonNormal A function used to perform the nonparametric tests. -##' The default is \code{kruskal.test} (Kruskal-Wallis rank sum test). This is +##' The default is \code{kruskal.test} (Kruskal-Wallis Rank Sum Test). This is ##' equivalent of the wilcox.test (Man-Whitney U test) when there are only two ##' groups. ##' @param argsNonNormal A named list of arguments passed to the function specified in \code{testNonNormal}. The default is \code{list(NULL)}, which is just a placeholder. ##' @return An object of class \code{ContTable}, which really is a \code{\link{by}} object with ##' additional attributes. Each element of the \code{\link{by}} part is a matrix with rows ##' representing variables, and columns representing summary statistics. -##' @author Kazuki Yoshida (based on \code{Deducer::descriptive.table()}) +##' @author Kazuki Yoshida ##' @seealso ##' \code{\link{CreateContTable}}, \code{\link{print.ContTable}}, \code{\link{summary.ContTable}}, ##' \code{\link{CreateCatTable}}, \code{\link{print.CatTable}}, \code{\link{summary.CatTable}}, @@ -52,6 +53,7 @@ ##' ## Create an overall table for continuous variables ##' contVars <- c("time","age","bili","chol","albumin","copper", ##' "alk.phos","ast","trig","platelet","protime") +##' ##' contTableOverall <- CreateContTable(vars = contVars, data = pbc) ##' ##' ## Simply typing the object name will invoke the print.ContTable method, @@ -77,7 +79,7 @@ ##' ## by the pDigits argument (3 by default). It does <0.001 for you. ##' contTableBySexTrt ##' -##' ## The nonnormal argument toggles the p-values to the nonparametric result from +##' ## The nonnormal argument will toggle the p-values to the nonparametric result from ##' ## kruskal.test (wilcox.test equivalent for the two group case). ##' print(contTableBySexTrt, nonnormal = nonNormalVars) ##' diff --git a/R/CreateTableOne.R b/R/CreateTableOne.R index 771ee11..845c3f9 100644 --- a/R/CreateTableOne.R +++ b/R/CreateTableOne.R @@ -1,7 +1,9 @@ ##' Create an object summarizing both categorical and continuous variables ##' -##' Create an object summarizing all baseline variables optionally stratifying by one or more startifying variables and performing statistical tests. The object gives a table that is easy to use in medical research papers. See also \code{\link{print.TableOne}} and \code{\link{summary.TableOne}}. -##' +##' Create an object summarizing categorical variables optionally stratifying +##' by one or more startifying variables and performing statistical tests. The +##' object gives a table that is easy to use in medical research papers. See also \code{\link{print.TableOne}} and \code{\link{summary.TableOne}}. +##' ##' @param vars Variables to be summarized given as a character vector. Factors are ##' handled as categorical variables, whereas numeric variables are handled as continuous variables. ##' @param strata Stratifying (grouping) variable name(s) given as a character @@ -27,19 +29,14 @@ ##' memory limitation. In this situation, the large sample approximation based ##' should suffice. ##' @param argsExact A named list of arguments passed to the function specified in testExact. The default is \code{list(workspace = 2*10^5)}, which specifies the memory space allocated for \code{\link{fisher.test}}. -##' @return An object of class \code{TableOne}, which really is a list of three objects. -##' @return \item{TableOne}{a categorical-continuous mixture data formatted and printed by the \code{\link{print.TableOne}} method} -##' @return \item{ContTable}{an object of class \code{ContTable}, containing continuous variables only} -##' @return \item{CatTable}{ an object of class \code{CatTable}, containing categorical variables only} -##' @return The second and third objects can be then be examined with the \code{print} and \code{summary} method, for example, \code{summary(object$CatTable)} to examine the categorical variables in detail. -##' +##' @return An object of class \code{TableOne}, which really is a list of three objects. The first object named \code{object$TableOne} is the categorical-continuous mixture table formatted and printed by the \code{\link{print.TableOne}} method. The second object named \code{object$ContTable} is the table object containing continuous variables only. The third object named \code{object$CatTable} is the table object containing categorical variables only. The second and third objects can be then be examined with the \code{print} and \code{summary} method, for example, \code{summary(object$CatTable)} to examine the categorical variables in detail. ##' @author Justin Bohn, Kazuki Yoshida ##' @seealso ##' \code{\link{CreateTableOne}}, \code{\link{print.TableOne}}, \code{\link{summary.TableOne}}, ##' \code{\link{CreateCatTable}}, \code{\link{print.CatTable}}, \code{\link{summary.CatTable}}, ##' \code{\link{CreateContTable}}, \code{\link{print.ContTable}}, \code{\link{summary.ContTable}} ##' @examples -##' +##' ##' ## Load ##' library(tableone) ##' @@ -69,25 +66,16 @@ ##' ## Specifying nonnormal variables will show the variables appropriately, ##' ## and show nonparametric test p-values. Specify variables in the exact ##' ## argument to obtain the exact test p-values. -##' print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), -##' exact = c("status","stage")) +##' print(tableOne, nonnormal = c("time"), exact = c("ascites")) ##' ##' ## Use the summary.TableOne method for detailed summary ##' summary(tableOne) ##' ##' ## See the categorical part only using $ operator ##' tableOne$CatTable -##' summary(tableOne$CatTable) ##' ##' ## See the continuous part only using $ operator ##' tableOne$ContTable -##' summary(tableOne$ContTable) -##' -##' ## If your work flow includes copying to Excel and Word when writing manuscripts, -##' ## you may benefit from the quote argument. This will quote everything so that -##' ## Excel does not mess up the cells. -##' print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), -##' exact = c("status","stage"), quote = TRUE) ##' ##' @export CreateTableOne <- diff --git a/R/ShowRegTable.R b/R/ShowRegTable.R index 96b4040..bb49457 100644 --- a/R/ShowRegTable.R +++ b/R/ShowRegTable.R @@ -1,15 +1,23 @@ ##' Format regression results in medically decent format ##' -##' It shows the regression result in the HR [95\% CI] p-value format, which is usually the form used in medical research papers. +##' It shows the regression result in the HR [95\% CI] p-value format, which is +##' usually the form used in medical research papers. ##' ##' -##' @param model Regression model result objects that have the summary and confint methods. -##' @param exp TRUE by default. You need to specify exp = FALSE if your model is has the indentity link function (linear regression, etc). +##' @param model Regression model result objects that have the summary and +##' confint methods. +##' @param exp TRUE by default. You need to specify exp = FALSE if your model +##' is has the indentity link function (linear regression, etc). ##' @param digits Number of digits to print for the main part. ##' @param pDigits Number of digits to print for the p-values. -##' @param quote Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily. -##' @return A matrix containing what you see is returned invisibly. You can capture it by assignment to an object. +##' @param quote Whether to show everything in quotes. The default is FALSE. If +##' TRUE, everything including the row and column names are quoted so that you +##' can copy it to Excel easily. +##' @return A matrix containing what you see is returned invisibly. You can +##' capture it by assignment to an object. ##' @author Kazuki Yoshida +##' @seealso CreateContTable, \code{\link{print.ContTable}}, \code{\link{summary.ContTable}}, +##' \code{\link{CreateCatTable}}, \code{\link{print.CatTable}}, \code{\link{summary.CatTable}} ##' @examples ##' ##' ## Load @@ -22,8 +30,10 @@ ##' head(pbc) ##' ##' ## Fit a Cox regression model -##' objCoxph <- coxph(formula = Surv(time, status == 2) ~ trt + age + albumin + ascites, -##' data = pbc) +##' objCoxph <- +##' coxph(formula = Surv(time, status == 2) ~ trt + age + albumin + ascites, +##' data = pbc, +##' ties = c("efron","breslow","exact")[1]) ##' ##' ## Show the simple table ##' ShowRegTable(objCoxph) diff --git a/R/modules.R b/R/modules.R index cef999d..906d9b1 100644 --- a/R/modules.R +++ b/R/modules.R @@ -15,10 +15,10 @@ ModuleStopIfNotDataFrame <- function(data) { } ## Extract variables that exist in the data frame ModuleReturnVarsExist <- function(vars, data) { - + ## Check if variables exist. Drop them if not. varsNotInData <- setdiff(vars, names(data)) - + if (length(varsNotInData) > 0) { warning("The data frame does not have ", paste0(varsNotInData, sep = " "), " Dropped") @@ -43,7 +43,7 @@ ModuleReturnFalseIfNoStrata <- function(strata, test) { # Give strata variable n } ## Check statra variables and conditionally create ModuleReturnStrata <- function(strata, data, dat) { # Give strata variable names - + if(missing(strata)) { ## If there is no strata, give "Overall" to every subject strata <- rep("Overall", dim(dat)[1]) # Check if dim(dat)[[1]] is correct. @@ -105,7 +105,7 @@ ModuleCreateStrataVarName <- function(obj) { ## Try catch function # Taken from demo(error.catching) ## Used to define non-failing functions, that return NA when there is an error -tryCatch.W.E <- function(expr) { +tryCatch.W.E <- function(expr) { W <- NULL w.handler <- function(w) { # warning handler W <<- w @@ -142,43 +142,7 @@ sasKurtosis <- function(x) { ### Modules intented for the print methods ################################################################################ -## Define a function to format a normal variable -ModuleConvertNormal <- function(rowMat, digits) { - - ## Format for SD - fmt <- paste0(" (%.", digits,"f",")") - - ## Create a DF with numeric mean column and character (SD) column - data.frame(col1 = rowMat[,"mean"], - col2 = sprintf(fmt = fmt, rowMat[,"sd"]), - stringsAsFactors = FALSE) -} - -## Define a function to format a nonnormal variable -ModuleConvertNonNormal <- function(rowMat, digits, minMax = FALSE) { - - ## Format for [p25, p75] - fmt <- paste0(" [%.", digits,"f, %.",digits,"f]") - - if (minMax == FALSE) { - ## Create a DF with numeric median column and character [p25, p75] column - out <- data.frame(col1 = rowMat[,"median"], - col2 = sprintf(fmt = fmt, rowMat[,"p25"], rowMat[,"p75"]), - stringsAsFactors = FALSE) - } else if (minMax == TRUE) { - ## Create a DF with numeric median column and character [p25, p75] column - out <- data.frame(col1 = rowMat[,"median"], - col2 = sprintf(fmt = fmt, rowMat[,"min"], rowMat[,"max"]), - stringsAsFactors = FALSE) - } else { - stop("minMax must be a logical vector of one: FALSE or TRUE") - } - - return(out) -} - -### Modules by both print and summary methods -## ModuleQuoteAndPrintMat() +### ModuleQuoteAndPrintMat() ## Takes an matrix object format, print, and (invisibly) return it ## Requires quote and printToggle argument in the printToggle method ModuleQuoteAndPrintMat <- function(matObj, quote = FALSE, printToggle = TRUE) { @@ -197,10 +161,10 @@ ModuleQuoteAndPrintMat <- function(matObj, quote = FALSE, printToggle = TRUE) { ## print if required and return if (printToggle) { - + print(matObj, quote = quote) return(matObj) - + } else if (!printToggle) { return(matObj) diff --git a/R/print.CatTable.R b/R/print.CatTable.R index 84dab65..04cac71 100644 --- a/R/print.CatTable.R +++ b/R/print.CatTable.R @@ -1,6 +1,8 @@ ##' Format and print the \code{CatTable} class objects ##' -##' This is the \code{print} method for the \code{CatTable} class objects created by \code{\link{CreateCatTable}} function. +##' This is the print method for the CatTable class objects created by +##' CreateCatTable function. +##' ##' ##' @param x The result of a call to the \code{\link{CreateCatTable}} function. ##' @param missing Whether to show missing data information (not implemented @@ -77,7 +79,7 @@ ##' ## (3 by default). It does <0.001 for you. ##' catTableBySexTrt ##' -##' ## The exact argument toggles the p-values to the exact test result from +##' ## The exact argument will toggle the p-values to the example test result from ##' ## fisher.test. It will show which ones are from exact tests. ##' print(catTableBySexTrt, exact = "ascites") ##' diff --git a/R/print.ContTable.R b/R/print.ContTable.R index d2fe7ad..146aedc 100644 --- a/R/print.ContTable.R +++ b/R/print.ContTable.R @@ -1,19 +1,30 @@ ##' Format and print the \code{ContTable} class objects ##' -##' This is the \code{print} method for the \code{ContTable} class objects created by \code{\link{CreateContTable}} function. +##' This is the print method for the ContTable class objects created by +##' CreateContTable function. +##' ##' ##' @param x The result of a call to the \code{\link{CreateContTable}} function. -##' @param missing Whether to show missing data information (not implemented yet, placeholder) +##' @param missing Whether to show missing data information (not implemented +##' yet, placeholder) ##' @param digits Number of digits to print in the table. -##' @param nonnormal A character vector to specify the variables for which the p-values should be those of nonparametric tests. By default all p-values are from normal assumption-based tests (oneway.test). -##' @param minMax Whether to use [min,max] instead of [p25,p75] for nonnormal variables. The default is FALSE. -##' @param quote Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily. -##' @param test Whether to show the p-values. TRUE by default. If FALSE, only the numerical summaries are shown. +##' @param nonnormal A character vector to specify the variables for which the +##' p-values should be those of nonparametric tests. By default all p-values +##' are from normal assumption-based tests (oneway.test). +##' @param quote Whether to show everything in quotes. The default is FALSE. If +##' TRUE, everything including the row and column names are quoted so that you +##' can copy it to Excel easily. +##' @param test Whether to show the p-values. TRUE by default. If FALSE, only +##' the numerical summaries are shown. ##' @param pDigits Number of digits to print for p-values. -##' @param explain Whether to add explanation to the variable names, i.e., (mean (sd) or median [IQR]) is added to the variable names. -##' @param printToggle Whether to print the output. If FLASE, no output is created, and a matrix is invisibly returned. +##' @param explain Whether to add explanation to the variable names, i.e., +##' (mean (sd) or median [IQR]) is added to the variable names. +##' @param printToggle Whether to print the output. If FLASE, no output is +##' created, and a matrix is invisibly returned. ##' @param ... For compatibility with generic. Ignored. -##' @return It is mainly for printing the result. But this function does return a matrix containing what you see in the output invisibly. You can assign it to an object to save it. +##' @return It is mainly for printing the result. But this function does return +##' a matrix containing what you see in the output invisibly. You can assign it +##' to an object to save it. ##' @author Kazuki Yoshida ##' @seealso ##' \code{\link{CreateCatTable}}, \code{\link{print.CatTable}}, \code{\link{summary.CatTable}}, @@ -58,13 +69,10 @@ ##' ## by the pDigits argument (3 by default). It does <0.001 for you. ##' contTableBySexTrt ##' -##' ## The nonnormal argument toggles the p-values to the nonparametric result from +##' ## The nonnormal argument will toggle the p-values to the nonparametric result from ##' ## kruskal.test (wilcox.test equivalent for the two group case). ##' print(contTableBySexTrt, nonnormal = nonNormalVars) ##' -##' ## The minMax argument toggles whether to show median [range] -##' print(contTableBySexTrt, nonnormal = nonNormalVars, minMax = TRUE) -##' ##' ## summary now includes both types of p-values ##' summary(contTableBySexTrt) ##' @@ -75,7 +83,7 @@ ##' ##' @export print.ContTable <- function(x, missing = FALSE, - digits = 2, nonnormal = NULL, minMax = FALSE, quote = FALSE, + digits = 2, nonnormal = NULL, quote = FALSE, test = TRUE, pDigits = 3, explain = TRUE, printToggle = TRUE, @@ -154,17 +162,30 @@ print.ContTable <- function(x, missing = FALSE, ### Conversion of data for printing - ## Define the nonnormal formatter depending on the minMax status + ## These may want to be moved to separate files later. + ## Define a function to print a normal variable ConvertNormal <- function(rowMat) { - ## Take minMax value from outside (NOT A STANDALONE FUNCTION!!) - ModuleConvertNormal(rowMat, digits) + + ## Format for SD + fmt <- paste0(" (%.", digits,"f",")") + + ## Create a DF with numeric mean column and character (SD) column + data.frame(col1 = rowMat[,"mean"], + col2 = sprintf(fmt = fmt, rowMat[,"sd"]), + stringsAsFactors = FALSE) } - ## Define the nonnormal formatter depending on the minMax status + ## Define a function to print a nonnormal variable ConvertNonNormal <- function(rowMat) { - ## Take minMax value from outside (NOT A STANDALONE FUNCTION!!) - ModuleConvertNonNormal(rowMat, digits, minMax = minMax) + ## Format for [p25, p75] + fmt <- paste0(" [%.", digits,"f, %.",digits,"f]") + + ## Create a DF with numeric median column and character [p25, p75] column + data.frame(col1 = rowMat[,"median"], + col2 = sprintf(fmt = fmt, rowMat[,"p25"], rowMat[,"p75"]), + stringsAsFactors = FALSE) } + ## Create a list of these two functions listOfFunctions <- list(normal = ConvertNormal, nonnormal = ConvertNonNormal) @@ -303,16 +324,9 @@ print.ContTable <- function(x, missing = FALSE, } - ## Add mean (sd) or median [IQR]/median [range] explanation if requested + ## Add mean (sd) or median [IQR] explanation if requested if (explain) { - - ## Create a vector of explanations to be pasted - if (minMax == FALSE) { - what <- c(" (mean (sd))"," (median [IQR])")[nonnormal] - } else if (minMax == TRUE) { - what <- c(" (mean (sd))"," (median [range])")[nonnormal] - } - ## Paste to the rownames + what <- c(" (mean (sd))"," (median [IQR])")[nonnormal] rownames(out) <- paste0(rownames(out), what) } diff --git a/R/print.TableOne.R b/R/print.TableOne.R index 2f43263..29b0bde 100644 --- a/R/print.TableOne.R +++ b/R/print.TableOne.R @@ -1,22 +1,37 @@ ##' Format and print the \code{TableOne} class objects ##' -##' This is the \code{print} method for the \code{TableOne} class objects created by \code{\link{CreateTableOne}} function. +##' This is the print method for the TableOne class objects created by +##' CreateTableOne function. +##' ##' ##' @param x The result of a call to the \code{\link{CreateTableOne}} function. -##' @param missing Whether to show missing data information (not implemented yet, placeholder) -##' @param quote Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily. -##' @param test Whether to show the p-values. TRUE by default. If FALSE, only the numerical summaries are shown. +##' @param missing Whether to show missing data information (not implemented +##' yet, placeholder) +##' @param quote Whether to show everything in quotes. The default is FALSE. If +##' TRUE, everything including the row and column names are quoted so that you +##' can copy it to Excel easily. +##' @param test Whether to show the p-values. TRUE by default. If FALSE, only +##' the numerical summaries are shown. ##' @param catDigits Number of digits to print for proportions. Default 1. ##' @param contDigits Number of digits to print for continuous variables. Default 2. ##' @param pDigits Number of digits to print for p-values. Default 3. -##' @param format The default is "fp" frequency (percentage). You can also choose from "f" frequency only, "p" percentage only, and "pf" percentage (frequency). -##' @param exact A character vector to specify the variables for which the p-values should be those of exact tests. By default all p-values are from large sample approximation tests (chisq.test). -##' @param nonnormal A character vector to specify the variables for which the p-values should be those of nonparametric tests. By default all p-values are from normal assumption-based tests (oneway.test). -##' @param minMax Whether to use [min,max] instead of [p25,p75] for nonnormal variables. The default is FALSE. -##' @param explain Whether to add explanation to the variable names, i.e., (\%) is added to the variable names when percentage is shown. -##' @param printToggle Whether to print the output. If FLASE, no output is created, and a matrix is invisibly returned. +##' @param format The default is "fp" frequency (percentage). You can also +##' choose from "f" frequency only, "p" percentage only, and "pf" percentage +##' (frequency). +##' @param exact A character vector to specify the variables for which the +##' p-values should be those of exact tests. By default all p-values are from +##' large sample approximation tests (chisq.test). +##' @param nonnormal A character vector to specify the variables for which the +##' p-values should be those of nonparametric tests. By default all p-values +##' are from normal assumption-based tests (oneway.test). +##' @param explain Whether to add explanation to the variable names, i.e., (\%) +##' is added to the variable names when percentage is shown. +##' @param printToggle Whether to print the output. If FLASE, no output is +##' created, and a matrix is invisibly returned. ##' @param ... For compatibility with generic. Ignored. -##' @return It is mainly for printing the result. But this function does return a matrix containing what you see in the output invisibly. You can assign it to an object to save it. +##' @return It is mainly for printing the result. But this function does return +##' a matrix containing what you see in the output invisibly. You can assign it +##' to an object to save it. ##' @author Kazuki Yoshida, Justin Bohn ##' @seealso ##' \code{\link{CreateTableOne}}, \code{\link{print.TableOne}}, \code{\link{summary.TableOne}}, @@ -50,19 +65,10 @@ ##' ## Specifying nonnormal variables will show the variables appropriately, ##' ## and show nonparametric test p-values. Specify variables in the exact ##' ## argument to obtain the exact test p-values. -##' print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), -##' exact = c("status","stage")) +##' print(tableOne, nonnormal = c("time"), exact = c("ascites")) ##' ##' ## Use the summary.TableOne method for detailed summary ##' summary(tableOne) -##' -##' ## See the categorical part only using $ operator -##' tableOne$CatTable -##' summary(tableOne$CatTable) -##' -##' ## See the continuous part only using $ operator -##' tableOne$ContTable -##' summary(tableOne$ContTable) ##' ##' @export print.TableOne <- function(x, missing = FALSE, @@ -75,7 +81,6 @@ print.TableOne <- function(x, missing = FALSE, ## Continuous options nonnormal = NULL, - minMax = FALSE, ## Common options explain = TRUE, @@ -97,10 +102,9 @@ print.TableOne <- function(x, missing = FALSE, print(TableOne[[i]], printToggle = FALSE, test = test, explain = explain, digits = digits[i], ## print.CatTable arguments - format = format, exact = exact, - showAllLevels = FALSE, # must be FALSE to get same column counts + format = format, exact = exact, showAllLevels = FALSE, ## print.ContTable argument - nonnormal = nonnormal, minMax = minMax + nonnormal = nonnormal ) # Method dispatch at work }, simplify = FALSE) diff --git a/R/summary.CatTable.R b/R/summary.CatTable.R index 0c1e1d2..f9174c6 100644 --- a/R/summary.CatTable.R +++ b/R/summary.CatTable.R @@ -1,6 +1,10 @@ ##' Shows all results in a \code{CatTable} class object ##' -##' This method shows all the data a \code{CatTable} class object has. This includes the (optionally stratified) part with summary statistics and , if available, p-values from the approximation method test (\code{\link{chisq.test}} by default) and exact method test (\code{\link{fisher.test}} by default). +##' This method shows all the data a CatTable class object has. This includes +##' the (optionally stratified) part with summary statistics and p-values from +##' the approximation method test (chisq.test by default) and exact method test +##' (fisher.test by default). +##' ##' ##' @param object An object that has the \code{CatTable} class to be shown. ##' @param digits Number of digits to print. diff --git a/R/summary.ContTable.R b/R/summary.ContTable.R index 869e0aa..a464115 100644 --- a/R/summary.ContTable.R +++ b/R/summary.ContTable.R @@ -1,6 +1,10 @@ ##' Shows all results in a \code{ContTable} class object ##' -##' This method shows all the data a \code{ContTable} class object has. This includes the (optionally stratified) part with summary statistics and , if available, p-values from the normal assupmtion-based test (\code{\link{oneway.test}} by default) and nonparametric test (\code{\link{kruskal.test}} by default). +##' This method shows all the data a \code{ContTable} class object has. This includes +##' the (optionally stratified) part with summary statistics and p-values from +##' the normal assupmtion-based test (oneway.test by default) and nonparametric +##' test (kruskal.test by default). +##' ##' ##' @param object An object that has the \code{ContTable} class to be shown. ##' @param digits Number of digits to print. diff --git a/R/summary.TableOne.R b/R/summary.TableOne.R index 6dcfed6..fddf893 100644 --- a/R/summary.TableOne.R +++ b/R/summary.TableOne.R @@ -40,6 +40,11 @@ ##' ## Just typing the object name will invoke the print.TableOne method ##' tableOne ##' +##' ## Specifying nonnormal variables will show the variables appropriately, +##' ## and show nonparametric test p-values. Specify variables in the exact +##' ## argument to obtain the exact test p-values. +##' print(tableOne, nonnormal = c("time"), exact = c("ascites")) +##' ##' ## Use the summary.TableOne method for detailed summary ##' summary(tableOne) ##' diff --git a/R/tableone-package.R b/R/tableone-package.R index 176ebb9..a3b6d7c 100644 --- a/R/tableone-package.R +++ b/R/tableone-package.R @@ -11,8 +11,6 @@ ##' ##' Hadley Wickham for packaging advice and for creating tools this package was made with (roxygen2, devtools, testthat). ##' -##' Members of Facebook Organization of R Users for Medical Statistics in Japan (FORUMS-J) for testing pre-release versions. -##' ##' Developmental repository is on github. Your contributions are appreciated. ##' ##' https://github.com/kaz-yos/tableone @@ -56,24 +54,15 @@ ##' ## Specifying nonnormal variables will show the variables appropriately, ##' ## and show nonparametric test p-values. Specify variables in the exact ##' ## argument to obtain the exact test p-values. -##' print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), -##' exact = c("status","stage")) +##' print(tableOne, nonnormal = c("time"), exact = c("ascites")) ##' ##' ## Use the summary.TableOne method for detailed summary ##' summary(tableOne) -##' +##' ##' ## See the categorical part only using $ operator ##' tableOne$CatTable -##' summary(tableOne$CatTable) ##' ##' ## See the continuous part only using $ operator ##' tableOne$ContTable -##' summary(tableOne$ContTable) -##' -##' ## If your work flow includes copying to Excel and Word when writing manuscripts, -##' ## you may benefit from the quote argument. This will quote everything so that -##' ## Excel does not mess up the cells. -##' print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), -##' exact = c("status","stage"), quote = TRUE) ##' NULL diff --git a/man/CreateCatTable.Rd b/man/CreateCatTable.Rd index 04b8791..6b63843 100644 --- a/man/CreateCatTable.Rd +++ b/man/CreateCatTable.Rd @@ -113,7 +113,7 @@ summary(catTableBySexTrt) print(catTableBySexTrt, exact = "ascites", quote = TRUE) } \author{ -Kazuki Yoshida (based on \code{Deducer::frequencies()}) +Kazuki Yoshida } \seealso{ \code{\link{CreateCatTable}}, \code{\link{print.CatTable}}, diff --git a/man/CreateContTable.Rd b/man/CreateContTable.Rd index ef0fcdd..7cc4abc 100644 --- a/man/CreateContTable.Rd +++ b/man/CreateContTable.Rd @@ -26,11 +26,10 @@ CreateContTable(vars, strata, data, func.names = c("n", "miss", "mean", "sd", (same definition as in SAS). All of them can be seen in the summary method output. The print method uses subset of these. You can choose subset of them or reorder them. - They are all configure to omit NA values (\code{na.rm = - TRUE}).} + They are all configure to omit NA values (na.rm = TRUE).} \item{func.additional}{Additional functions can be given - as a named list. For example, \code{list(sum = sum)}.} + as a named list. For example, list(sum = sum).} \item{test}{If TRUE, as in the default and there are more than two groups, groupwise comparisons are performed. @@ -51,7 +50,7 @@ CreateContTable(vars, strata, data, func.names = c("n", "miss", "mean", "sd", \item{testNonNormal}{A function used to perform the nonparametric tests. The default is \code{kruskal.test} - (Kruskal-Wallis rank sum test). This is equivalent of the + (Kruskal-Wallis Rank Sum Test). This is equivalent of the wilcox.test (Man-Whitney U test) when there are only two groups.} @@ -88,6 +87,7 @@ head(pbc) ## Create an overall table for continuous variables contVars <- c("time","age","bili","chol","albumin","copper", "alk.phos","ast","trig","platelet","protime") + contTableOverall <- CreateContTable(vars = contVars, data = pbc) ## Simply typing the object name will invoke the print.ContTable method, @@ -126,8 +126,7 @@ summary(contTableBySexTrt) print(contTableBySexTrt, nonnormal = nonNormalVars, quote = TRUE) } \author{ -Kazuki Yoshida (based on -\code{Deducer::descriptive.table()}) +Kazuki Yoshida } \seealso{ \code{\link{CreateContTable}}, diff --git a/man/CreateTableOne.Rd b/man/CreateTableOne.Rd index bc8e095..53996da 100644 --- a/man/CreateTableOne.Rd +++ b/man/CreateTableOne.Rd @@ -70,25 +70,21 @@ CreateTableOne(vars, strata, data, test = TRUE, testApprox = chisq.test, } \value{ An object of class \code{TableOne}, which really is a list -of three objects. - -\item{TableOne}{a categorical-continuous mixture data -formatted and printed by the \code{\link{print.TableOne}} -method} - -\item{ContTable}{an object of class \code{ContTable}, -containing continuous variables only} - -\item{CatTable}{ an object of class \code{CatTable}, -containing categorical variables only} - -The second and third objects can be then be examined with -the \code{print} and \code{summary} method, for example, +of three objects. The first object named +\code{object$TableOne} is the categorical-continuous +mixture table formatted and printed by the +\code{\link{print.TableOne}} method. The second object +named \code{object$ContTable} is the table object +containing continuous variables only. The third object +named \code{object$CatTable} is the table object containing +categorical variables only. The second and third objects +can be then be examined with the \code{print} and +\code{summary} method, for example, \code{summary(object$CatTable)} to examine the categorical variables in detail. } \description{ -Create an object summarizing all baseline variables +Create an object summarizing categorical variables optionally stratifying by one or more startifying variables and performing statistical tests. The object gives a table that is easy to use in medical research papers. See also @@ -125,25 +121,16 @@ tableOne ## Specifying nonnormal variables will show the variables appropriately, ## and show nonparametric test p-values. Specify variables in the exact ## argument to obtain the exact test p-values. -print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), - exact = c("status","stage")) +print(tableOne, nonnormal = c("time"), exact = c("ascites")) ## Use the summary.TableOne method for detailed summary summary(tableOne) ## See the categorical part only using $ operator tableOne$CatTable -summary(tableOne$CatTable) ## See the continuous part only using $ operator tableOne$ContTable -summary(tableOne$ContTable) - -## If your work flow includes copying to Excel and Word when writing manuscripts, -## you may benefit from the quote argument. This will quote everything so that -## Excel does not mess up the cells. -print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), - exact = c("status","stage"), quote = TRUE) } \author{ Justin Bohn, Kazuki Yoshida diff --git a/man/ShowRegTable.Rd b/man/ShowRegTable.Rd index e6b32b5..37ac8c7 100644 --- a/man/ShowRegTable.Rd +++ b/man/ShowRegTable.Rd @@ -43,8 +43,10 @@ data(pbc) head(pbc) ## Fit a Cox regression model -objCoxph <- coxph(formula = Surv(time, status == 2) ~ trt + age + albumin + ascites, - data = pbc) +objCoxph <- + coxph(formula = Surv(time, status == 2) ~ trt + age + albumin + ascites, + data = pbc, + ties = c("efron","breslow","exact")[1]) ## Show the simple table ShowRegTable(objCoxph) @@ -55,4 +57,10 @@ ShowRegTable(objCoxph, quote = TRUE) \author{ Kazuki Yoshida } +\seealso{ +CreateContTable, \code{\link{print.ContTable}}, +\code{\link{summary.ContTable}}, +\code{\link{CreateCatTable}}, \code{\link{print.CatTable}}, +\code{\link{summary.CatTable}} +} diff --git a/man/print.CatTable.Rd b/man/print.CatTable.Rd index 37da460..4168660 100644 --- a/man/print.CatTable.Rd +++ b/man/print.CatTable.Rd @@ -60,9 +60,8 @@ does return a matrix containing what you see in the output invisibly. You can assign it to an object to save it. } \description{ -This is the \code{print} method for the \code{CatTable} -class objects created by \code{\link{CreateCatTable}} -function. +This is the print method for the CatTable class objects +created by CreateCatTable function. } \examples{ ## Load diff --git a/man/print.ContTable.Rd b/man/print.ContTable.Rd index 070cab4..505a725 100644 --- a/man/print.ContTable.Rd +++ b/man/print.ContTable.Rd @@ -47,9 +47,8 @@ does return a matrix containing what you see in the output invisibly. You can assign it to an object to save it. } \description{ -This is the \code{print} method for the \code{ContTable} -class objects created by \code{\link{CreateContTable}} -function. +This is the print method for the ContTable class objects +created by CreateContTable function. } \examples{ ## Load diff --git a/man/print.TableOne.Rd b/man/print.TableOne.Rd index 93ad684..3c763a4 100644 --- a/man/print.TableOne.Rd +++ b/man/print.TableOne.Rd @@ -62,9 +62,8 @@ does return a matrix containing what you see in the output invisibly. You can assign it to an object to save it. } \description{ -This is the \code{print} method for the \code{TableOne} -class objects created by \code{\link{CreateTableOne}} -function. +This is the print method for the TableOne class objects +created by CreateTableOne function. } \examples{ ## Load @@ -93,19 +92,10 @@ tableOne ## Specifying nonnormal variables will show the variables appropriately, ## and show nonparametric test p-values. Specify variables in the exact ## argument to obtain the exact test p-values. -print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), - exact = c("status","stage")) +print(tableOne, nonnormal = c("time"), exact = c("ascites")) ## Use the summary.TableOne method for detailed summary summary(tableOne) - -## See the categorical part only using $ operator -tableOne$CatTable -summary(tableOne$CatTable) - -## See the continuous part only using $ operator -tableOne$ContTable -summary(tableOne$ContTable) } \author{ Kazuki Yoshida, Justin Bohn diff --git a/man/summary.CatTable.Rd b/man/summary.CatTable.Rd index 5fd33c7..c01ba16 100644 --- a/man/summary.CatTable.Rd +++ b/man/summary.CatTable.Rd @@ -16,11 +16,10 @@ It will print the results. } \description{ -This method shows all the data a \code{CatTable} class -object has. This includes the (optionally stratified) part -with summary statistics and , if available, p-values from -the approximation method test (\code{\link{chisq.test}} by -default) and exact method test (\code{\link{fisher.test}} +This method shows all the data a CatTable class object has. +This includes the (optionally stratified) part with summary +statistics and p-values from the approximation method test +(chisq.test by default) and exact method test (fisher.test by default). } \examples{ diff --git a/man/summary.ContTable.Rd b/man/summary.ContTable.Rd index c0cdfa2..75a6710 100644 --- a/man/summary.ContTable.Rd +++ b/man/summary.ContTable.Rd @@ -18,10 +18,9 @@ It will print the results. \description{ This method shows all the data a \code{ContTable} class object has. This includes the (optionally stratified) part -with summary statistics and , if available, p-values from -the normal assupmtion-based test (\code{\link{oneway.test}} -by default) and nonparametric test -(\code{\link{kruskal.test}} by default). +with summary statistics and p-values from the normal +assupmtion-based test (oneway.test by default) and +nonparametric test (kruskal.test by default). } \examples{ ## Load diff --git a/man/summary.TableOne.Rd b/man/summary.TableOne.Rd index bba3168..2407d2f 100644 --- a/man/summary.TableOne.Rd +++ b/man/summary.TableOne.Rd @@ -46,6 +46,11 @@ tableOne <- CreateTableOne(vars = c("time","status","age","ascites","hepato", ## Just typing the object name will invoke the print.TableOne method tableOne +## Specifying nonnormal variables will show the variables appropriately, +## and show nonparametric test p-values. Specify variables in the exact +## argument to obtain the exact test p-values. +print(tableOne, nonnormal = c("time"), exact = c("ascites")) + ## Use the summary.TableOne method for detailed summary summary(tableOne) } diff --git a/man/tableone-package.Rd b/man/tableone-package.Rd index eea76e5..fa50250 100644 --- a/man/tableone-package.Rd +++ b/man/tableone-package.Rd @@ -23,10 +23,6 @@ package is based on. Hadley Wickham for packaging advice and for creating tools this package was made with (roxygen2, devtools, testthat). -Members of Facebook Organization of R Users for Medical -Statistics in Japan (FORUMS-J) for testing pre-release -versions. - Developmental repository is on github. Your contributions are appreciated. @@ -62,25 +58,16 @@ tableOne ## Specifying nonnormal variables will show the variables appropriately, ## and show nonparametric test p-values. Specify variables in the exact ## argument to obtain the exact test p-values. -print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), - exact = c("status","stage")) +print(tableOne, nonnormal = c("time"), exact = c("ascites")) ## Use the summary.TableOne method for detailed summary summary(tableOne) ## See the categorical part only using $ operator tableOne$CatTable -summary(tableOne$CatTable) ## See the continuous part only using $ operator tableOne$ContTable -summary(tableOne$ContTable) - -## If your work flow includes copying to Excel and Word when writing manuscripts, -## you may benefit from the quote argument. This will quote everything so that -## Excel does not mess up the cells. -print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), - exact = c("status","stage"), quote = TRUE) } \author{ Kazuki Yoshida, Justin Bohn From 112fd6ab1f2ca9d8642c5308a2298f6c202ee205 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Sun, 16 Feb 2014 01:41:41 -0500 Subject: [PATCH 06/31] Correct status of develop (recovering from SugarSync revert) --- DESCRIPTION | 12 +++---- NEWS | 24 +++++++------- R/CreateCatTable.R | 4 +-- R/CreateContTable.R | 12 +++---- R/CreateTableOne.R | 26 +++++++++++---- R/ShowRegTable.R | 24 ++++---------- R/modules.R | 50 ++++++++++++++++++++++++---- R/print.CatTable.R | 6 ++-- R/print.ContTable.R | 72 ++++++++++++++++------------------------ R/print.TableOne.R | 54 ++++++++++++++---------------- R/summary.CatTable.R | 6 +--- R/summary.ContTable.R | 6 +--- R/summary.TableOne.R | 5 --- R/tableone-package.R | 15 +++++++-- man/CreateCatTable.Rd | 2 +- man/CreateContTable.Rd | 11 +++--- man/CreateTableOne.Rd | 37 ++++++++++++++------- man/ShowRegTable.Rd | 12 ++----- man/print.CatTable.Rd | 5 +-- man/print.ContTable.Rd | 5 +-- man/print.TableOne.Rd | 16 +++++++-- man/summary.CatTable.Rd | 9 ++--- man/summary.ContTable.Rd | 7 ++-- man/summary.TableOne.Rd | 5 --- man/tableone-package.Rd | 15 ++++++++- 25 files changed, 241 insertions(+), 199 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 77904e9..9797ef6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,17 +1,17 @@ Package: tableone Type: Package Title: Create "Table 1" to describe baseline characteristics -Version: 0.2.0 -Date: 2014-02-13 +Version: 0.2.1 +Date: 2014-02-15 Author: Kazuki Yoshida, Justin Bohn Maintainer: Kazuki Yoshida Description: This package creates "Table 1", i.e., description of baseline patient characteristics, which is essential every medical research. This package provides functions to create such summaries for continuous and - categorical variables, optionally with subgroups and groupwise comparison. - The package was insipired by and based on descriptive statistics functions - in Deducer, a Java-based GUI package by Ian Fellows. This package does not - require GUI or Java, and intended for CUI users. + categorical variables, optionally with subgroups comparisons. The package + was insipired by and based on descriptive statistics functions in Deducer, + a Java-based GUI package by Ian Fellows. This package does not require GUI + or Java, and intended for CUI users. License: GPL-2 Depends: e1071, diff --git a/NEWS b/NEWS index dacfcd9..284c9ff 100644 --- a/NEWS +++ b/NEWS @@ -1,27 +1,27 @@ -tableone 0.2.0 +tableone 0.2.1 (2014-02-15) ---------------------------------------------------------------- -NEW FEATURES - -* CreateTableOne and related print/summary methods were added. +BUG FIXES -* CreateTableOne can crate a table with both categorical and - continuous variables. +* Documentations were fixed to represent the current version. -tableone 0.1.3 +tableone 0.2.0 (2014-02-14) ---------------------------------------------------------------- NEW FEATURES -* The print method can suppress printing by printToggle option. +* CreateTableOne and related print/summary methods were added. -* (CreateTableOne and related print and summary methods are introduced). +* CreateTableOne can crate a table with both categorical and + continuous variables. + +* The print method can suppress printing by printToggle option. BUG FIXES * In the Roxygen part of the code, only at_export followed by nothing is used. -tableone 0.1.2 +tableone 0.1.2 (2014-02-09) ---------------------------------------------------------------- BUG FIXES @@ -33,7 +33,7 @@ BUG FIXES * Passed all the default tests by R CMD check file.tar.gz -tableone 0.1.1 +tableone 0.1.1 (2014-02-09) ---------------------------------------------------------------- BUG FIXES @@ -42,7 +42,7 @@ BUG FIXES * Documents are now included in -tableone 0.1.0 +tableone 0.1.0 (2014-02-08) ---------------------------------------------------------------- FIRST DEVELOPMENTAL VERSION diff --git a/R/CreateCatTable.R b/R/CreateCatTable.R index 1904018..a082d76 100644 --- a/R/CreateCatTable.R +++ b/R/CreateCatTable.R @@ -26,7 +26,7 @@ ##' @return An object of class \code{CatTable}, which really is a \code{\link{by}} object with ##' additional attributes. Each element of the \code{\link{by}} part is a matrix with rows ##' representing variables, and columns representing summary statistics. -##' @author Kazuki Yoshida +##' @author Kazuki Yoshida (based on \code{Deducer::frequencies()}) ##' @seealso ##' \code{\link{CreateCatTable}}, \code{\link{print.CatTable}}, \code{\link{summary.CatTable}}, ##' \code{\link{CreateContTable}}, \code{\link{print.ContTable}}, \code{\link{summary.ContTable}}, @@ -72,7 +72,7 @@ ##' ## (3 by default). It does <0.001 for you. ##' catTableBySexTrt ##' -##' ## The exact argument will toggle the p-values to the example test result from +##' ## The exact argument toggles the p-values to the exact test result from ##' ## fisher.test. It will show which ones are from exact tests. ##' print(catTableBySexTrt, exact = "ascites") ##' diff --git a/R/CreateContTable.R b/R/CreateContTable.R index 008f0b1..d9d81a7 100644 --- a/R/CreateContTable.R +++ b/R/CreateContTable.R @@ -16,9 +16,8 @@ ##' kurtosis (same definition as in SAS). All of them can be seen in the ##' summary method output. The print method uses subset of these. You can ##' choose subset of them or reorder them. They are all configure to omit NA -##' values (na.rm = TRUE). -##' @param func.additional Additional functions can be given as a named list. -##' For example, list(sum = sum). +##' values (\code{na.rm = TRUE}). +##' @param func.additional Additional functions can be given as a named list. For example, \code{list(sum = sum)}. ##' @param test If TRUE, as in the default and there are more than two groups, ##' groupwise comparisons are performed. Both tests that assume normality and ##' tests that do not are performed. Either one of the result can be obtained @@ -27,14 +26,14 @@ ##' tests. The default is \code{\link{oneway.test}}. This is equivalent of the t-test when there are only two groups. ##' @param argsNormal A named list of arguments passed to the function specified in \code{testNormal}. The default is \code{list(var.equal = TRUE)}, which makes it the ordinary ANOVA that assumes equal variance across groups. ##' @param testNonNormal A function used to perform the nonparametric tests. -##' The default is \code{kruskal.test} (Kruskal-Wallis Rank Sum Test). This is +##' The default is \code{kruskal.test} (Kruskal-Wallis rank sum test). This is ##' equivalent of the wilcox.test (Man-Whitney U test) when there are only two ##' groups. ##' @param argsNonNormal A named list of arguments passed to the function specified in \code{testNonNormal}. The default is \code{list(NULL)}, which is just a placeholder. ##' @return An object of class \code{ContTable}, which really is a \code{\link{by}} object with ##' additional attributes. Each element of the \code{\link{by}} part is a matrix with rows ##' representing variables, and columns representing summary statistics. -##' @author Kazuki Yoshida +##' @author Kazuki Yoshida (based on \code{Deducer::descriptive.table()}) ##' @seealso ##' \code{\link{CreateContTable}}, \code{\link{print.ContTable}}, \code{\link{summary.ContTable}}, ##' \code{\link{CreateCatTable}}, \code{\link{print.CatTable}}, \code{\link{summary.CatTable}}, @@ -53,7 +52,6 @@ ##' ## Create an overall table for continuous variables ##' contVars <- c("time","age","bili","chol","albumin","copper", ##' "alk.phos","ast","trig","platelet","protime") -##' ##' contTableOverall <- CreateContTable(vars = contVars, data = pbc) ##' ##' ## Simply typing the object name will invoke the print.ContTable method, @@ -79,7 +77,7 @@ ##' ## by the pDigits argument (3 by default). It does <0.001 for you. ##' contTableBySexTrt ##' -##' ## The nonnormal argument will toggle the p-values to the nonparametric result from +##' ## The nonnormal argument toggles the p-values to the nonparametric result from ##' ## kruskal.test (wilcox.test equivalent for the two group case). ##' print(contTableBySexTrt, nonnormal = nonNormalVars) ##' diff --git a/R/CreateTableOne.R b/R/CreateTableOne.R index 845c3f9..771ee11 100644 --- a/R/CreateTableOne.R +++ b/R/CreateTableOne.R @@ -1,9 +1,7 @@ ##' Create an object summarizing both categorical and continuous variables ##' -##' Create an object summarizing categorical variables optionally stratifying -##' by one or more startifying variables and performing statistical tests. The -##' object gives a table that is easy to use in medical research papers. See also \code{\link{print.TableOne}} and \code{\link{summary.TableOne}}. -##' +##' Create an object summarizing all baseline variables optionally stratifying by one or more startifying variables and performing statistical tests. The object gives a table that is easy to use in medical research papers. See also \code{\link{print.TableOne}} and \code{\link{summary.TableOne}}. +##' ##' @param vars Variables to be summarized given as a character vector. Factors are ##' handled as categorical variables, whereas numeric variables are handled as continuous variables. ##' @param strata Stratifying (grouping) variable name(s) given as a character @@ -29,14 +27,19 @@ ##' memory limitation. In this situation, the large sample approximation based ##' should suffice. ##' @param argsExact A named list of arguments passed to the function specified in testExact. The default is \code{list(workspace = 2*10^5)}, which specifies the memory space allocated for \code{\link{fisher.test}}. -##' @return An object of class \code{TableOne}, which really is a list of three objects. The first object named \code{object$TableOne} is the categorical-continuous mixture table formatted and printed by the \code{\link{print.TableOne}} method. The second object named \code{object$ContTable} is the table object containing continuous variables only. The third object named \code{object$CatTable} is the table object containing categorical variables only. The second and third objects can be then be examined with the \code{print} and \code{summary} method, for example, \code{summary(object$CatTable)} to examine the categorical variables in detail. +##' @return An object of class \code{TableOne}, which really is a list of three objects. +##' @return \item{TableOne}{a categorical-continuous mixture data formatted and printed by the \code{\link{print.TableOne}} method} +##' @return \item{ContTable}{an object of class \code{ContTable}, containing continuous variables only} +##' @return \item{CatTable}{ an object of class \code{CatTable}, containing categorical variables only} +##' @return The second and third objects can be then be examined with the \code{print} and \code{summary} method, for example, \code{summary(object$CatTable)} to examine the categorical variables in detail. +##' ##' @author Justin Bohn, Kazuki Yoshida ##' @seealso ##' \code{\link{CreateTableOne}}, \code{\link{print.TableOne}}, \code{\link{summary.TableOne}}, ##' \code{\link{CreateCatTable}}, \code{\link{print.CatTable}}, \code{\link{summary.CatTable}}, ##' \code{\link{CreateContTable}}, \code{\link{print.ContTable}}, \code{\link{summary.ContTable}} ##' @examples -##' +##' ##' ## Load ##' library(tableone) ##' @@ -66,16 +69,25 @@ ##' ## Specifying nonnormal variables will show the variables appropriately, ##' ## and show nonparametric test p-values. Specify variables in the exact ##' ## argument to obtain the exact test p-values. -##' print(tableOne, nonnormal = c("time"), exact = c("ascites")) +##' print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), +##' exact = c("status","stage")) ##' ##' ## Use the summary.TableOne method for detailed summary ##' summary(tableOne) ##' ##' ## See the categorical part only using $ operator ##' tableOne$CatTable +##' summary(tableOne$CatTable) ##' ##' ## See the continuous part only using $ operator ##' tableOne$ContTable +##' summary(tableOne$ContTable) +##' +##' ## If your work flow includes copying to Excel and Word when writing manuscripts, +##' ## you may benefit from the quote argument. This will quote everything so that +##' ## Excel does not mess up the cells. +##' print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), +##' exact = c("status","stage"), quote = TRUE) ##' ##' @export CreateTableOne <- diff --git a/R/ShowRegTable.R b/R/ShowRegTable.R index bb49457..96b4040 100644 --- a/R/ShowRegTable.R +++ b/R/ShowRegTable.R @@ -1,23 +1,15 @@ ##' Format regression results in medically decent format ##' -##' It shows the regression result in the HR [95\% CI] p-value format, which is -##' usually the form used in medical research papers. +##' It shows the regression result in the HR [95\% CI] p-value format, which is usually the form used in medical research papers. ##' ##' -##' @param model Regression model result objects that have the summary and -##' confint methods. -##' @param exp TRUE by default. You need to specify exp = FALSE if your model -##' is has the indentity link function (linear regression, etc). +##' @param model Regression model result objects that have the summary and confint methods. +##' @param exp TRUE by default. You need to specify exp = FALSE if your model is has the indentity link function (linear regression, etc). ##' @param digits Number of digits to print for the main part. ##' @param pDigits Number of digits to print for the p-values. -##' @param quote Whether to show everything in quotes. The default is FALSE. If -##' TRUE, everything including the row and column names are quoted so that you -##' can copy it to Excel easily. -##' @return A matrix containing what you see is returned invisibly. You can -##' capture it by assignment to an object. +##' @param quote Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily. +##' @return A matrix containing what you see is returned invisibly. You can capture it by assignment to an object. ##' @author Kazuki Yoshida -##' @seealso CreateContTable, \code{\link{print.ContTable}}, \code{\link{summary.ContTable}}, -##' \code{\link{CreateCatTable}}, \code{\link{print.CatTable}}, \code{\link{summary.CatTable}} ##' @examples ##' ##' ## Load @@ -30,10 +22,8 @@ ##' head(pbc) ##' ##' ## Fit a Cox regression model -##' objCoxph <- -##' coxph(formula = Surv(time, status == 2) ~ trt + age + albumin + ascites, -##' data = pbc, -##' ties = c("efron","breslow","exact")[1]) +##' objCoxph <- coxph(formula = Surv(time, status == 2) ~ trt + age + albumin + ascites, +##' data = pbc) ##' ##' ## Show the simple table ##' ShowRegTable(objCoxph) diff --git a/R/modules.R b/R/modules.R index 906d9b1..cef999d 100644 --- a/R/modules.R +++ b/R/modules.R @@ -15,10 +15,10 @@ ModuleStopIfNotDataFrame <- function(data) { } ## Extract variables that exist in the data frame ModuleReturnVarsExist <- function(vars, data) { - + ## Check if variables exist. Drop them if not. varsNotInData <- setdiff(vars, names(data)) - + if (length(varsNotInData) > 0) { warning("The data frame does not have ", paste0(varsNotInData, sep = " "), " Dropped") @@ -43,7 +43,7 @@ ModuleReturnFalseIfNoStrata <- function(strata, test) { # Give strata variable n } ## Check statra variables and conditionally create ModuleReturnStrata <- function(strata, data, dat) { # Give strata variable names - + if(missing(strata)) { ## If there is no strata, give "Overall" to every subject strata <- rep("Overall", dim(dat)[1]) # Check if dim(dat)[[1]] is correct. @@ -105,7 +105,7 @@ ModuleCreateStrataVarName <- function(obj) { ## Try catch function # Taken from demo(error.catching) ## Used to define non-failing functions, that return NA when there is an error -tryCatch.W.E <- function(expr) { +tryCatch.W.E <- function(expr) { W <- NULL w.handler <- function(w) { # warning handler W <<- w @@ -142,7 +142,43 @@ sasKurtosis <- function(x) { ### Modules intented for the print methods ################################################################################ -### ModuleQuoteAndPrintMat() +## Define a function to format a normal variable +ModuleConvertNormal <- function(rowMat, digits) { + + ## Format for SD + fmt <- paste0(" (%.", digits,"f",")") + + ## Create a DF with numeric mean column and character (SD) column + data.frame(col1 = rowMat[,"mean"], + col2 = sprintf(fmt = fmt, rowMat[,"sd"]), + stringsAsFactors = FALSE) +} + +## Define a function to format a nonnormal variable +ModuleConvertNonNormal <- function(rowMat, digits, minMax = FALSE) { + + ## Format for [p25, p75] + fmt <- paste0(" [%.", digits,"f, %.",digits,"f]") + + if (minMax == FALSE) { + ## Create a DF with numeric median column and character [p25, p75] column + out <- data.frame(col1 = rowMat[,"median"], + col2 = sprintf(fmt = fmt, rowMat[,"p25"], rowMat[,"p75"]), + stringsAsFactors = FALSE) + } else if (minMax == TRUE) { + ## Create a DF with numeric median column and character [p25, p75] column + out <- data.frame(col1 = rowMat[,"median"], + col2 = sprintf(fmt = fmt, rowMat[,"min"], rowMat[,"max"]), + stringsAsFactors = FALSE) + } else { + stop("minMax must be a logical vector of one: FALSE or TRUE") + } + + return(out) +} + +### Modules by both print and summary methods +## ModuleQuoteAndPrintMat() ## Takes an matrix object format, print, and (invisibly) return it ## Requires quote and printToggle argument in the printToggle method ModuleQuoteAndPrintMat <- function(matObj, quote = FALSE, printToggle = TRUE) { @@ -161,10 +197,10 @@ ModuleQuoteAndPrintMat <- function(matObj, quote = FALSE, printToggle = TRUE) { ## print if required and return if (printToggle) { - + print(matObj, quote = quote) return(matObj) - + } else if (!printToggle) { return(matObj) diff --git a/R/print.CatTable.R b/R/print.CatTable.R index 04cac71..84dab65 100644 --- a/R/print.CatTable.R +++ b/R/print.CatTable.R @@ -1,8 +1,6 @@ ##' Format and print the \code{CatTable} class objects ##' -##' This is the print method for the CatTable class objects created by -##' CreateCatTable function. -##' +##' This is the \code{print} method for the \code{CatTable} class objects created by \code{\link{CreateCatTable}} function. ##' ##' @param x The result of a call to the \code{\link{CreateCatTable}} function. ##' @param missing Whether to show missing data information (not implemented @@ -79,7 +77,7 @@ ##' ## (3 by default). It does <0.001 for you. ##' catTableBySexTrt ##' -##' ## The exact argument will toggle the p-values to the example test result from +##' ## The exact argument toggles the p-values to the exact test result from ##' ## fisher.test. It will show which ones are from exact tests. ##' print(catTableBySexTrt, exact = "ascites") ##' diff --git a/R/print.ContTable.R b/R/print.ContTable.R index 146aedc..d2fe7ad 100644 --- a/R/print.ContTable.R +++ b/R/print.ContTable.R @@ -1,30 +1,19 @@ ##' Format and print the \code{ContTable} class objects ##' -##' This is the print method for the ContTable class objects created by -##' CreateContTable function. -##' +##' This is the \code{print} method for the \code{ContTable} class objects created by \code{\link{CreateContTable}} function. ##' ##' @param x The result of a call to the \code{\link{CreateContTable}} function. -##' @param missing Whether to show missing data information (not implemented -##' yet, placeholder) +##' @param missing Whether to show missing data information (not implemented yet, placeholder) ##' @param digits Number of digits to print in the table. -##' @param nonnormal A character vector to specify the variables for which the -##' p-values should be those of nonparametric tests. By default all p-values -##' are from normal assumption-based tests (oneway.test). -##' @param quote Whether to show everything in quotes. The default is FALSE. If -##' TRUE, everything including the row and column names are quoted so that you -##' can copy it to Excel easily. -##' @param test Whether to show the p-values. TRUE by default. If FALSE, only -##' the numerical summaries are shown. +##' @param nonnormal A character vector to specify the variables for which the p-values should be those of nonparametric tests. By default all p-values are from normal assumption-based tests (oneway.test). +##' @param minMax Whether to use [min,max] instead of [p25,p75] for nonnormal variables. The default is FALSE. +##' @param quote Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily. +##' @param test Whether to show the p-values. TRUE by default. If FALSE, only the numerical summaries are shown. ##' @param pDigits Number of digits to print for p-values. -##' @param explain Whether to add explanation to the variable names, i.e., -##' (mean (sd) or median [IQR]) is added to the variable names. -##' @param printToggle Whether to print the output. If FLASE, no output is -##' created, and a matrix is invisibly returned. +##' @param explain Whether to add explanation to the variable names, i.e., (mean (sd) or median [IQR]) is added to the variable names. +##' @param printToggle Whether to print the output. If FLASE, no output is created, and a matrix is invisibly returned. ##' @param ... For compatibility with generic. Ignored. -##' @return It is mainly for printing the result. But this function does return -##' a matrix containing what you see in the output invisibly. You can assign it -##' to an object to save it. +##' @return It is mainly for printing the result. But this function does return a matrix containing what you see in the output invisibly. You can assign it to an object to save it. ##' @author Kazuki Yoshida ##' @seealso ##' \code{\link{CreateCatTable}}, \code{\link{print.CatTable}}, \code{\link{summary.CatTable}}, @@ -69,10 +58,13 @@ ##' ## by the pDigits argument (3 by default). It does <0.001 for you. ##' contTableBySexTrt ##' -##' ## The nonnormal argument will toggle the p-values to the nonparametric result from +##' ## The nonnormal argument toggles the p-values to the nonparametric result from ##' ## kruskal.test (wilcox.test equivalent for the two group case). ##' print(contTableBySexTrt, nonnormal = nonNormalVars) ##' +##' ## The minMax argument toggles whether to show median [range] +##' print(contTableBySexTrt, nonnormal = nonNormalVars, minMax = TRUE) +##' ##' ## summary now includes both types of p-values ##' summary(contTableBySexTrt) ##' @@ -83,7 +75,7 @@ ##' ##' @export print.ContTable <- function(x, missing = FALSE, - digits = 2, nonnormal = NULL, quote = FALSE, + digits = 2, nonnormal = NULL, minMax = FALSE, quote = FALSE, test = TRUE, pDigits = 3, explain = TRUE, printToggle = TRUE, @@ -162,30 +154,17 @@ print.ContTable <- function(x, missing = FALSE, ### Conversion of data for printing - ## These may want to be moved to separate files later. - ## Define a function to print a normal variable + ## Define the nonnormal formatter depending on the minMax status ConvertNormal <- function(rowMat) { - - ## Format for SD - fmt <- paste0(" (%.", digits,"f",")") - - ## Create a DF with numeric mean column and character (SD) column - data.frame(col1 = rowMat[,"mean"], - col2 = sprintf(fmt = fmt, rowMat[,"sd"]), - stringsAsFactors = FALSE) + ## Take minMax value from outside (NOT A STANDALONE FUNCTION!!) + ModuleConvertNormal(rowMat, digits) } - ## Define a function to print a nonnormal variable + ## Define the nonnormal formatter depending on the minMax status ConvertNonNormal <- function(rowMat) { - ## Format for [p25, p75] - fmt <- paste0(" [%.", digits,"f, %.",digits,"f]") - - ## Create a DF with numeric median column and character [p25, p75] column - data.frame(col1 = rowMat[,"median"], - col2 = sprintf(fmt = fmt, rowMat[,"p25"], rowMat[,"p75"]), - stringsAsFactors = FALSE) + ## Take minMax value from outside (NOT A STANDALONE FUNCTION!!) + ModuleConvertNonNormal(rowMat, digits, minMax = minMax) } - ## Create a list of these two functions listOfFunctions <- list(normal = ConvertNormal, nonnormal = ConvertNonNormal) @@ -324,9 +303,16 @@ print.ContTable <- function(x, missing = FALSE, } - ## Add mean (sd) or median [IQR] explanation if requested + ## Add mean (sd) or median [IQR]/median [range] explanation if requested if (explain) { - what <- c(" (mean (sd))"," (median [IQR])")[nonnormal] + + ## Create a vector of explanations to be pasted + if (minMax == FALSE) { + what <- c(" (mean (sd))"," (median [IQR])")[nonnormal] + } else if (minMax == TRUE) { + what <- c(" (mean (sd))"," (median [range])")[nonnormal] + } + ## Paste to the rownames rownames(out) <- paste0(rownames(out), what) } diff --git a/R/print.TableOne.R b/R/print.TableOne.R index 29b0bde..2f43263 100644 --- a/R/print.TableOne.R +++ b/R/print.TableOne.R @@ -1,37 +1,22 @@ ##' Format and print the \code{TableOne} class objects ##' -##' This is the print method for the TableOne class objects created by -##' CreateTableOne function. -##' +##' This is the \code{print} method for the \code{TableOne} class objects created by \code{\link{CreateTableOne}} function. ##' ##' @param x The result of a call to the \code{\link{CreateTableOne}} function. -##' @param missing Whether to show missing data information (not implemented -##' yet, placeholder) -##' @param quote Whether to show everything in quotes. The default is FALSE. If -##' TRUE, everything including the row and column names are quoted so that you -##' can copy it to Excel easily. -##' @param test Whether to show the p-values. TRUE by default. If FALSE, only -##' the numerical summaries are shown. +##' @param missing Whether to show missing data information (not implemented yet, placeholder) +##' @param quote Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily. +##' @param test Whether to show the p-values. TRUE by default. If FALSE, only the numerical summaries are shown. ##' @param catDigits Number of digits to print for proportions. Default 1. ##' @param contDigits Number of digits to print for continuous variables. Default 2. ##' @param pDigits Number of digits to print for p-values. Default 3. -##' @param format The default is "fp" frequency (percentage). You can also -##' choose from "f" frequency only, "p" percentage only, and "pf" percentage -##' (frequency). -##' @param exact A character vector to specify the variables for which the -##' p-values should be those of exact tests. By default all p-values are from -##' large sample approximation tests (chisq.test). -##' @param nonnormal A character vector to specify the variables for which the -##' p-values should be those of nonparametric tests. By default all p-values -##' are from normal assumption-based tests (oneway.test). -##' @param explain Whether to add explanation to the variable names, i.e., (\%) -##' is added to the variable names when percentage is shown. -##' @param printToggle Whether to print the output. If FLASE, no output is -##' created, and a matrix is invisibly returned. +##' @param format The default is "fp" frequency (percentage). You can also choose from "f" frequency only, "p" percentage only, and "pf" percentage (frequency). +##' @param exact A character vector to specify the variables for which the p-values should be those of exact tests. By default all p-values are from large sample approximation tests (chisq.test). +##' @param nonnormal A character vector to specify the variables for which the p-values should be those of nonparametric tests. By default all p-values are from normal assumption-based tests (oneway.test). +##' @param minMax Whether to use [min,max] instead of [p25,p75] for nonnormal variables. The default is FALSE. +##' @param explain Whether to add explanation to the variable names, i.e., (\%) is added to the variable names when percentage is shown. +##' @param printToggle Whether to print the output. If FLASE, no output is created, and a matrix is invisibly returned. ##' @param ... For compatibility with generic. Ignored. -##' @return It is mainly for printing the result. But this function does return -##' a matrix containing what you see in the output invisibly. You can assign it -##' to an object to save it. +##' @return It is mainly for printing the result. But this function does return a matrix containing what you see in the output invisibly. You can assign it to an object to save it. ##' @author Kazuki Yoshida, Justin Bohn ##' @seealso ##' \code{\link{CreateTableOne}}, \code{\link{print.TableOne}}, \code{\link{summary.TableOne}}, @@ -65,10 +50,19 @@ ##' ## Specifying nonnormal variables will show the variables appropriately, ##' ## and show nonparametric test p-values. Specify variables in the exact ##' ## argument to obtain the exact test p-values. -##' print(tableOne, nonnormal = c("time"), exact = c("ascites")) +##' print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), +##' exact = c("status","stage")) ##' ##' ## Use the summary.TableOne method for detailed summary ##' summary(tableOne) +##' +##' ## See the categorical part only using $ operator +##' tableOne$CatTable +##' summary(tableOne$CatTable) +##' +##' ## See the continuous part only using $ operator +##' tableOne$ContTable +##' summary(tableOne$ContTable) ##' ##' @export print.TableOne <- function(x, missing = FALSE, @@ -81,6 +75,7 @@ print.TableOne <- function(x, missing = FALSE, ## Continuous options nonnormal = NULL, + minMax = FALSE, ## Common options explain = TRUE, @@ -102,9 +97,10 @@ print.TableOne <- function(x, missing = FALSE, print(TableOne[[i]], printToggle = FALSE, test = test, explain = explain, digits = digits[i], ## print.CatTable arguments - format = format, exact = exact, showAllLevels = FALSE, + format = format, exact = exact, + showAllLevels = FALSE, # must be FALSE to get same column counts ## print.ContTable argument - nonnormal = nonnormal + nonnormal = nonnormal, minMax = minMax ) # Method dispatch at work }, simplify = FALSE) diff --git a/R/summary.CatTable.R b/R/summary.CatTable.R index f9174c6..0c1e1d2 100644 --- a/R/summary.CatTable.R +++ b/R/summary.CatTable.R @@ -1,10 +1,6 @@ ##' Shows all results in a \code{CatTable} class object ##' -##' This method shows all the data a CatTable class object has. This includes -##' the (optionally stratified) part with summary statistics and p-values from -##' the approximation method test (chisq.test by default) and exact method test -##' (fisher.test by default). -##' +##' This method shows all the data a \code{CatTable} class object has. This includes the (optionally stratified) part with summary statistics and , if available, p-values from the approximation method test (\code{\link{chisq.test}} by default) and exact method test (\code{\link{fisher.test}} by default). ##' ##' @param object An object that has the \code{CatTable} class to be shown. ##' @param digits Number of digits to print. diff --git a/R/summary.ContTable.R b/R/summary.ContTable.R index a464115..869e0aa 100644 --- a/R/summary.ContTable.R +++ b/R/summary.ContTable.R @@ -1,10 +1,6 @@ ##' Shows all results in a \code{ContTable} class object ##' -##' This method shows all the data a \code{ContTable} class object has. This includes -##' the (optionally stratified) part with summary statistics and p-values from -##' the normal assupmtion-based test (oneway.test by default) and nonparametric -##' test (kruskal.test by default). -##' +##' This method shows all the data a \code{ContTable} class object has. This includes the (optionally stratified) part with summary statistics and , if available, p-values from the normal assupmtion-based test (\code{\link{oneway.test}} by default) and nonparametric test (\code{\link{kruskal.test}} by default). ##' ##' @param object An object that has the \code{ContTable} class to be shown. ##' @param digits Number of digits to print. diff --git a/R/summary.TableOne.R b/R/summary.TableOne.R index fddf893..6dcfed6 100644 --- a/R/summary.TableOne.R +++ b/R/summary.TableOne.R @@ -40,11 +40,6 @@ ##' ## Just typing the object name will invoke the print.TableOne method ##' tableOne ##' -##' ## Specifying nonnormal variables will show the variables appropriately, -##' ## and show nonparametric test p-values. Specify variables in the exact -##' ## argument to obtain the exact test p-values. -##' print(tableOne, nonnormal = c("time"), exact = c("ascites")) -##' ##' ## Use the summary.TableOne method for detailed summary ##' summary(tableOne) ##' diff --git a/R/tableone-package.R b/R/tableone-package.R index a3b6d7c..176ebb9 100644 --- a/R/tableone-package.R +++ b/R/tableone-package.R @@ -11,6 +11,8 @@ ##' ##' Hadley Wickham for packaging advice and for creating tools this package was made with (roxygen2, devtools, testthat). ##' +##' Members of Facebook Organization of R Users for Medical Statistics in Japan (FORUMS-J) for testing pre-release versions. +##' ##' Developmental repository is on github. Your contributions are appreciated. ##' ##' https://github.com/kaz-yos/tableone @@ -54,15 +56,24 @@ ##' ## Specifying nonnormal variables will show the variables appropriately, ##' ## and show nonparametric test p-values. Specify variables in the exact ##' ## argument to obtain the exact test p-values. -##' print(tableOne, nonnormal = c("time"), exact = c("ascites")) +##' print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), +##' exact = c("status","stage")) ##' ##' ## Use the summary.TableOne method for detailed summary ##' summary(tableOne) -##' +##' ##' ## See the categorical part only using $ operator ##' tableOne$CatTable +##' summary(tableOne$CatTable) ##' ##' ## See the continuous part only using $ operator ##' tableOne$ContTable +##' summary(tableOne$ContTable) +##' +##' ## If your work flow includes copying to Excel and Word when writing manuscripts, +##' ## you may benefit from the quote argument. This will quote everything so that +##' ## Excel does not mess up the cells. +##' print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), +##' exact = c("status","stage"), quote = TRUE) ##' NULL diff --git a/man/CreateCatTable.Rd b/man/CreateCatTable.Rd index 6b63843..04b8791 100644 --- a/man/CreateCatTable.Rd +++ b/man/CreateCatTable.Rd @@ -113,7 +113,7 @@ summary(catTableBySexTrt) print(catTableBySexTrt, exact = "ascites", quote = TRUE) } \author{ -Kazuki Yoshida +Kazuki Yoshida (based on \code{Deducer::frequencies()}) } \seealso{ \code{\link{CreateCatTable}}, \code{\link{print.CatTable}}, diff --git a/man/CreateContTable.Rd b/man/CreateContTable.Rd index 7cc4abc..ef0fcdd 100644 --- a/man/CreateContTable.Rd +++ b/man/CreateContTable.Rd @@ -26,10 +26,11 @@ CreateContTable(vars, strata, data, func.names = c("n", "miss", "mean", "sd", (same definition as in SAS). All of them can be seen in the summary method output. The print method uses subset of these. You can choose subset of them or reorder them. - They are all configure to omit NA values (na.rm = TRUE).} + They are all configure to omit NA values (\code{na.rm = + TRUE}).} \item{func.additional}{Additional functions can be given - as a named list. For example, list(sum = sum).} + as a named list. For example, \code{list(sum = sum)}.} \item{test}{If TRUE, as in the default and there are more than two groups, groupwise comparisons are performed. @@ -50,7 +51,7 @@ CreateContTable(vars, strata, data, func.names = c("n", "miss", "mean", "sd", \item{testNonNormal}{A function used to perform the nonparametric tests. The default is \code{kruskal.test} - (Kruskal-Wallis Rank Sum Test). This is equivalent of the + (Kruskal-Wallis rank sum test). This is equivalent of the wilcox.test (Man-Whitney U test) when there are only two groups.} @@ -87,7 +88,6 @@ head(pbc) ## Create an overall table for continuous variables contVars <- c("time","age","bili","chol","albumin","copper", "alk.phos","ast","trig","platelet","protime") - contTableOverall <- CreateContTable(vars = contVars, data = pbc) ## Simply typing the object name will invoke the print.ContTable method, @@ -126,7 +126,8 @@ summary(contTableBySexTrt) print(contTableBySexTrt, nonnormal = nonNormalVars, quote = TRUE) } \author{ -Kazuki Yoshida +Kazuki Yoshida (based on +\code{Deducer::descriptive.table()}) } \seealso{ \code{\link{CreateContTable}}, diff --git a/man/CreateTableOne.Rd b/man/CreateTableOne.Rd index 53996da..bc8e095 100644 --- a/man/CreateTableOne.Rd +++ b/man/CreateTableOne.Rd @@ -70,21 +70,25 @@ CreateTableOne(vars, strata, data, test = TRUE, testApprox = chisq.test, } \value{ An object of class \code{TableOne}, which really is a list -of three objects. The first object named -\code{object$TableOne} is the categorical-continuous -mixture table formatted and printed by the -\code{\link{print.TableOne}} method. The second object -named \code{object$ContTable} is the table object -containing continuous variables only. The third object -named \code{object$CatTable} is the table object containing -categorical variables only. The second and third objects -can be then be examined with the \code{print} and -\code{summary} method, for example, +of three objects. + +\item{TableOne}{a categorical-continuous mixture data +formatted and printed by the \code{\link{print.TableOne}} +method} + +\item{ContTable}{an object of class \code{ContTable}, +containing continuous variables only} + +\item{CatTable}{ an object of class \code{CatTable}, +containing categorical variables only} + +The second and third objects can be then be examined with +the \code{print} and \code{summary} method, for example, \code{summary(object$CatTable)} to examine the categorical variables in detail. } \description{ -Create an object summarizing categorical variables +Create an object summarizing all baseline variables optionally stratifying by one or more startifying variables and performing statistical tests. The object gives a table that is easy to use in medical research papers. See also @@ -121,16 +125,25 @@ tableOne ## Specifying nonnormal variables will show the variables appropriately, ## and show nonparametric test p-values. Specify variables in the exact ## argument to obtain the exact test p-values. -print(tableOne, nonnormal = c("time"), exact = c("ascites")) +print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), + exact = c("status","stage")) ## Use the summary.TableOne method for detailed summary summary(tableOne) ## See the categorical part only using $ operator tableOne$CatTable +summary(tableOne$CatTable) ## See the continuous part only using $ operator tableOne$ContTable +summary(tableOne$ContTable) + +## If your work flow includes copying to Excel and Word when writing manuscripts, +## you may benefit from the quote argument. This will quote everything so that +## Excel does not mess up the cells. +print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), + exact = c("status","stage"), quote = TRUE) } \author{ Justin Bohn, Kazuki Yoshida diff --git a/man/ShowRegTable.Rd b/man/ShowRegTable.Rd index 37ac8c7..e6b32b5 100644 --- a/man/ShowRegTable.Rd +++ b/man/ShowRegTable.Rd @@ -43,10 +43,8 @@ data(pbc) head(pbc) ## Fit a Cox regression model -objCoxph <- - coxph(formula = Surv(time, status == 2) ~ trt + age + albumin + ascites, - data = pbc, - ties = c("efron","breslow","exact")[1]) +objCoxph <- coxph(formula = Surv(time, status == 2) ~ trt + age + albumin + ascites, + data = pbc) ## Show the simple table ShowRegTable(objCoxph) @@ -57,10 +55,4 @@ ShowRegTable(objCoxph, quote = TRUE) \author{ Kazuki Yoshida } -\seealso{ -CreateContTable, \code{\link{print.ContTable}}, -\code{\link{summary.ContTable}}, -\code{\link{CreateCatTable}}, \code{\link{print.CatTable}}, -\code{\link{summary.CatTable}} -} diff --git a/man/print.CatTable.Rd b/man/print.CatTable.Rd index 4168660..37da460 100644 --- a/man/print.CatTable.Rd +++ b/man/print.CatTable.Rd @@ -60,8 +60,9 @@ does return a matrix containing what you see in the output invisibly. You can assign it to an object to save it. } \description{ -This is the print method for the CatTable class objects -created by CreateCatTable function. +This is the \code{print} method for the \code{CatTable} +class objects created by \code{\link{CreateCatTable}} +function. } \examples{ ## Load diff --git a/man/print.ContTable.Rd b/man/print.ContTable.Rd index 505a725..070cab4 100644 --- a/man/print.ContTable.Rd +++ b/man/print.ContTable.Rd @@ -47,8 +47,9 @@ does return a matrix containing what you see in the output invisibly. You can assign it to an object to save it. } \description{ -This is the print method for the ContTable class objects -created by CreateContTable function. +This is the \code{print} method for the \code{ContTable} +class objects created by \code{\link{CreateContTable}} +function. } \examples{ ## Load diff --git a/man/print.TableOne.Rd b/man/print.TableOne.Rd index 3c763a4..93ad684 100644 --- a/man/print.TableOne.Rd +++ b/man/print.TableOne.Rd @@ -62,8 +62,9 @@ does return a matrix containing what you see in the output invisibly. You can assign it to an object to save it. } \description{ -This is the print method for the TableOne class objects -created by CreateTableOne function. +This is the \code{print} method for the \code{TableOne} +class objects created by \code{\link{CreateTableOne}} +function. } \examples{ ## Load @@ -92,10 +93,19 @@ tableOne ## Specifying nonnormal variables will show the variables appropriately, ## and show nonparametric test p-values. Specify variables in the exact ## argument to obtain the exact test p-values. -print(tableOne, nonnormal = c("time"), exact = c("ascites")) +print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), + exact = c("status","stage")) ## Use the summary.TableOne method for detailed summary summary(tableOne) + +## See the categorical part only using $ operator +tableOne$CatTable +summary(tableOne$CatTable) + +## See the continuous part only using $ operator +tableOne$ContTable +summary(tableOne$ContTable) } \author{ Kazuki Yoshida, Justin Bohn diff --git a/man/summary.CatTable.Rd b/man/summary.CatTable.Rd index c01ba16..5fd33c7 100644 --- a/man/summary.CatTable.Rd +++ b/man/summary.CatTable.Rd @@ -16,10 +16,11 @@ It will print the results. } \description{ -This method shows all the data a CatTable class object has. -This includes the (optionally stratified) part with summary -statistics and p-values from the approximation method test -(chisq.test by default) and exact method test (fisher.test +This method shows all the data a \code{CatTable} class +object has. This includes the (optionally stratified) part +with summary statistics and , if available, p-values from +the approximation method test (\code{\link{chisq.test}} by +default) and exact method test (\code{\link{fisher.test}} by default). } \examples{ diff --git a/man/summary.ContTable.Rd b/man/summary.ContTable.Rd index 75a6710..c0cdfa2 100644 --- a/man/summary.ContTable.Rd +++ b/man/summary.ContTable.Rd @@ -18,9 +18,10 @@ It will print the results. \description{ This method shows all the data a \code{ContTable} class object has. This includes the (optionally stratified) part -with summary statistics and p-values from the normal -assupmtion-based test (oneway.test by default) and -nonparametric test (kruskal.test by default). +with summary statistics and , if available, p-values from +the normal assupmtion-based test (\code{\link{oneway.test}} +by default) and nonparametric test +(\code{\link{kruskal.test}} by default). } \examples{ ## Load diff --git a/man/summary.TableOne.Rd b/man/summary.TableOne.Rd index 2407d2f..bba3168 100644 --- a/man/summary.TableOne.Rd +++ b/man/summary.TableOne.Rd @@ -46,11 +46,6 @@ tableOne <- CreateTableOne(vars = c("time","status","age","ascites","hepato", ## Just typing the object name will invoke the print.TableOne method tableOne -## Specifying nonnormal variables will show the variables appropriately, -## and show nonparametric test p-values. Specify variables in the exact -## argument to obtain the exact test p-values. -print(tableOne, nonnormal = c("time"), exact = c("ascites")) - ## Use the summary.TableOne method for detailed summary summary(tableOne) } diff --git a/man/tableone-package.Rd b/man/tableone-package.Rd index fa50250..eea76e5 100644 --- a/man/tableone-package.Rd +++ b/man/tableone-package.Rd @@ -23,6 +23,10 @@ package is based on. Hadley Wickham for packaging advice and for creating tools this package was made with (roxygen2, devtools, testthat). +Members of Facebook Organization of R Users for Medical +Statistics in Japan (FORUMS-J) for testing pre-release +versions. + Developmental repository is on github. Your contributions are appreciated. @@ -58,16 +62,25 @@ tableOne ## Specifying nonnormal variables will show the variables appropriately, ## and show nonparametric test p-values. Specify variables in the exact ## argument to obtain the exact test p-values. -print(tableOne, nonnormal = c("time"), exact = c("ascites")) +print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), + exact = c("status","stage")) ## Use the summary.TableOne method for detailed summary summary(tableOne) ## See the categorical part only using $ operator tableOne$CatTable +summary(tableOne$CatTable) ## See the continuous part only using $ operator tableOne$ContTable +summary(tableOne$ContTable) + +## If your work flow includes copying to Excel and Word when writing manuscripts, +## you may benefit from the quote argument. This will quote everything so that +## Excel does not mess up the cells. +print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), + exact = c("status","stage"), quote = TRUE) } \author{ Kazuki Yoshida, Justin Bohn From 32c05ca52e3764c9d342db4ae2e80d63d4a79c67 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Sun, 16 Feb 2014 11:08:05 -0500 Subject: [PATCH 07/31] factorVars argument for CreateTableOne to specify num vars to factor --- R/CreateTableOne.R | 53 ++++++++++++++++++++----------------------- man/CreateTableOne.Rd | 17 ++++++++++---- 2 files changed, 37 insertions(+), 33 deletions(-) diff --git a/R/CreateTableOne.R b/R/CreateTableOne.R index 771ee11..263349b 100644 --- a/R/CreateTableOne.R +++ b/R/CreateTableOne.R @@ -2,30 +2,18 @@ ##' ##' Create an object summarizing all baseline variables optionally stratifying by one or more startifying variables and performing statistical tests. The object gives a table that is easy to use in medical research papers. See also \code{\link{print.TableOne}} and \code{\link{summary.TableOne}}. ##' -##' @param vars Variables to be summarized given as a character vector. Factors are -##' handled as categorical variables, whereas numeric variables are handled as continuous variables. -##' @param strata Stratifying (grouping) variable name(s) given as a character -##' vector. If omitted, the overall results are returned. -##' @param data A data frame in which these variables exist. All variables -##' (both vars and strata) must be in this data frame. -##' @param test If TRUE, as in the default and there are more than two groups, -##' groupwise comparisons are performed. -##' @param testNormal A function used to perform the normal assumption based -##' tests. The default is \code{\link{oneway.test}}. This is equivalent of the t-test when there are only two groups. +##' @param vars Variables to be summarized given as a character vector. Factors are handled as categorical variables, whereas numeric variables are handled as continuous variables. +##' @param strata Stratifying (grouping) variable name(s) given as a character vector. If omitted, the overall results are returned. +##' @param data A data frame in which these variables exist. All variables (both vars and strata) must be in this data frame. +##' @param factorVars Numerically coded variables that should be handled as categorical variables given as a character vector. If omitted, only factors are considered categorical variables. If all categorical variables in the dataset are already factors, this option is not necessary. +##' @param test If TRUE, as in the default and there are more than two groups, groupwise comparisons are performed. +##' @param testNormal A function used to perform the normal assumption based tests. The default is \code{\link{oneway.test}}. This is equivalent of the t-test when there are only two groups. ##' @param argsNormal A named list of arguments passed to the function specified in \code{testNormal}. The default is \code{list(var.equal = TRUE)}, which makes it the ordinary ANOVA that assumes equal variance across groups. -##' @param testNonNormal A function used to perform the nonparametric tests. -##' The default is \code{kruskal.test} (Kruskal-Wallis Rank Sum Test). This is -##' equivalent of the wilcox.test (Man-Whitney U test) when there are only two -##' groups. +##' @param testNonNormal A function used to perform the nonparametric tests. The default is \code{kruskal.test} (Kruskal-Wallis Rank Sum Test). This is equivalent of the wilcox.test (Man-Whitney U test) when there are only two groups. ##' @param argsNonNormal A named list of arguments passed to the function specified in \code{testNonNormal}. The default is \code{list(NULL)}, which is just a placeholder. -##' @param testApprox A function used to perform the large sample approximation -##' based tests. The default is \code{\link{chisq.test}}. This is not recommended when some -##' of the cell have small counts like fewer than 5. +##' @param testApprox A function used to perform the large sample approximation based tests. The default is \code{\link{chisq.test}}. This is not recommended when some of the cell have small counts like fewer than 5. ##' @param argsApprox A named list of arguments passed to the function specified in testApprox. The default is \code{list(correct = TRUE)}, which turns on the continuity correction for \code{\link{chisq.test}}. -##' @param testExact A function used to perform the exact tests. The default is -##' fisher.test. If the cells have large numbers, it will fail because of -##' memory limitation. In this situation, the large sample approximation based -##' should suffice. +##' @param testExact A function used to perform the exact tests. The default is fisher.test. If the cells have large numbers, it will fail because of memory limitation. In this situation, the large sample approximation based should suffice. ##' @param argsExact A named list of arguments passed to the function specified in testExact. The default is \code{list(workspace = 2*10^5)}, which specifies the memory space allocated for \code{\link{fisher.test}}. ##' @return An object of class \code{TableOne}, which really is a list of three objects. ##' @return \item{TableOne}{a categorical-continuous mixture data formatted and printed by the \code{\link{print.TableOne}} method} @@ -94,6 +82,7 @@ CreateTableOne <- function(vars, # character vector of variable names strata, # character vector of variable names data, # data frame + factorVars, # variables to be transformed to factors test = TRUE, # whether to put p-values ## Test configuration for categorical data testApprox = chisq.test, # function for approximation test @@ -117,6 +106,14 @@ CreateTableOne <- ## Abort if no variables exist at this point ModuleStopIfNoVarsLeft(vars) + ## Factor conversions if the factorVars argument exist + if (!missing(factorVars)) { + ## Check if variables exist. Drop them if not. + factorVars <- ModuleReturnVarsExist(factorVars, data) + ## Convert to factor + data[factorVars] <- lapply(data[factorVars], factor) + } + ## Toggle test FALSE if no strata is given test <- ModuleReturnFalseIfNoStrata(strata, test) @@ -136,13 +133,13 @@ CreateTableOne <- testNonNormal = testNonNormal, argsNonNormal = argsNonNormal ) - argsCreateCatTable <- list(data = data, - test = test, - testApprox = testApprox, - argsApprox = argsApprox, - testExact = testExact, - argsExact = argsExact - ) + argsCreateCatTable <- list(data = data, + test = test, + testApprox = testApprox, + argsApprox = argsApprox, + testExact = testExact, + argsExact = argsExact + ) ## Add strata = strata for argument only if strata is given if(!missing(strata)) { diff --git a/man/CreateTableOne.Rd b/man/CreateTableOne.Rd index bc8e095..790a14f 100644 --- a/man/CreateTableOne.Rd +++ b/man/CreateTableOne.Rd @@ -2,11 +2,11 @@ \alias{CreateTableOne} \title{Create an object summarizing both categorical and continuous variables} \usage{ -CreateTableOne(vars, strata, data, test = TRUE, testApprox = chisq.test, - argsApprox = list(correct = TRUE), testExact = fisher.test, - argsExact = list(workspace = 2 * 10^5), testNormal = oneway.test, - argsNormal = list(var.equal = TRUE), testNonNormal = kruskal.test, - argsNonNormal = list(NULL)) +CreateTableOne(vars, strata, data, factorVars, test = TRUE, + testApprox = chisq.test, argsApprox = list(correct = TRUE), + testExact = fisher.test, argsExact = list(workspace = 2 * 10^5), + testNormal = oneway.test, argsNormal = list(var.equal = TRUE), + testNonNormal = kruskal.test, argsNonNormal = list(NULL)) } \arguments{ \item{vars}{Variables to be summarized given as a @@ -22,6 +22,13 @@ CreateTableOne(vars, strata, data, test = TRUE, testApprox = chisq.test, All variables (both vars and strata) must be in this data frame.} + \item{factorVars}{Numerically coded variables that should + be handled as categorical variables given as a character + vector. If omitted, only factors are considered + categorical variables. If all categorical variables in + the dataset are already factors, this option is not + necessary.} + \item{test}{If TRUE, as in the default and there are more than two groups, groupwise comparisons are performed.} From 752d765133f343f45d0973f5868603ce42ed0c08 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Sun, 16 Feb 2014 11:09:20 -0500 Subject: [PATCH 08/31] doc changes for 0.3.0 --- DESCRIPTION | 4 ++-- NEWS | 17 +++++++++++++++++ man/CreateCatTable.Rd | 2 +- man/CreateContTable.Rd | 2 +- man/print.CatTable.Rd | 2 +- man/print.ContTable.Rd | 12 +++++++++--- man/print.TableOne.Rd | 7 +++++-- 7 files changed, 36 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9797ef6..cd83440 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: tableone Type: Package Title: Create "Table 1" to describe baseline characteristics -Version: 0.2.1 -Date: 2014-02-15 +Version: 0.3.0 +Date: 2014-02-16 Author: Kazuki Yoshida, Justin Bohn Maintainer: Kazuki Yoshida Description: This package creates "Table 1", i.e., description of baseline diff --git a/NEWS b/NEWS index 284c9ff..309f756 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,20 @@ +tableone 0.3.0 (2014-02-16) +---------------------------------------------------------------- +NEW FEATURES + +* CreateTableOne has a new factorVars argument, a character + vector specifying numerically coded variables that should be + treated as factors. + +* The print method for the TableOne/CatTable class object has a + new minMax argument, a logical value specifying whether to show + median [min, max] instead of median [IQR] for nonnormal variables + +BUG FIXES + +* Documentations were fixed to represent the current version. + + tableone 0.2.1 (2014-02-15) ---------------------------------------------------------------- BUG FIXES diff --git a/man/CreateCatTable.Rd b/man/CreateCatTable.Rd index 04b8791..8e82657 100644 --- a/man/CreateCatTable.Rd +++ b/man/CreateCatTable.Rd @@ -100,7 +100,7 @@ catTableBySexTrt <- CreateCatTable(vars = catVars, ## (3 by default). It does <0.001 for you. catTableBySexTrt -## The exact argument will toggle the p-values to the example test result from +## The exact argument toggles the p-values to the exact test result from ## fisher.test. It will show which ones are from exact tests. print(catTableBySexTrt, exact = "ascites") diff --git a/man/CreateContTable.Rd b/man/CreateContTable.Rd index ef0fcdd..3ae281e 100644 --- a/man/CreateContTable.Rd +++ b/man/CreateContTable.Rd @@ -113,7 +113,7 @@ contTableBySexTrt <- CreateContTable(vars = contVars, ## by the pDigits argument (3 by default). It does <0.001 for you. contTableBySexTrt -## The nonnormal argument will toggle the p-values to the nonparametric result from +## The nonnormal argument toggles the p-values to the nonparametric result from ## kruskal.test (wilcox.test equivalent for the two group case). print(contTableBySexTrt, nonnormal = nonNormalVars) diff --git a/man/print.CatTable.Rd b/man/print.CatTable.Rd index 37da460..fee11bd 100644 --- a/man/print.CatTable.Rd +++ b/man/print.CatTable.Rd @@ -104,7 +104,7 @@ catTableBySexTrt <- CreateCatTable(vars = catVars, ## (3 by default). It does <0.001 for you. catTableBySexTrt -## The exact argument will toggle the p-values to the example test result from +## The exact argument toggles the p-values to the exact test result from ## fisher.test. It will show which ones are from exact tests. print(catTableBySexTrt, exact = "ascites") diff --git a/man/print.ContTable.Rd b/man/print.ContTable.Rd index 070cab4..0c677eb 100644 --- a/man/print.ContTable.Rd +++ b/man/print.ContTable.Rd @@ -3,8 +3,8 @@ \title{Format and print the \code{ContTable} class objects} \usage{ \method{print}{ContTable}(x, missing = FALSE, digits = 2, - nonnormal = NULL, quote = FALSE, test = TRUE, pDigits = 3, - explain = TRUE, printToggle = TRUE, ...) + nonnormal = NULL, minMax = FALSE, quote = FALSE, test = TRUE, + pDigits = 3, explain = TRUE, printToggle = TRUE, ...) } \arguments{ \item{x}{The result of a call to the @@ -20,6 +20,9 @@ nonparametric tests. By default all p-values are from normal assumption-based tests (oneway.test).} + \item{minMax}{Whether to use [min,max] instead of + [p25,p75] for nonnormal variables. The default is FALSE.} + \item{quote}{Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to @@ -89,10 +92,13 @@ contTableBySexTrt <- CreateContTable(vars = contVars, ## by the pDigits argument (3 by default). It does <0.001 for you. contTableBySexTrt -## The nonnormal argument will toggle the p-values to the nonparametric result from +## The nonnormal argument toggles the p-values to the nonparametric result from ## kruskal.test (wilcox.test equivalent for the two group case). print(contTableBySexTrt, nonnormal = nonNormalVars) +## The minMax argument toggles whether to show median [range] +print(contTableBySexTrt, nonnormal = nonNormalVars, minMax = TRUE) + ## summary now includes both types of p-values summary(contTableBySexTrt) diff --git a/man/print.TableOne.Rd b/man/print.TableOne.Rd index 93ad684..db7ab2b 100644 --- a/man/print.TableOne.Rd +++ b/man/print.TableOne.Rd @@ -4,8 +4,8 @@ \usage{ \method{print}{TableOne}(x, missing = FALSE, quote = FALSE, test = TRUE, catDigits = 1, contDigits = 2, pDigits = 3, format = c("fp", "f", "p", - "pf")[1], exact = NULL, nonnormal = NULL, explain = TRUE, - printToggle = TRUE, ...) + "pf")[1], exact = NULL, nonnormal = NULL, minMax = FALSE, + explain = TRUE, printToggle = TRUE, ...) } \arguments{ \item{x}{The result of a call to the @@ -46,6 +46,9 @@ nonparametric tests. By default all p-values are from normal assumption-based tests (oneway.test).} + \item{minMax}{Whether to use [min,max] instead of + [p25,p75] for nonnormal variables. The default is FALSE.} + \item{explain}{Whether to add explanation to the variable names, i.e., (\%) is added to the variable names when percentage is shown.} From 18109ad56a4206012e27af926f75406f4e131361 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Sun, 16 Feb 2014 11:34:35 -0500 Subject: [PATCH 09/31] cramVars arg to specify two-level factor to cram into one line --- R/print.CatTable.R | 9 +++++---- R/print.TableOne.R | 3 +++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/R/print.CatTable.R b/R/print.CatTable.R index 2205cf6..cdd32c6 100644 --- a/R/print.CatTable.R +++ b/R/print.CatTable.R @@ -20,7 +20,7 @@ ##' @param pDigits Number of digits to print for p-values. ##' @param showAllLevels Whether to show all levels. FALSE by default, i.e., ##' for 2-level categorical variables, only the higher level is shown to avoid -##' @param cramLevels For two level factors, cram them together in one row. +##' @param cramVars For two level factors, cram them together in one row. ##' @param explain Whether to add explanation to the variable names, i.e., (\%) ##' is added to the variable names when percentage is shown. ##' @param CrossTable Whether to show the cross table objects held internally @@ -96,7 +96,7 @@ print.CatTable <- function(x, missing = FALSE, digits = 1, exact = NULL, quote = FALSE, test = TRUE, pDigits = 3, showAllLevels = FALSE, - cramLevels = TRUE, # TRUE for testing + cramVars = NULL, # variables to be crammed into one row explain = TRUE, CrossTable = FALSE, printToggle = TRUE, @@ -229,8 +229,9 @@ print.CatTable <- function(x, missing = FALSE, } else if (!showAllLevels & nRow == 2) { - if (cramLevels) { - ## If cramLevels is true. Cram in one line + ## cram results in one row if requested + if (unique(DF$var) %in% cramVars) { + ## If cramVars is true. Cram in one line ## Cram two freq and count with / in between DF$freq <- paste0(DF$freq, collapse = "/") DF$percent <- paste0(DF$percent, collapse = "/") diff --git a/R/print.TableOne.R b/R/print.TableOne.R index 2f43263..3125b55 100644 --- a/R/print.TableOne.R +++ b/R/print.TableOne.R @@ -11,6 +11,7 @@ ##' @param pDigits Number of digits to print for p-values. Default 3. ##' @param format The default is "fp" frequency (percentage). You can also choose from "f" frequency only, "p" percentage only, and "pf" percentage (frequency). ##' @param exact A character vector to specify the variables for which the p-values should be those of exact tests. By default all p-values are from large sample approximation tests (chisq.test). +##' @param cramVars A character vector to specify the two-level categorical variables, for which both levels should be shon in one row. ##' @param nonnormal A character vector to specify the variables for which the p-values should be those of nonparametric tests. By default all p-values are from normal assumption-based tests (oneway.test). ##' @param minMax Whether to use [min,max] instead of [p25,p75] for nonnormal variables. The default is FALSE. ##' @param explain Whether to add explanation to the variable names, i.e., (\%) is added to the variable names when percentage is shown. @@ -72,6 +73,7 @@ print.TableOne <- function(x, missing = FALSE, ## Categorical options format = c("fp","f","p","pf")[1], # Format f_requency and/or p_ercent exact = NULL, + cramVars = NULL, ## Continuous options nonnormal = NULL, @@ -99,6 +101,7 @@ print.TableOne <- function(x, missing = FALSE, ## print.CatTable arguments format = format, exact = exact, showAllLevels = FALSE, # must be FALSE to get same column counts + cramVars = cramVars, ## print.ContTable argument nonnormal = nonnormal, minMax = minMax ) # Method dispatch at work From 6ba0c528269e1fd7024c547ab1345acf4a985475 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Sun, 16 Feb 2014 11:46:33 -0500 Subject: [PATCH 10/31] doc fix for cramVars arg --- R/print.CatTable.R | 2 +- R/print.TableOne.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/print.CatTable.R b/R/print.CatTable.R index cdd32c6..0163b52 100644 --- a/R/print.CatTable.R +++ b/R/print.CatTable.R @@ -20,7 +20,7 @@ ##' @param pDigits Number of digits to print for p-values. ##' @param showAllLevels Whether to show all levels. FALSE by default, i.e., ##' for 2-level categorical variables, only the higher level is shown to avoid -##' @param cramVars For two level factors, cram them together in one row. +##' @param cramVars A character vector to specify the two-level categorical variables, for which both levels should be shown in one row. ##' @param explain Whether to add explanation to the variable names, i.e., (\%) ##' is added to the variable names when percentage is shown. ##' @param CrossTable Whether to show the cross table objects held internally diff --git a/R/print.TableOne.R b/R/print.TableOne.R index 3125b55..ff3fe99 100644 --- a/R/print.TableOne.R +++ b/R/print.TableOne.R @@ -11,7 +11,7 @@ ##' @param pDigits Number of digits to print for p-values. Default 3. ##' @param format The default is "fp" frequency (percentage). You can also choose from "f" frequency only, "p" percentage only, and "pf" percentage (frequency). ##' @param exact A character vector to specify the variables for which the p-values should be those of exact tests. By default all p-values are from large sample approximation tests (chisq.test). -##' @param cramVars A character vector to specify the two-level categorical variables, for which both levels should be shon in one row. +##' @param cramVars A character vector to specify the two-level categorical variables, for which both levels should be shown in one row. ##' @param nonnormal A character vector to specify the variables for which the p-values should be those of nonparametric tests. By default all p-values are from normal assumption-based tests (oneway.test). ##' @param minMax Whether to use [min,max] instead of [p25,p75] for nonnormal variables. The default is FALSE. ##' @param explain Whether to add explanation to the variable names, i.e., (\%) is added to the variable names when percentage is shown. From 970d68194ce9442e12ddfbf4b7196d46932f2429 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Sun, 16 Feb 2014 15:40:15 -0500 Subject: [PATCH 11/31] cramVars added to man --- man/print.CatTable.Rd | 6 +++++- man/print.TableOne.Rd | 8 ++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/man/print.CatTable.Rd b/man/print.CatTable.Rd index fee11bd..1a1dd48 100644 --- a/man/print.CatTable.Rd +++ b/man/print.CatTable.Rd @@ -4,7 +4,7 @@ \usage{ \method{print}{CatTable}(x, missing = FALSE, format = c("fp", "f", "p", "pf")[1], digits = 1, exact = NULL, quote = FALSE, test = TRUE, - pDigits = 3, showAllLevels = FALSE, explain = TRUE, + pDigits = 3, showAllLevels = FALSE, cramVars = NULL, explain = TRUE, CrossTable = FALSE, printToggle = TRUE, ...) } \arguments{ @@ -40,6 +40,10 @@ default, i.e., for 2-level categorical variables, only the higher level is shown to avoid} + \item{cramVars}{A character vector to specify the + two-level categorical variables, for which both levels + should be shown in one row.} + \item{explain}{Whether to add explanation to the variable names, i.e., (\%) is added to the variable names when percentage is shown.} diff --git a/man/print.TableOne.Rd b/man/print.TableOne.Rd index db7ab2b..992b961 100644 --- a/man/print.TableOne.Rd +++ b/man/print.TableOne.Rd @@ -4,8 +4,8 @@ \usage{ \method{print}{TableOne}(x, missing = FALSE, quote = FALSE, test = TRUE, catDigits = 1, contDigits = 2, pDigits = 3, format = c("fp", "f", "p", - "pf")[1], exact = NULL, nonnormal = NULL, minMax = FALSE, - explain = TRUE, printToggle = TRUE, ...) + "pf")[1], exact = NULL, cramVars = NULL, nonnormal = NULL, + minMax = FALSE, explain = TRUE, printToggle = TRUE, ...) } \arguments{ \item{x}{The result of a call to the @@ -41,6 +41,10 @@ default all p-values are from large sample approximation tests (chisq.test).} + \item{cramVars}{A character vector to specify the + two-level categorical variables, for which both levels + should be shown in one row.} + \item{nonnormal}{A character vector to specify the variables for which the p-values should be those of nonparametric tests. By default all p-values are from From d9b63de216aae05c72504114daf37ff878a560de Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Sun, 16 Feb 2014 16:24:36 -0500 Subject: [PATCH 12/31] args re-ordered. docs fixed --- R/CreateCatTable.R | 4 +-- R/CreateContTable.R | 20 +++++++------- R/print.CatTable.R | 63 +++++++++++++++++++----------------------- R/print.ContTable.R | 28 ++++++++++++------- R/print.TableOne.R | 47 ++++++++++++++++--------------- man/print.CatTable.Rd | 55 ++++++++++++++++++------------------ man/print.ContTable.Rd | 36 ++++++++++++------------ man/print.TableOne.Rd | 54 ++++++++++++++++++------------------ 8 files changed, 155 insertions(+), 152 deletions(-) diff --git a/R/CreateCatTable.R b/R/CreateCatTable.R index a082d76..55d402f 100644 --- a/R/CreateCatTable.R +++ b/R/CreateCatTable.R @@ -23,9 +23,7 @@ ##' memory limitation. In this situation, the large sample approximation based ##' should suffice. ##' @param argsExact A named list of arguments passed to the function specified in testExact. The default is \code{list(workspace = 2*10^5)}, which specifies the memory space allocated for \code{\link{fisher.test}}. -##' @return An object of class \code{CatTable}, which really is a \code{\link{by}} object with -##' additional attributes. Each element of the \code{\link{by}} part is a matrix with rows -##' representing variables, and columns representing summary statistics. +##' @return An object of class \code{CatTable}, which really is a \code{\link{by}} object with additional attributes. Each element of the \code{\link{by}} part is a matrix with rows representing variables, and columns representing summary statistics. ##' @author Kazuki Yoshida (based on \code{Deducer::frequencies()}) ##' @seealso ##' \code{\link{CreateCatTable}}, \code{\link{print.CatTable}}, \code{\link{summary.CatTable}}, diff --git a/R/CreateContTable.R b/R/CreateContTable.R index d9d81a7..702c767 100644 --- a/R/CreateContTable.R +++ b/R/CreateContTable.R @@ -91,21 +91,21 @@ ##' ##' @export CreateContTable <- - function(vars, # character vector of variable names - strata, # character vector of variable names - data, # data frame - func.names = c( # can pick a subset of them + function(vars, # character vector of variable names + strata, # character vector of variable names + data, # data frame + func.names = c( # can pick a subset of them "n","miss", "mean","sd", "median","p25","p75","min","max", "skew","kurt" ), - func.additional, # named list of additional functions - test = TRUE, # Whether to put p-values - testNormal = oneway.test, # test for normally distributed variables - argsNormal = list(var.equal = TRUE), # arguments passed to testNormal - testNonNormal = kruskal.test, # test for nonnormally distributed variables - argsNonNormal = list(NULL) # arguments passed to testNonNormal + func.additional, # named list of additional functions + test = TRUE, # Whether to put p-values + testNormal = oneway.test, # test for normally distributed variables + argsNormal = list(var.equal = TRUE), # arguments passed to testNormal + testNonNormal = kruskal.test, # test for nonnormally distributed variables + argsNonNormal = list(NULL) # arguments passed to testNonNormal ) { ## Require dependencies (DELETE before CRAN release. Use Depends in DESCRIPTION) diff --git a/R/print.CatTable.R b/R/print.CatTable.R index 0163b52..6b31fbb 100644 --- a/R/print.CatTable.R +++ b/R/print.CatTable.R @@ -3,35 +3,20 @@ ##' This is the \code{print} method for the \code{CatTable} class objects created by \code{\link{CreateCatTable}} function. ##' ##' @param x The result of a call to the \code{\link{CreateCatTable}} function. -##' @param missing Whether to show missing data information (not implemented -##' yet, placeholder) -##' @param format The default is "fp" frequency (percentage). You can also -##' choose from "f" frequency only, "p" percentage only, and "pf" percentage -##' (frequency). ##' @param digits Number of digits to print in the table. -##' @param exact A character vector to specify the variables for which the -##' p-values should be those of exact tests. By default all p-values are from -##' large sample approximation tests (chisq.test). -##' @param quote Whether to show everything in quotes. The default is FALSE. If -##' TRUE, everything including the row and column names are quoted so that you -##' can copy it to Excel easily. -##' @param test Whether to show the p-values. TRUE by default. If FALSE, only -##' the numerical summaries are shown. ##' @param pDigits Number of digits to print for p-values. -##' @param showAllLevels Whether to show all levels. FALSE by default, i.e., -##' for 2-level categorical variables, only the higher level is shown to avoid -##' @param cramVars A character vector to specify the two-level categorical variables, for which both levels should be shown in one row. -##' @param explain Whether to add explanation to the variable names, i.e., (\%) -##' is added to the variable names when percentage is shown. -##' @param CrossTable Whether to show the cross table objects held internally -##' using gmodels::CrossTable function. This will give an output similar to the -##' PROC FREQ in SAS. -##' @param printToggle Whether to print the output. If FLASE, no output is -##' created, and a matrix is invisibly returned. +##' @param quote Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily. +##' @param missing Whether to show missing data information (not implemented yet, placeholder) +##' @param explain Whether to add explanation to the variable names, i.e., (\%) is added to the variable names when percentage is shown. +##' @param printToggle Whether to print the output. If FLASE, no output is created, and a matrix is invisibly returned. +##' @param format The default is "fp" frequency (percentage). You can also choose from "f" frequency only, "p" percentage only, and "pf" percentage (frequency). +##' @param showAllLevels Whether to show all levels. FALSE by default, i.e., for 2-level categorical variables, only the higher level is shown to avoid +##' @param cramVars A character vector to specify the two-level categorical variables, for which both levels should be shown in one row. This should be used via \code{\link{print.TableOne}}. +##' @param test Whether to show the p-values. TRUE by default. If FALSE, only the numerical summaries are shown. +##' @param exact A character vector to specify the variables for which the p-values should be those of exact tests. By default all p-values are from large sample approximation tests (chisq.test). +##' @param CrossTable Whether to show the cross table objects held internally using gmodels::CrossTable function. This will give an output similar to the PROC FREQ in SAS. ##' @param ... For compatibility with generic. Ignored. -##' @return It is mainly for printing the result. But this function does return -##' a matrix containing what you see in the output invisibly. You can assign it -##' to an object to save it. +##' @return It is mainly for printing the result. But this function does return a matrix containing what you see in the output invisibly. You can assign it to an object to save it. ##' @author Kazuki Yoshida ##' @seealso ##' \code{\link{CreateCatTable}}, \code{\link{print.CatTable}}, \code{\link{summary.CatTable}}, @@ -91,15 +76,23 @@ ##' print(catTableBySexTrt, exact = "ascites", quote = TRUE) ##' ##' @export -print.CatTable <- function(x, missing = FALSE, - format = c("fp","f","p","pf")[1], # Format f_requency and/or p_ercent - digits = 1, exact = NULL, quote = FALSE, - test = TRUE, pDigits = 3, +print.CatTable <- function(x, # CatTable object + digits = 1, pDigits = 3, # Number of digits to show + quote = FALSE, # Whether to show quotes + + missing = FALSE, # Show missing values (not implemented yet) + explain = TRUE, # Whether to show explanation in variable names + printToggle = TRUE, # Whether to print the result visibly + + format = c("fp","f","p","pf")[1], # Format f_requency and/or p_ercent showAllLevels = FALSE, - cramVars = NULL, # variables to be crammed into one row - explain = TRUE, - CrossTable = FALSE, - printToggle = TRUE, + cramVars = NULL, # variables to be crammed into one row + + test = TRUE, # Whether to add p-values + exact = NULL, # Which variables should be tested with exact tests + + CrossTable = FALSE, # Whether to show gmodels::CrossTable + ...) { ## x and ... required to be consistent with generic print(x, ...) @@ -293,7 +286,7 @@ print.CatTable <- function(x, missing = FALSE, format(DF[DF$crammedRowInd == "crammed","freq"], justify = "right") DF[DF$crammedRowInd == "","freq"] <- format(DF[DF$crammedRowInd == "","freq"], justify = "right") - + ## Obtain the max width of characters nCharFreq <- max(nchar(DF$freq)) diff --git a/R/print.ContTable.R b/R/print.ContTable.R index d2fe7ad..3b215d9 100644 --- a/R/print.ContTable.R +++ b/R/print.ContTable.R @@ -3,15 +3,15 @@ ##' This is the \code{print} method for the \code{ContTable} class objects created by \code{\link{CreateContTable}} function. ##' ##' @param x The result of a call to the \code{\link{CreateContTable}} function. -##' @param missing Whether to show missing data information (not implemented yet, placeholder) ##' @param digits Number of digits to print in the table. -##' @param nonnormal A character vector to specify the variables for which the p-values should be those of nonparametric tests. By default all p-values are from normal assumption-based tests (oneway.test). -##' @param minMax Whether to use [min,max] instead of [p25,p75] for nonnormal variables. The default is FALSE. -##' @param quote Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily. -##' @param test Whether to show the p-values. TRUE by default. If FALSE, only the numerical summaries are shown. ##' @param pDigits Number of digits to print for p-values. +##' @param quote Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily. +##' @param missing Whether to show missing data information (not implemented yet, placeholder) ##' @param explain Whether to add explanation to the variable names, i.e., (mean (sd) or median [IQR]) is added to the variable names. ##' @param printToggle Whether to print the output. If FLASE, no output is created, and a matrix is invisibly returned. +##' @param nonnormal A character vector to specify the variables for which the p-values should be those of nonparametric tests. By default all p-values are from normal assumption-based tests (oneway.test). +##' @param minMax Whether to use [min,max] instead of [p25,p75] for nonnormal variables. The default is FALSE. +##' @param test Whether to show the p-values. TRUE by default. If FALSE, only the numerical summaries are shown. ##' @param ... For compatibility with generic. Ignored. ##' @return It is mainly for printing the result. But this function does return a matrix containing what you see in the output invisibly. You can assign it to an object to save it. ##' @author Kazuki Yoshida @@ -74,11 +74,19 @@ ##' print(contTableBySexTrt, nonnormal = nonNormalVars, quote = TRUE) ##' ##' @export -print.ContTable <- function(x, missing = FALSE, - digits = 2, nonnormal = NULL, minMax = FALSE, quote = FALSE, - test = TRUE, pDigits = 3, - explain = TRUE, - printToggle = TRUE, +print.ContTable <- function(x, # ContTable object + digits = 2, pDigits = 3, # Number of digits to show + quote = FALSE, # Whether to show quotes + + missing = FALSE, # show missing values (not implemented yet) + explain = TRUE, # Whether to show explanation in variable names + printToggle = TRUE, # Whether to print the result visibly + + nonnormal = NULL, # Which variables should be treated as nonnormal + minMax = FALSE, # median [range] instead of median [IQR] + + test = TRUE, # Whether to add p-values + ...) { ## x and ... required to be consistent with generic print(x, ...) diff --git a/R/print.TableOne.R b/R/print.TableOne.R index ff3fe99..a729cb4 100644 --- a/R/print.TableOne.R +++ b/R/print.TableOne.R @@ -3,19 +3,19 @@ ##' This is the \code{print} method for the \code{TableOne} class objects created by \code{\link{CreateTableOne}} function. ##' ##' @param x The result of a call to the \code{\link{CreateTableOne}} function. -##' @param missing Whether to show missing data information (not implemented yet, placeholder) -##' @param quote Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily. -##' @param test Whether to show the p-values. TRUE by default. If FALSE, only the numerical summaries are shown. ##' @param catDigits Number of digits to print for proportions. Default 1. ##' @param contDigits Number of digits to print for continuous variables. Default 2. ##' @param pDigits Number of digits to print for p-values. Default 3. +##' @param quote Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily. +##' @param missing Whether to show missing data information (not implemented yet, placeholder) +##' @param explain Whether to add explanation to the variable names, i.e., (\%) is added to the variable names when percentage is shown. +##' @param printToggle Whether to print the output. If FLASE, no output is created, and a matrix is invisibly returned. +##' @param test Whether to show the p-values. TRUE by default. If FALSE, only the numerical summaries are shown. ##' @param format The default is "fp" frequency (percentage). You can also choose from "f" frequency only, "p" percentage only, and "pf" percentage (frequency). -##' @param exact A character vector to specify the variables for which the p-values should be those of exact tests. By default all p-values are from large sample approximation tests (chisq.test). ##' @param cramVars A character vector to specify the two-level categorical variables, for which both levels should be shown in one row. +##' @param exact A character vector to specify the variables for which the p-values should be those of exact tests. By default all p-values are from large sample approximation tests (chisq.test). ##' @param nonnormal A character vector to specify the variables for which the p-values should be those of nonparametric tests. By default all p-values are from normal assumption-based tests (oneway.test). ##' @param minMax Whether to use [min,max] instead of [p25,p75] for nonnormal variables. The default is FALSE. -##' @param explain Whether to add explanation to the variable names, i.e., (\%) is added to the variable names when percentage is shown. -##' @param printToggle Whether to print the output. If FLASE, no output is created, and a matrix is invisibly returned. ##' @param ... For compatibility with generic. Ignored. ##' @return It is mainly for printing the result. But this function does return a matrix containing what you see in the output invisibly. You can assign it to an object to save it. ##' @author Kazuki Yoshida, Justin Bohn @@ -56,7 +56,7 @@ ##' ##' ## Use the summary.TableOne method for detailed summary ##' summary(tableOne) -##' +##' ##' ## See the categorical part only using $ operator ##' tableOne$CatTable ##' summary(tableOne$CatTable) @@ -66,22 +66,25 @@ ##' summary(tableOne$ContTable) ##' ##' @export -print.TableOne <- function(x, missing = FALSE, - quote = FALSE, - test = TRUE, catDigits = 1, contDigits = 2, pDigits = 3, +print.TableOne <- function(x, # TableOne object + catDigits = 1, contDigits = 2, pDigits = 3, # Number of digits to show + quote = FALSE, # Whether to show quotes + + ## Common options + missing = FALSE, # Not implemented yet + explain = TRUE, # Whether to show explanation in variable names + printToggle = TRUE, # Whether to print the result visibly + test = TRUE, # Whether to add p-values ## Categorical options - format = c("fp","f","p","pf")[1], # Format f_requency and/or p_ercent - exact = NULL, - cramVars = NULL, + format = c("fp","f","p","pf")[1], # Format f_requency and/or p_ercent + cramVars = NULL, # Which 2-level variables to show both levels in one row + exact = NULL, # Which variables should be tested with exact tests ## Continuous options - nonnormal = NULL, - minMax = FALSE, - - ## Common options - explain = TRUE, - printToggle = TRUE, + nonnormal = NULL, # Which variables should be treated as nonnormal + minMax = FALSE, # Whether to show median + ...) { ## Get the mixed element only @@ -95,7 +98,7 @@ print.TableOne <- function(x, missing = FALSE, ## Get the formatted tables formattedTables <- sapply(seq_along(TableOne), FUN = function(i) { - + print(TableOne[[i]], printToggle = FALSE, test = test, explain = explain, digits = digits[i], ## print.CatTable arguments @@ -111,7 +114,7 @@ print.TableOne <- function(x, missing = FALSE, ## Get the column width information (strata x vars format) columnWidthInfo <- sapply(formattedTables, FUN = function(matObj) { - + attributes(matObj)$vecColWidths }, simplify = FALSE) @@ -155,7 +158,7 @@ print.TableOne <- function(x, missing = FALSE, ## Remove 1st rows from each table (stratum sizes) spaceFormattedTables <- sapply(spaceFormattedTables, FUN = function(matObj) { - + matObj[-1, , drop = FALSE] }, simplify = FALSE) diff --git a/man/print.CatTable.Rd b/man/print.CatTable.Rd index 1a1dd48..dabd35f 100644 --- a/man/print.CatTable.Rd +++ b/man/print.CatTable.Rd @@ -2,39 +2,38 @@ \alias{print.CatTable} \title{Format and print the \code{CatTable} class objects} \usage{ -\method{print}{CatTable}(x, missing = FALSE, format = c("fp", "f", "p", - "pf")[1], digits = 1, exact = NULL, quote = FALSE, test = TRUE, - pDigits = 3, showAllLevels = FALSE, cramVars = NULL, explain = TRUE, - CrossTable = FALSE, printToggle = TRUE, ...) +\method{print}{CatTable}(x, digits = 1, pDigits = 3, quote = FALSE, + missing = FALSE, explain = TRUE, printToggle = TRUE, format = c("fp", + "f", "p", "pf")[1], showAllLevels = FALSE, cramVars = NULL, test = TRUE, + exact = NULL, CrossTable = FALSE, ...) } \arguments{ \item{x}{The result of a call to the \code{\link{CreateCatTable}} function.} - \item{missing}{Whether to show missing data information - (not implemented yet, placeholder)} - - \item{format}{The default is "fp" frequency (percentage). - You can also choose from "f" frequency only, "p" - percentage only, and "pf" percentage (frequency).} - \item{digits}{Number of digits to print in the table.} - \item{exact}{A character vector to specify the variables - for which the p-values should be those of exact tests. By - default all p-values are from large sample approximation - tests (chisq.test).} + \item{pDigits}{Number of digits to print for p-values.} \item{quote}{Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily.} - \item{test}{Whether to show the p-values. TRUE by - default. If FALSE, only the numerical summaries are - shown.} + \item{missing}{Whether to show missing data information + (not implemented yet, placeholder)} - \item{pDigits}{Number of digits to print for p-values.} + \item{explain}{Whether to add explanation to the variable + names, i.e., (\%) is added to the variable names when + percentage is shown.} + + \item{printToggle}{Whether to print the output. If FLASE, + no output is created, and a matrix is invisibly + returned.} + + \item{format}{The default is "fp" frequency (percentage). + You can also choose from "f" frequency only, "p" + percentage only, and "pf" percentage (frequency).} \item{showAllLevels}{Whether to show all levels. FALSE by default, i.e., for 2-level categorical variables, only @@ -42,20 +41,22 @@ \item{cramVars}{A character vector to specify the two-level categorical variables, for which both levels - should be shown in one row.} + should be shown in one row. This should be used via + \code{\link{print.TableOne}}.} - \item{explain}{Whether to add explanation to the variable - names, i.e., (\%) is added to the variable names when - percentage is shown.} + \item{test}{Whether to show the p-values. TRUE by + default. If FALSE, only the numerical summaries are + shown.} + + \item{exact}{A character vector to specify the variables + for which the p-values should be those of exact tests. By + default all p-values are from large sample approximation + tests (chisq.test).} \item{CrossTable}{Whether to show the cross table objects held internally using gmodels::CrossTable function. This will give an output similar to the PROC FREQ in SAS.} - \item{printToggle}{Whether to print the output. If FLASE, - no output is created, and a matrix is invisibly - returned.} - \item{...}{For compatibility with generic. Ignored.} } \value{ diff --git a/man/print.ContTable.Rd b/man/print.ContTable.Rd index 0c677eb..59efaeb 100644 --- a/man/print.ContTable.Rd +++ b/man/print.ContTable.Rd @@ -2,37 +2,25 @@ \alias{print.ContTable} \title{Format and print the \code{ContTable} class objects} \usage{ -\method{print}{ContTable}(x, missing = FALSE, digits = 2, - nonnormal = NULL, minMax = FALSE, quote = FALSE, test = TRUE, - pDigits = 3, explain = TRUE, printToggle = TRUE, ...) +\method{print}{ContTable}(x, digits = 2, pDigits = 3, quote = FALSE, + missing = FALSE, explain = TRUE, printToggle = TRUE, nonnormal = NULL, + minMax = FALSE, test = TRUE, ...) } \arguments{ \item{x}{The result of a call to the \code{\link{CreateContTable}} function.} - \item{missing}{Whether to show missing data information - (not implemented yet, placeholder)} - \item{digits}{Number of digits to print in the table.} - \item{nonnormal}{A character vector to specify the - variables for which the p-values should be those of - nonparametric tests. By default all p-values are from - normal assumption-based tests (oneway.test).} - - \item{minMax}{Whether to use [min,max] instead of - [p25,p75] for nonnormal variables. The default is FALSE.} + \item{pDigits}{Number of digits to print for p-values.} \item{quote}{Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily.} - \item{test}{Whether to show the p-values. TRUE by - default. If FALSE, only the numerical summaries are - shown.} - - \item{pDigits}{Number of digits to print for p-values.} + \item{missing}{Whether to show missing data information + (not implemented yet, placeholder)} \item{explain}{Whether to add explanation to the variable names, i.e., (mean (sd) or median [IQR]) is added to the @@ -42,6 +30,18 @@ no output is created, and a matrix is invisibly returned.} + \item{nonnormal}{A character vector to specify the + variables for which the p-values should be those of + nonparametric tests. By default all p-values are from + normal assumption-based tests (oneway.test).} + + \item{minMax}{Whether to use [min,max] instead of + [p25,p75] for nonnormal variables. The default is FALSE.} + + \item{test}{Whether to show the p-values. TRUE by + default. If FALSE, only the numerical summaries are + shown.} + \item{...}{For compatibility with generic. Ignored.} } \value{ diff --git a/man/print.TableOne.Rd b/man/print.TableOne.Rd index 992b961..d17bbf3 100644 --- a/man/print.TableOne.Rd +++ b/man/print.TableOne.Rd @@ -2,49 +2,57 @@ \alias{print.TableOne} \title{Format and print the \code{TableOne} class objects} \usage{ -\method{print}{TableOne}(x, missing = FALSE, quote = FALSE, test = TRUE, - catDigits = 1, contDigits = 2, pDigits = 3, format = c("fp", "f", "p", - "pf")[1], exact = NULL, cramVars = NULL, nonnormal = NULL, - minMax = FALSE, explain = TRUE, printToggle = TRUE, ...) +\method{print}{TableOne}(x, catDigits = 1, contDigits = 2, pDigits = 3, + quote = FALSE, missing = FALSE, explain = TRUE, printToggle = TRUE, + test = TRUE, format = c("fp", "f", "p", "pf")[1], cramVars = NULL, + exact = NULL, nonnormal = NULL, minMax = FALSE, ...) } \arguments{ \item{x}{The result of a call to the \code{\link{CreateTableOne}} function.} - \item{missing}{Whether to show missing data information - (not implemented yet, placeholder)} + \item{catDigits}{Number of digits to print for + proportions. Default 1.} + + \item{contDigits}{Number of digits to print for + continuous variables. Default 2.} + + \item{pDigits}{Number of digits to print for p-values. + Default 3.} \item{quote}{Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily.} - \item{test}{Whether to show the p-values. TRUE by - default. If FALSE, only the numerical summaries are - shown.} + \item{missing}{Whether to show missing data information + (not implemented yet, placeholder)} - \item{catDigits}{Number of digits to print for - proportions. Default 1.} + \item{explain}{Whether to add explanation to the variable + names, i.e., (\%) is added to the variable names when + percentage is shown.} - \item{contDigits}{Number of digits to print for - continuous variables. Default 2.} + \item{printToggle}{Whether to print the output. If FLASE, + no output is created, and a matrix is invisibly + returned.} - \item{pDigits}{Number of digits to print for p-values. - Default 3.} + \item{test}{Whether to show the p-values. TRUE by + default. If FALSE, only the numerical summaries are + shown.} \item{format}{The default is "fp" frequency (percentage). You can also choose from "f" frequency only, "p" percentage only, and "pf" percentage (frequency).} + \item{cramVars}{A character vector to specify the + two-level categorical variables, for which both levels + should be shown in one row.} + \item{exact}{A character vector to specify the variables for which the p-values should be those of exact tests. By default all p-values are from large sample approximation tests (chisq.test).} - \item{cramVars}{A character vector to specify the - two-level categorical variables, for which both levels - should be shown in one row.} - \item{nonnormal}{A character vector to specify the variables for which the p-values should be those of nonparametric tests. By default all p-values are from @@ -53,14 +61,6 @@ \item{minMax}{Whether to use [min,max] instead of [p25,p75] for nonnormal variables. The default is FALSE.} - \item{explain}{Whether to add explanation to the variable - names, i.e., (\%) is added to the variable names when - percentage is shown.} - - \item{printToggle}{Whether to print the output. If FLASE, - no output is created, and a matrix is invisibly - returned.} - \item{...}{For compatibility with generic. Ignored.} } \value{ From 9debd4e9a99de25306625a368bb074196727fc24 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Sun, 16 Feb 2014 18:03:22 -0500 Subject: [PATCH 13/31] documents and example fix --- DESCRIPTION | 2 +- R/CreateCatTable.R | 3 ++- R/CreateContTable.R | 3 +++ R/print.ContTable.R | 3 +++ R/tableone-package.R | 25 ++++++++++++++++--------- man/CreateCatTable.Rd | 3 ++- man/CreateContTable.Rd | 3 +++ man/print.ContTable.Rd | 3 +++ man/tableone-package.Rd | 38 ++++++++++++++++++++++---------------- 9 files changed, 55 insertions(+), 28 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index cd83440..0e31080 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -6,7 +6,7 @@ Date: 2014-02-16 Author: Kazuki Yoshida, Justin Bohn Maintainer: Kazuki Yoshida Description: This package creates "Table 1", i.e., description of baseline - patient characteristics, which is essential every medical research. This + patient characteristics, which is essential in every medical research. This package provides functions to create such summaries for continuous and categorical variables, optionally with subgroups comparisons. The package was insipired by and based on descriptive statistics functions in Deducer, diff --git a/R/CreateCatTable.R b/R/CreateCatTable.R index 55d402f..9e6714f 100644 --- a/R/CreateCatTable.R +++ b/R/CreateCatTable.R @@ -46,7 +46,8 @@ ##' ##' ## Simply typing the object name will invoke the print.CatTable method, ##' ## which will show the sample size, frequencies and percentages. -##' ## For 2-level variables, only the higher level is shown for simplicity. +##' ## For 2-level variables, only the higher level is shown for simplicity +##' ## unless the variables are specified in the cramVars argument. ##' catTableOverall ##' ##' ## Use the showAllLevels argument to see all levels for all variables. diff --git a/R/CreateContTable.R b/R/CreateContTable.R index 702c767..a5d222b 100644 --- a/R/CreateContTable.R +++ b/R/CreateContTable.R @@ -68,6 +68,9 @@ ##' nonNormalVars <- c("age","chol","copper","alk.phos","trig","protime") ##' print(contTableOverall, nonnormal = nonNormalVars) ##' +##' ## To show median [min,max] for nonnormal variables, use minMax = TRUE +##' print(contTableOverall, nonnormal = nonNormalVars, minMax = TRUE) +##' ##' ## The table can be stratified by one or more variables ##' contTableBySexTrt <- CreateContTable(vars = contVars, ##' strata = c("sex","trt"), data = pbc) diff --git a/R/print.ContTable.R b/R/print.ContTable.R index 3b215d9..7d3d98b 100644 --- a/R/print.ContTable.R +++ b/R/print.ContTable.R @@ -49,6 +49,9 @@ ##' nonNormalVars <- c("age","chol","copper","alk.phos","trig","protime") ##' print(contTableOverall, nonnormal = nonNormalVars) ##' +##' ## To show median [min,max] for nonnormal variables, use minMax = TRUE +##' print(contTableOverall, nonnormal = nonNormalVars, minMax = TRUE) +##' ##' ## The table can be stratified by one or more variables ##' contTableBySexTrt <- CreateContTable(vars = contVars, ##' strata = c("sex","trt"), data = pbc) diff --git a/R/tableone-package.R b/R/tableone-package.R index 176ebb9..9eb0551 100644 --- a/R/tableone-package.R +++ b/R/tableone-package.R @@ -1,6 +1,6 @@ ##' Create "Table 1" to describe baseline characteristics ##' -##' This package creates "Table 1", i.e., description of baseline patient characteristics, which is essential every medical research. This package provides functions to create such summaries for continuous and categorical variables, optionally with subgroups and groupwise comparison. The package was insipired by and based on descriptive statistics functions in Deducer, a Java-based GUI package by Ian Fellows. This package does not require GUI or Java, and intended for CUI users. +##' This package creates "Table 1", i.e., description of baseline patient characteristics, which is essential in every medical research. This package provides functions to create such summaries for continuous and categorical variables, optionally with subgroups and groupwise comparison. The package was insipired by and based on descriptive statistics functions in Deducer, a Java-based GUI package by Ian Fellows. This package does not require GUI or Java, and intended for CUI users. ##' ##' @name tableone-package ##' @aliases tableone-package tableone @@ -10,8 +10,11 @@ ##' Ian Fellows for developing the Deducer package, which this package is based on. ##' ##' Hadley Wickham for packaging advice and for creating tools this package was made with (roxygen2, devtools, testthat). +##' +##' Yoshinobu Kanda for design advice. +## and for (future) integration into \code{RcmdrPlugin.EZR}. ##' -##' Members of Facebook Organization of R Users for Medical Statistics in Japan (FORUMS-J) for testing pre-release versions. +##' Members of the Facebook Organization of R Users for Medical Statistics in Japan (FORUMS-J) for testing pre-release versions. ##' ##' Developmental repository is on github. Your contributions are appreciated. ##' @@ -36,9 +39,9 @@ ##' ## Check variables ##' head(pbc) ##' -##' ## Make categorical variables factors -##' varsToFactor <- c("status","trt","ascites","hepato","spiders","edema","stage") -##' pbc[varsToFactor] <- lapply(pbc[varsToFactor], factor) +##' ## List numerically coded categorical variables for later conversion. +##' ## Factor variables are automatically handled as categorical variables. +##' factorVars <- c("status","trt","ascites","hepato","spiders","edema","stage") ##' ##' ## Create a variable list ##' dput(names(pbc)) @@ -47,17 +50,21 @@ ##' "copper","alk.phos","ast","trig","platelet", ##' "protime","stage") ##' -##' ## Create Table 1 stratified by trt -##' tableOne <- CreateTableOne(vars = vars, strata = c("trt"), data = pbc) +##' ## Create Table 1 stratified by trt. Use factorVars to convert numerically +##' ## coded categorical variables as factors without changing the dataset. +##' tableOne <- CreateTableOne(vars = vars, strata = c("trt"), data = pbc, +##' factorVars = factorVars) ##' ##' ## Just typing the object name will invoke the print.TableOne method ##' tableOne ##' ##' ## Specifying nonnormal variables will show the variables appropriately, ##' ## and show nonparametric test p-values. Specify variables in the exact -##' ## argument to obtain the exact test p-values. +##' ## argument to obtain the exact test p-values. For two-level categorical +##' ## variables specified in cramVars, both levels are shown. Use minMax +##' ## argument to show median [min, max] for nonnormal variables. ##' print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), -##' exact = c("status","stage")) +##' exact = c("status","stage"), cramVars = "sex") ##' ##' ## Use the summary.TableOne method for detailed summary ##' summary(tableOne) diff --git a/man/CreateCatTable.Rd b/man/CreateCatTable.Rd index 8e82657..72c6fc4 100644 --- a/man/CreateCatTable.Rd +++ b/man/CreateCatTable.Rd @@ -76,7 +76,8 @@ catTableOverall <- CreateCatTable(vars = catVars, data = pbc) ## Simply typing the object name will invoke the print.CatTable method, ## which will show the sample size, frequencies and percentages. -## For 2-level variables, only the higher level is shown for simplicity. +## For 2-level variables, only the higher level is shown for simplicity +## unless the variables are specified in the cramVars argument. catTableOverall ## Use the showAllLevels argument to see all levels for all variables. diff --git a/man/CreateContTable.Rd b/man/CreateContTable.Rd index 3ae281e..b0d93c5 100644 --- a/man/CreateContTable.Rd +++ b/man/CreateContTable.Rd @@ -104,6 +104,9 @@ summary(contTableOverall) nonNormalVars <- c("age","chol","copper","alk.phos","trig","protime") print(contTableOverall, nonnormal = nonNormalVars) +## To show median [min,max] for nonnormal variables, use minMax = TRUE +print(contTableOverall, nonnormal = nonNormalVars, minMax = TRUE) + ## The table can be stratified by one or more variables contTableBySexTrt <- CreateContTable(vars = contVars, strata = c("sex","trt"), data = pbc) diff --git a/man/print.ContTable.Rd b/man/print.ContTable.Rd index 59efaeb..5feb81c 100644 --- a/man/print.ContTable.Rd +++ b/man/print.ContTable.Rd @@ -83,6 +83,9 @@ summary(contTableOverall) nonNormalVars <- c("age","chol","copper","alk.phos","trig","protime") print(contTableOverall, nonnormal = nonNormalVars) +## To show median [min,max] for nonnormal variables, use minMax = TRUE +print(contTableOverall, nonnormal = nonNormalVars, minMax = TRUE) + ## The table can be stratified by one or more variables contTableBySexTrt <- CreateContTable(vars = contVars, strata = c("sex","trt"), data = pbc) diff --git a/man/tableone-package.Rd b/man/tableone-package.Rd index eea76e5..4447ef0 100644 --- a/man/tableone-package.Rd +++ b/man/tableone-package.Rd @@ -5,14 +5,14 @@ \title{Create "Table 1" to describe baseline characteristics} \description{ This package creates "Table 1", i.e., description of -baseline patient characteristics, which is essential every -medical research. This package provides functions to create -such summaries for continuous and categorical variables, -optionally with subgroups and groupwise comparison. The -package was insipired by and based on descriptive -statistics functions in Deducer, a Java-based GUI package -by Ian Fellows. This package does not require GUI or Java, -and intended for CUI users. +baseline patient characteristics, which is essential in +every medical research. This package provides functions to +create such summaries for continuous and categorical +variables, optionally with subgroups and groupwise +comparison. The package was insipired by and based on +descriptive statistics functions in Deducer, a Java-based +GUI package by Ian Fellows. This package does not require +GUI or Java, and intended for CUI users. } \note{ Special Thanks: @@ -23,7 +23,9 @@ package is based on. Hadley Wickham for packaging advice and for creating tools this package was made with (roxygen2, devtools, testthat). -Members of Facebook Organization of R Users for Medical +Yoshinobu Kanda for design advice. + +Members of the Facebook Organization of R Users for Medical Statistics in Japan (FORUMS-J) for testing pre-release versions. @@ -42,9 +44,9 @@ data(pbc) ## Check variables head(pbc) -## Make categorical variables factors -varsToFactor <- c("status","trt","ascites","hepato","spiders","edema","stage") -pbc[varsToFactor] <- lapply(pbc[varsToFactor], factor) +## List numerically coded categorical variables for later conversion. +## Factor variables are automatically handled as categorical variables. +factorVars <- c("status","trt","ascites","hepato","spiders","edema","stage") ## Create a variable list dput(names(pbc)) @@ -53,17 +55,21 @@ vars <- c("time","status","age","sex","ascites","hepato", "copper","alk.phos","ast","trig","platelet", "protime","stage") -## Create Table 1 stratified by trt -tableOne <- CreateTableOne(vars = vars, strata = c("trt"), data = pbc) +## Create Table 1 stratified by trt. Use factorVars to convert numerically +## coded categorical variables as factors without changing the dataset. +tableOne <- CreateTableOne(vars = vars, strata = c("trt"), data = pbc, + factorVars = factorVars) ## Just typing the object name will invoke the print.TableOne method tableOne ## Specifying nonnormal variables will show the variables appropriately, ## and show nonparametric test p-values. Specify variables in the exact -## argument to obtain the exact test p-values. +## argument to obtain the exact test p-values. For two-level categorical +## variables specified in cramVars, both levels are shown. Use minMax +## argument to show median [min, max] for nonnormal variables. print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), - exact = c("status","stage")) + exact = c("status","stage"), cramVars = "sex") ## Use the summary.TableOne method for detailed summary summary(tableOne) From f7465faa372253f1926da2948b0c8e1347cfef09 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Sun, 16 Feb 2014 18:09:04 -0500 Subject: [PATCH 14/31] name change ModuleTryCatchWE --- R/modules.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/modules.R b/R/modules.R index cef999d..3a43a46 100644 --- a/R/modules.R +++ b/R/modules.R @@ -105,7 +105,7 @@ ModuleCreateStrataVarName <- function(obj) { ## Try catch function # Taken from demo(error.catching) ## Used to define non-failing functions, that return NA when there is an error -tryCatch.W.E <- function(expr) { +ModuleTryCatchWE <- function(expr) { W <- NULL w.handler <- function(w) { # warning handler W <<- w @@ -121,19 +121,19 @@ tryCatch.W.E <- function(expr) { ## Consider additional options by do.call() ModuleTestSafe <- function(obj, testFunction, testArgs = NULL) { - out <- tryCatch.W.E(do.call(testFunction, args = c(list(obj), testArgs))$p.value) + out <- ModuleTryCatchWE(do.call(testFunction, args = c(list(obj), testArgs))$p.value) ## If it returns a numeric value, return it. Otherwise, return NA. ifelse(is.numeric(out$value), out$value, NA) } ## Define special skewness and kurtosis functions that do not fail (SAS definitions) sasSkewness <- function(x) { - out <- tryCatch.W.E(e1071::skewness(x, na.rm = TRUE, type = 2)) + out <- ModuleTryCatchWE(e1071::skewness(x, na.rm = TRUE, type = 2)) ## If it returns a numeric value, return it. Otherwise, return NaN. ifelse(is.numeric(out$value), out$value, NaN) } sasKurtosis <- function(x) { - out <- tryCatch.W.E(e1071::kurtosis(x, na.rm = TRUE, type = 2)) + out <- ModuleTryCatchWE(e1071::kurtosis(x, na.rm = TRUE, type = 2)) ## If it returns a numeric value, return it. Otherwise, return NaN. ifelse(is.numeric(out$value), out$value, NaN) } From 6ec1e9e21b41878a262eaf2cf9705a603d77ba66 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Sun, 16 Feb 2014 19:11:05 -0500 Subject: [PATCH 15/31] p.miss (proportion missing added) --- R/CreateContTable.R | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/R/CreateContTable.R b/R/CreateContTable.R index a5d222b..39d6382 100644 --- a/R/CreateContTable.R +++ b/R/CreateContTable.R @@ -97,8 +97,8 @@ CreateContTable <- function(vars, # character vector of variable names strata, # character vector of variable names data, # data frame - func.names = c( # can pick a subset of them - "n","miss", + funcNames = c( # can pick a subset of them + "n","miss","p.miss", "mean","sd", "median","p25","p75","min","max", "skew","kurt" @@ -145,30 +145,31 @@ CreateContTable <- if(!all(sapply(dat, is.numeric))) {stop("Can only be run on numeric variables")} - ## Create indexes for default functions by partial string matching with the func.names argument - func.indexes <- pmatch(func.names, c("n","miss", + ## Create indexes for default functions by partial string matching with the funcNames argument + funcIndexes <- pmatch(funcNames, c("n","miss","p.miss", "mean","sd", "median","p25","p75","min","max", "skew","kurt")) ## Remove NA - func.indexes <- func.indexes[!is.na(func.indexes)] + funcIndexes <- funcIndexes[!is.na(funcIndexes)] ## Create a list of default functions - functions <- c("n" = function(x) length(x), - "miss" = function(x) sum(is.na(x)), - "mean" = function(x) mean(x, na.rm = TRUE), - "sd" = function(x) sd(x, na.rm = TRUE), - "median" = function(x) median(x, na.rm = TRUE), - "p25" = function(x) quantile(x, probs = 0.25, na.rm = TRUE), - "p75" = function(x) quantile(x, probs = 0.75, na.rm = TRUE), - "min" = function(x) min(x, na.rm = TRUE), - "max" = function(x) max(x, na.rm = TRUE), - "skew" = function(x) sasSkewness(x), - "kurt" = function(x) sasKurtosis(x) + functions <- c("n" = function(x) {length(x)}, + "miss" = function(x) {sum(is.na(x))}, + "p.miss" = function(x) {sum(is.na(x)) / length(x)}, + "mean" = function(x) {mean(x, na.rm = TRUE)}, + "sd" = function(x) {sd(x, na.rm = TRUE)}, + "median" = function(x) {median(x, na.rm = TRUE)}, + "p25" = function(x) {quantile(x, probs = 0.25, na.rm = TRUE)}, + "p75" = function(x) {quantile(x, probs = 0.75, na.rm = TRUE)}, + "min" = function(x) {min(x, na.rm = TRUE)}, + "max" = function(x) {max(x, na.rm = TRUE)}, + "skew" = function(x) {ModuleSasSkewness(x)}, + "kurt" = function(x) {ModuleSasKurtosis(x)} ) ## Keep only functions in use - functions <- functions[func.indexes] + functions <- functions[funcIndexes] ## Check for additional functions if(!missing(func.additional)){ From afac294c21162ae13a7ad4f9920c62b58ee58fa0 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Sun, 16 Feb 2014 19:44:24 -0500 Subject: [PATCH 16/31] module name consistency etc. no function change --- R/CreateCatTable.R | 76 ++++++++++++++++++++++----------------------- R/CreateContTable.R | 61 ++++++++++++++---------------------- R/CreateTableOne.R | 26 ++++++++-------- R/modules.R | 4 +-- 4 files changed, 75 insertions(+), 92 deletions(-) diff --git a/R/CreateCatTable.R b/R/CreateCatTable.R index 9e6714f..f0585e5 100644 --- a/R/CreateCatTable.R +++ b/R/CreateCatTable.R @@ -1,10 +1,10 @@ ##' Create an object summarizing categorical variables -##' +##' ##' Create an object summarizing categorical variables optionally stratifying ##' by one or more startifying variables and performing statistical tests. The ##' object gives a table that is easy to use in medical research papers. See ##' also \code{\link{print.CatTable}} and \code{\link{summary.CatTable}}. -##' +##' ##' @param vars Variable(s) to be summarized given as a character vector. ##' @param strata Stratifying (grouping) variable name(s) given as a character ##' vector. If omitted, the overall results are returned. @@ -30,59 +30,59 @@ ##' \code{\link{CreateContTable}}, \code{\link{print.ContTable}}, \code{\link{summary.ContTable}}, ##' \code{\link{CreateTableOne}}, \code{\link{print.TableOne}}, \code{\link{summary.TableOne}} ##' @examples -##' +##' ##' ## Load ##' library(tableone) -##' +##' ##' ## Load Mayo Clinic Primary Biliary Cirrhosis Data ##' library(survival) ##' data(pbc) ##' ## Check variables ##' head(pbc) -##' +##' ##' ## Create an overall table for categorical variables ##' catVars <- c("status","ascites","hepato","spiders","edema","stage") ##' catTableOverall <- CreateCatTable(vars = catVars, data = pbc) -##' +##' ##' ## Simply typing the object name will invoke the print.CatTable method, ##' ## which will show the sample size, frequencies and percentages. ##' ## For 2-level variables, only the higher level is shown for simplicity ##' ## unless the variables are specified in the cramVars argument. ##' catTableOverall -##' +##' ##' ## Use the showAllLevels argument to see all levels for all variables. ##' print(catTableOverall, showAllLevels = TRUE) -##' +##' ##' ## You can choose form frequencies ("f") and/or percentages ("p") or both. ##' ## "fp" frequency (percentage) is the default. Row names change accordingly. ##' print(catTableOverall, format = "f") ##' print(catTableOverall, format = "p") -##' +##' ##' ## To further examine the variables, use the summary.CatTable method, ##' ## which will show more details. ##' summary(catTableOverall) -##' +##' ##' ## The table can be stratified by one or more variables ##' catTableBySexTrt <- CreateCatTable(vars = catVars, ##' strata = c("sex","trt"), data = pbc) -##' +##' ##' ## print now includes p-values which are by default calculated by chisq.test. ##' ## It is formatted at the decimal place specified by the pDigits argument ##' ## (3 by default). It does <0.001 for you. ##' catTableBySexTrt -##' +##' ##' ## The exact argument toggles the p-values to the exact test result from ##' ## fisher.test. It will show which ones are from exact tests. ##' print(catTableBySexTrt, exact = "ascites") -##' +##' ##' ## summary now includes both types of p-values ##' summary(catTableBySexTrt) -##' +##' ##' ## If your work flow includes copying to Excel and Word when writing manuscripts, ##' ## you may benefit from the quote argument. This will quote everything so that ##' ## Excel does not mess up the cells. ##' print(catTableBySexTrt, exact = "ascites", quote = TRUE) -##' +##' ##' @export CreateCatTable <- function(vars, # character vector of variable names @@ -98,33 +98,33 @@ CreateCatTable <- ### Data check ## Check if the data given is a dataframe ModuleStopIfNotDataFrame(data) - + ## Check if variables exist. Drop them if not. vars <- ModuleReturnVarsExist(vars, data) ## Abort if no variables exist at this point ModuleStopIfNoVarsLeft(vars) - + ## Extract necessary variables (unused variables are not included in dat) dat <- data[c(vars)] + ## Toggle test FALSE if no strata + test <- ModuleReturnFalseIfNoStrata(strata, test) + ## Convert to a factor if it is not a factor already. (categorical version only) ## Not done on factors, to avoid dropping zero levels. datNotFactor <- sapply(dat, class) != "factor" dat[datNotFactor] <- lapply(dat[datNotFactor], factor) - ## Toggle test FALSE if no strata - test <- ModuleReturnFalseIfNoStrata(strata, test) - ## Create strata data frame (data frame with only strata variables) strata <- ModuleReturnStrata(strata, data, dat) -### Perform descriptive analysis +### Actual descriptive statistics are calculated here. ## strata--variable-CreateTableForOneVar structure ## Devide by strata - result <- by(data = dat, INDICES = strata, + result <- by(data = dat, INDICES = strata, # INDICES can be a multi-column data frame ## Work on each stratum FUN = function(dfStrataDat) { # dfStrataDat should be a data frame @@ -136,8 +136,8 @@ CreateCatTable <- }, simplify = FALSE) - - ## Add stratification information to the column header + + ## Add stratification variable information as an attribute if (length(result) > 1 ) { ## strataVarName from dimension headers strataVarName <- ModuleCreateStrataVarName(result) @@ -145,28 +145,26 @@ CreateCatTable <- attributes(result) <- c(attributes(result), list(strataVarName = strataVarName)) } - + ### Perform tests when necessary ## Initialize pValues <- NULL listXtabs <- list() - ## Only when test is asked for # Should always do this? + ## Only when test is asked for if (test == TRUE) { ## Create all combinations of strata levels and collapse as a vector for level combinations. dfStrataLevels <- expand.grid(attr(result, "dimnames")) # 1st var cycles fastest, consistent with by() - ## Create a single variable representing all strata - strataLevels <- apply(X = dfStrataLevels, - MARGIN = 1, - FUN = paste0, collapse = ":") - ## Create the actual variable from the observed levels - strataVar <- as.character(interaction(strata, sep = ":")) - + ## Create a single variable representing all strata + strataLevels <- apply(X = dfStrataLevels, MARGIN = 1, FUN = paste0, collapse = ":") + ## Create the actual variable from the observed levels + strataVar <- as.character(interaction(strata, sep = ":")) ## Make it a factor (kruskal.test requires it). Use levels not to drop defined nonexisting levels. - strataVar <- factor(strataVar, levels = strataLevels) + strataVar <- factor(strataVar, levels = strataLevels) + ## Loop over variables in dat, and create a list of xtabs listXtabs <- sapply(X = names(dat), @@ -174,7 +172,7 @@ CreateCatTable <- ## Create a formula formula <- paste0("~ ", var, " + ", "strataVar") formula <- as.formula(formula) - + ## Create a 2-dimensional crosstable xtabs(formula = formula, data = dat) }, @@ -182,10 +180,10 @@ CreateCatTable <- ## Rename the second dimension of the xtabs with the newly create name. for (i in seq_along(listXtabs)) { - + names(dimnames(listXtabs[[i]]))[2] <- strataVarName - } - + } + ## Loop over xtabs, and create p-values pValues <- sapply(X = listXtabs, FUN = function(xtabs) { @@ -195,7 +193,7 @@ CreateCatTable <- pExact = ModuleTestSafe(xtabs, testExact, argsExact) ) }, - simplify = FALSE) + simplify = FALSE) ## Create a single data frame (n x 2 (normal,nonormal)) pValues <- do.call(rbind, pValues) diff --git a/R/CreateContTable.R b/R/CreateContTable.R index 39d6382..d7bd90c 100644 --- a/R/CreateContTable.R +++ b/R/CreateContTable.R @@ -6,33 +6,16 @@ ##' also \code{\link{print.ContTable}} and \code{\link{summary.ContTable}}. ##' ##' @param vars Variable(s) to be summarized given as a character vector. -##' @param strata Stratifying (grouping) variable name(s) given as a character -##' vector. If omitted, the overall results are returned. -##' @param data A data frame in which these variables exist. All variables -##' (both vars and strata) must be in this data frame. -##' @param func.names The functions to give the group size, number with missing -##' values, mean, standard deviations, median, 25th percentile, 75th -##' percentile, minimum, maximum, skewness (same definition as in SAS), -##' kurtosis (same definition as in SAS). All of them can be seen in the -##' summary method output. The print method uses subset of these. You can -##' choose subset of them or reorder them. They are all configure to omit NA -##' values (\code{na.rm = TRUE}). -##' @param func.additional Additional functions can be given as a named list. For example, \code{list(sum = sum)}. -##' @param test If TRUE, as in the default and there are more than two groups, -##' groupwise comparisons are performed. Both tests that assume normality and -##' tests that do not are performed. Either one of the result can be obtained -##' from the print method. -##' @param testNormal A function used to perform the normal assumption based -##' tests. The default is \code{\link{oneway.test}}. This is equivalent of the t-test when there are only two groups. +##' @param strata Stratifying (grouping) variable name(s) given as a character vector. If omitted, the overall results are returned. +##' @param data A data frame in which these variables exist. All variables (both vars and strata) must be in this data frame. +##' @param funcNames The functions to give the group size, number with missing values, proportion with missing values, mean, standard deviations, median, 25th percentile, 75th percentile, minimum, maximum, skewness (same definition as in SAS), kurtosis (same definition as in SAS). All of them can be seen in the summary method output. The print method uses subset of these. You can choose subset of them or reorder them. They are all configure to omit NA values (\code{na.rm = TRUE}). +##' @param funcAdditional Additional functions can be given as a named list. For example, \code{list(sum = sum)}. +##' @param test If TRUE, as in the default and there are more than two groups, groupwise comparisons are performed. Both tests that assume normality and tests that do not are performed. Either one of the result can be obtained from the print method. +##' @param testNormal A function used to perform the normal assumption based tests. The default is \code{\link{oneway.test}}. This is equivalent of the t-test when there are only two groups. ##' @param argsNormal A named list of arguments passed to the function specified in \code{testNormal}. The default is \code{list(var.equal = TRUE)}, which makes it the ordinary ANOVA that assumes equal variance across groups. -##' @param testNonNormal A function used to perform the nonparametric tests. -##' The default is \code{kruskal.test} (Kruskal-Wallis rank sum test). This is -##' equivalent of the wilcox.test (Man-Whitney U test) when there are only two -##' groups. +##' @param testNonNormal A function used to perform the nonparametric tests. The default is \code{kruskal.test} (Kruskal-Wallis rank sum test). This is equivalent of the wilcox.test (Man-Whitney U test) when there are only two groups. ##' @param argsNonNormal A named list of arguments passed to the function specified in \code{testNonNormal}. The default is \code{list(NULL)}, which is just a placeholder. -##' @return An object of class \code{ContTable}, which really is a \code{\link{by}} object with -##' additional attributes. Each element of the \code{\link{by}} part is a matrix with rows -##' representing variables, and columns representing summary statistics. +##' @return An object of class \code{ContTable}, which really is a \code{\link{by}} object with additional attributes. Each element of the \code{\link{by}} part is a matrix with rows representing variables, and columns representing summary statistics. ##' @author Kazuki Yoshida (based on \code{Deducer::descriptive.table()}) ##' @seealso ##' \code{\link{CreateContTable}}, \code{\link{print.ContTable}}, \code{\link{summary.ContTable}}, @@ -103,7 +86,7 @@ CreateContTable <- "median","p25","p75","min","max", "skew","kurt" ), - func.additional, # named list of additional functions + funcAdditional, # named list of additional functions test = TRUE, # Whether to put p-values testNormal = oneway.test, # test for normally distributed variables argsNormal = list(var.equal = TRUE), # arguments passed to testNormal @@ -147,9 +130,9 @@ CreateContTable <- ## Create indexes for default functions by partial string matching with the funcNames argument funcIndexes <- pmatch(funcNames, c("n","miss","p.miss", - "mean","sd", - "median","p25","p75","min","max", - "skew","kurt")) + "mean","sd", + "median","p25","p75","min","max", + "skew","kurt")) ## Remove NA funcIndexes <- funcIndexes[!is.na(funcIndexes)] @@ -172,23 +155,26 @@ CreateContTable <- functions <- functions[funcIndexes] ## Check for additional functions - if(!missing(func.additional)){ + if(!missing(funcAdditional)) { + ## When additional functions are given - if(!is.list(func.additional) || is.null(names(func.additional))) { + if(!is.list(funcAdditional) || is.null(names(funcAdditional))) { ## Stop if not a named list - stop("func.additional must be a named list of functions") + stop("funcAdditional must be a named list of functions") } ## If a named list is given, add to the vector of functions and their names - functions <- c(functions, unlist(func.additional)) - func.names <- c(func.names, names(func.additional)) + functions <- c(functions, unlist(funcAdditional)) + funcNames <- c(funcNames, names(funcAdditional)) } ### Actual descriptive statistics are calculated here. ## strata-functions-variable structure alternative 2014-01-22 ## Devide by strata - result <- by(data = dat, INDICES = strata, # INDICES can be a multi-column data frame + result <- by(data = dat, INDICES = strata, # INDICES can be a multi-column data frame + + ## Work on each stratum FUN = function(strataDat) { # Work on each stratum through by() ## Loop for functions @@ -205,7 +191,7 @@ CreateContTable <- do.call(cbind, out) }) - ## Add stratification information to the column header + ## Add stratification variable information as an attribute if (length(result) > 1 ) { ## strataVarName from dimension headers strataVarName <- ModuleCreateStrataVarName(result) @@ -219,7 +205,6 @@ CreateContTable <- ## Initialize to avoid error when it does not exist at the attribute assignment pValues <- NULL -### This part performs between group tests ## Only when test is asked for if (test == TRUE) { @@ -238,7 +223,7 @@ CreateContTable <- FUN = function(var) { ## Perform tests and return the result as 1x2 DF data.frame( - pNormal = ModuleTestSafe(var ~ strataVec, testNormal, argsNormal), + pNormal = ModuleTestSafe(var ~ strataVec, testNormal, argsNormal), pNonNormal = ModuleTestSafe(var ~ strataVec, testNonNormal, argsNonNormal) ) }, diff --git a/R/CreateTableOne.R b/R/CreateTableOne.R index 263349b..fa35c98 100644 --- a/R/CreateTableOne.R +++ b/R/CreateTableOne.R @@ -172,12 +172,12 @@ CreateTableOne <- ## CreateCatTable for categorical. CreateContTable for continuous. listOfConstructors <- listOfConstructors[logiFactors + 1] ## Create a list of arguments - listOfArgs <- list(argsCreateContTable = argsCreateContTable, - argsCreateCatTable = argsCreateCatTable) + listOfArgs <- list(argsCreateContTable = argsCreateContTable, + argsCreateCatTable = argsCreateCatTable) ## argsCreateCatTable for categorical. argsCreateContTable for continuous. - listOfArgs <- listOfArgs[logiFactors + 1] + listOfArgs <- listOfArgs[logiFactors + 1] - ## Create a list of tables + ## Create a list of tables by looping over variables/constructors/arguments TableOne <- sapply(seq_along(listOfConstructors), FUN = function(i) { @@ -189,11 +189,11 @@ CreateTableOne <- }, simplify = FALSE) - ## Give variable names + ## Give variable names to the result object names(TableOne) <- vars - ## Create ContTable and CatTable objects (this is redundant) + ## Create ContTable and CatTable objects (this is redundant, but easy) ## Aggregated ContTable ContTable <- do.call(CreateContTable, args = c(list(vars = varNumerics), argsCreateContTable)) @@ -201,16 +201,16 @@ CreateTableOne <- CatTable <- do.call(CreateCatTable, args = c(list(vars = varFactors), argsCreateCatTable)) - ## Create a list - listOfTables <- list(TableOne = TableOne, - ContTable = ContTable, - CatTable = CatTable - ) + ## Create a list for output + TableOneObject <- list(TableOne = TableOne, + ContTable = ContTable, + CatTable = CatTable + ) ## Give a class - class(listOfTables) <- "TableOne" + class(TableOneObject) <- "TableOne" ## Return the object - return(listOfTables) + return(TableOneObject) } } diff --git a/R/modules.R b/R/modules.R index 3a43a46..1d17951 100644 --- a/R/modules.R +++ b/R/modules.R @@ -127,12 +127,12 @@ ModuleTestSafe <- function(obj, testFunction, testArgs = NULL) { } ## Define special skewness and kurtosis functions that do not fail (SAS definitions) -sasSkewness <- function(x) { +ModuleSasSkewness <- function(x) { out <- ModuleTryCatchWE(e1071::skewness(x, na.rm = TRUE, type = 2)) ## If it returns a numeric value, return it. Otherwise, return NaN. ifelse(is.numeric(out$value), out$value, NaN) } -sasKurtosis <- function(x) { +ModuleSasKurtosis <- function(x) { out <- ModuleTryCatchWE(e1071::kurtosis(x, na.rm = TRUE, type = 2)) ## If it returns a numeric value, return it. Otherwise, return NaN. ifelse(is.numeric(out$value), out$value, NaN) From 2372f30d87d3fd29e2c975ba5cf398b326d02869 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Sun, 16 Feb 2014 20:20:10 -0500 Subject: [PATCH 17/31] ModuleCreateStrataVarAsFactor: single variable representation of multivar strata --- R/CreateCatTable.R | 17 +++++------------ R/CreateContTable.R | 22 +++++++++------------- R/modules.R | 22 ++++++++++++++++++++++ 3 files changed, 36 insertions(+), 25 deletions(-) diff --git a/R/CreateCatTable.R b/R/CreateCatTable.R index f0585e5..4d27973 100644 --- a/R/CreateCatTable.R +++ b/R/CreateCatTable.R @@ -149,24 +149,17 @@ CreateCatTable <- ### Perform tests when necessary ## Initialize - pValues <- NULL + pValues <- NULL listXtabs <- list() ## Only when test is asked for if (test == TRUE) { - ## Create all combinations of strata levels and collapse as a vector for level combinations. - dfStrataLevels <- expand.grid(attr(result, "dimnames")) # 1st var cycles fastest, consistent with by() - ## Create a single variable representing all strata - strataLevels <- apply(X = dfStrataLevels, MARGIN = 1, FUN = paste0, collapse = ":") - - ## Create the actual variable from the observed levels - strataVar <- as.character(interaction(strata, sep = ":")) - ## Make it a factor (kruskal.test requires it). Use levels not to drop defined nonexisting levels. - strataVar <- factor(strataVar, levels = strataLevels) - - + ## Create a single variable representation of multivariable stratification + strataVar <- ModuleCreateStrataVarAsFactor(result, strata) + ## Loop over variables in dat, and create a list of xtabs + ## Empty strata are kept in the corss tables. Different behavior than the cont counterpart! listXtabs <- sapply(X = names(dat), FUN = function(var) { ## Create a formula diff --git a/R/CreateContTable.R b/R/CreateContTable.R index d7bd90c..bc6e5e0 100644 --- a/R/CreateContTable.R +++ b/R/CreateContTable.R @@ -14,7 +14,7 @@ ##' @param testNormal A function used to perform the normal assumption based tests. The default is \code{\link{oneway.test}}. This is equivalent of the t-test when there are only two groups. ##' @param argsNormal A named list of arguments passed to the function specified in \code{testNormal}. The default is \code{list(var.equal = TRUE)}, which makes it the ordinary ANOVA that assumes equal variance across groups. ##' @param testNonNormal A function used to perform the nonparametric tests. The default is \code{kruskal.test} (Kruskal-Wallis rank sum test). This is equivalent of the wilcox.test (Man-Whitney U test) when there are only two groups. -##' @param argsNonNormal A named list of arguments passed to the function specified in \code{testNonNormal}. The default is \code{list(NULL)}, which is just a placeholder. +##' @param argsNonNormal A named list of arguments passed to the function specified in \code{testNonNormal}. The default is \code{list(NULL)}, which is just a placeholder. ##' @return An object of class \code{ContTable}, which really is a \code{\link{by}} object with additional attributes. Each element of the \code{\link{by}} part is a matrix with rows representing variables, and columns representing summary statistics. ##' @author Kazuki Yoshida (based on \code{Deducer::descriptive.table()}) ##' @seealso @@ -156,7 +156,7 @@ CreateContTable <- ## Check for additional functions if(!missing(funcAdditional)) { - + ## When additional functions are given if(!is.list(funcAdditional) || is.null(names(funcAdditional))) { ## Stop if not a named list @@ -205,33 +205,29 @@ CreateContTable <- ## Initialize to avoid error when it does not exist at the attribute assignment pValues <- NULL + ## Only when test is asked for if (test == TRUE) { - ## Create a single variable representing all strata - strataVec <- apply(X = strata, MARGIN = 1, FUN = paste0, collapse = ":") - ## Give NA if any of the variables are missing - strataVecAnyMiss <- apply(X = is.na(strata), MARGIN = 1, FUN = sum) > 0 - strataVec[strataVecAnyMiss] <- NA - ## Make it a factor (kruskal.test requires it) - strataVec <- factor(strataVec) - + ## Create a single variable representation of multivariable stratification + strataVar <- ModuleCreateStrataVarAsFactor(result, strata) ## Loop over variables in dat, and obtain p values for two tests + ## DF = 6 when there are 8 levels (one empty), i.e., empty strata dropped by oneway.test/kruskal.test pValues <- sapply(X = dat, FUN = function(var) { ## Perform tests and return the result as 1x2 DF data.frame( - pNormal = ModuleTestSafe(var ~ strataVec, testNormal, argsNormal), - pNonNormal = ModuleTestSafe(var ~ strataVec, testNonNormal, argsNonNormal) + pNormal = ModuleTestSafe(var ~ strataVar, testNormal, argsNormal), + pNonNormal = ModuleTestSafe(var ~ strataVar, testNonNormal, argsNonNormal) ) }, simplify = FALSE) ## Create a single data frame (n x 2 (normal,nonormal)) pValues <- do.call(rbind, pValues) - } + } # Conditional for test == TRUE ends here. ## Return object diff --git a/R/modules.R b/R/modules.R index 1d17951..316d219 100644 --- a/R/modules.R +++ b/R/modules.R @@ -139,6 +139,28 @@ ModuleSasKurtosis <- function(x) { } +## Create a single variable representation of multivariable stratification +## result: by object; strata: data frame of stratifying variable(s) +ModuleCreateStrataVarAsFactor <- function(result, strata) { + + ## Create all possible combinations of strata levels and collapse as a vector. + dfStrataLevels <- expand.grid(attr(result, "dimnames")) # 1st var cycles fastest, consistent with by() + ## Create a single variable representing all strata + strataLevels <- apply(X = dfStrataLevels, MARGIN = 1, FUN = paste0, collapse = ":") + ## The length is the number of potential combinations. Used for the levels argument in the next part. + + ## Create the actual variable from the observed levels. NA if any one of the variables is NA. + strataVar <- as.character(interaction(strata, sep = ":")) + ## Make it a factor (kruskal.test requires it). Use levels not to drop defined nonexisting levels. + strataVar <- factor(strataVar, levels = strataLevels) + + ## Return stratifying variable. The length is the number of observations in the dataset. + ## NA for subjects with NA for any of the stratifying variables. + return(strataVar) +} + + + ### Modules intented for the print methods ################################################################################ From 9bdb6ee66f5658cd3c2dd3c13d24460dc61a086d Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Sun, 16 Feb 2014 20:28:22 -0500 Subject: [PATCH 18/31] p.miss (percentage) in summary.CatTable --- R/modules.R | 5 ++++- R/summary.CatTable.R | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/R/modules.R b/R/modules.R index 316d219..d4ac57c 100644 --- a/R/modules.R +++ b/R/modules.R @@ -81,6 +81,9 @@ ModuleCreateTableForOneVar <- function(x) { # Give a vector ## Total missing n (duplicated as many times as there are levels) freq$miss <- sum(is.na(x)) + ## Total missing percentage + freq$p.miss <- (freq$miss / freq$n) * 100 + ## Category frequency freq$freq <- freqRaw @@ -91,7 +94,7 @@ ModuleCreateTableForOneVar <- function(x) { # Give a vector freq$cum.percent <- cumsum(freqRaw) / sum(freqRaw) * 100 ## Reorder variables - freq <- freq[c("n","miss","level","freq","percent","cum.percent")] + freq <- freq[c("n","miss","p.miss","level","freq","percent","cum.percent")] ## Return result as a data frame return(freq) diff --git a/R/summary.CatTable.R b/R/summary.CatTable.R index 0c1e1d2..052b18f 100644 --- a/R/summary.CatTable.R +++ b/R/summary.CatTable.R @@ -66,8 +66,8 @@ summary.CatTable <- function(object, digits = 1, ...) { DF) ## Format percent and cum.percent - DF[c("percent","cum.percent")] <- - lapply(X = DF[c("percent","cum.percent")], + DF[c("p.miss","percent","cum.percent")] <- + lapply(X = DF[c("p.miss","percent","cum.percent")], FUN = sprintf, fmt = fmt) @@ -77,7 +77,7 @@ summary.CatTable <- function(object, digits = 1, ...) { FUN = as.character) ## Delete n and miss except in the first row - DF[-1, c("var","n","miss")] <- "" + DF[-1, c("var","n","miss","p.miss")] <- "" ## row bind an empty row DF <- rbind(DF, From 7f8fde8ddb6841f61667289897ad85778e7ae611 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Sun, 16 Feb 2014 20:43:19 -0500 Subject: [PATCH 19/31] p.miss for Cont in percentage --- R/CreateContTable.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/CreateContTable.R b/R/CreateContTable.R index bc6e5e0..8bb32f1 100644 --- a/R/CreateContTable.R +++ b/R/CreateContTable.R @@ -139,7 +139,7 @@ CreateContTable <- ## Create a list of default functions functions <- c("n" = function(x) {length(x)}, "miss" = function(x) {sum(is.na(x))}, - "p.miss" = function(x) {sum(is.na(x)) / length(x)}, + "p.miss" = function(x) {(sum(is.na(x)) / length(x)) * 100}, "mean" = function(x) {mean(x, na.rm = TRUE)}, "sd" = function(x) {sd(x, na.rm = TRUE)}, "median" = function(x) {median(x, na.rm = TRUE)}, From fc93a3c73b57af51763ec0db7198a8c817f30673 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Sun, 16 Feb 2014 21:46:52 -0500 Subject: [PATCH 20/31] ModuleHandleDefaultOrAlternative for unified handling of nonnormal and exact TRUE/FALSE or character vector argument --- R/modules.R | 41 +++++++++++++++++++++++++++++++++++++ R/print.CatTable.R | 44 ++++++++++----------------------------- R/print.ContTable.R | 50 +++++++++++++-------------------------------- 3 files changed, 66 insertions(+), 69 deletions(-) diff --git a/R/modules.R b/R/modules.R index d4ac57c..4b14928 100644 --- a/R/modules.R +++ b/R/modules.R @@ -202,6 +202,47 @@ ModuleConvertNonNormal <- function(rowMat, digits, minMax = FALSE) { return(out) } + +## Module to handle TRUE/FALSE or character vector of variable names +## Returns a numeric vector: 1 for default action variable; 2 for alternative action variable +ModuleHandleDefaultOrAlternative <- function(switchVec, nameOfSwitchVec, varNames) { + + ## Check the number of variables + nVars <- length(varNames) + + ## If null, do default print/test for all variables + if (is.null(switchVec)) { + ## Give one as many as there are variables + switchVec <- rep(1, nVars) + + } else { + ## If not null, it needs checking. + + ## Check the switchVec argument + if (!is.logical(switchVec) & !is.character(switchVec)) { + stop(paste0(nameOfSwitchVec, " argument has to be FALSE/TRUE or a character vector of variable names.")) + } + ## Extend if it is a logitcal vector with one element. + if (is.logical(switchVec)) { + + if (length(switchVec) != 1) { + stop(paste0(nameOfSwitchVec, " has to be a logical vector of length 1")) + } + + switchVec <- rep(switchVec, nVars) + } + ## Convert to a logical vector if it is a character vector + if (is.character(switchVec)) { + switchVec <- varNames %in% switchVec + } + ## Convert to numeric (1 for default action, 2 for alternative actions) + switchVec <- as.numeric(switchVec) + 1 + } + + return(switchVec) +} + + ### Modules by both print and summary methods ## ModuleQuoteAndPrintMat() ## Takes an matrix object format, print, and (invisibly) return it diff --git a/R/print.CatTable.R b/R/print.CatTable.R index 6b31fbb..68d93d2 100644 --- a/R/print.CatTable.R +++ b/R/print.CatTable.R @@ -110,36 +110,14 @@ print.CatTable <- function(x, # CatTable object ## Save variable names using the first non-null element varNames <- names(CatTable[[posFirstNonNullElement]]) ## Check the number of variables (list length) - nVar <- length(varNames) + nVars <- length(varNames) - ## If null, do approx - if (is.null(exact)) { - exact <- rep(1, nVar) - - } else { - ## If not null, it needs checking. - - ## Check the exact argument - if (!is.logical(exact) & !is.character(exact)) { - stop("exact argument has to be FALSE/TRUE or character.") - } - ## Extend if it is a logitcal vector with one element. - if (is.logical(exact)) { - - if (length(exact) != 1) { - stop("exact has to be a logical vector of length 1") - } - - exact <- rep(exact, nVar) - } - ## Convert to a logical vector if it is a character vector - if (is.character(exact)) { - exact <- varNames %in% exact - } - ## Convert to numeric (1 for approx, 2 for exact) - exact <- as.numeric(exact) + 1 - } + ## Returns a numeric vector: 1 for approx test variable; 2 for exact test variable + exact <- ModuleHandleDefaultOrAlternative(switchVec = exact, + nameOfSwitchVec = "exact", + varNames = varNames) + ## Check format argument. If it is broken, choose "fp" for frequency (percent) if (!length(format) == 1 | !format %in% c("fp","f","p","pf")) { @@ -151,15 +129,15 @@ print.CatTable <- function(x, # CatTable object ## Added as the top row later strataN <- sapply(CatTable, FUN = function(stratum) { # loop over strata - ## Just the first available element may be enough. - ## Obtain n from all variables and all levels, and get the mean + ## each stratum is a list of one data frame for each variable + ## Obtain n from all variables and all levels n <- unlist(sapply(stratum, getElement, "n")) ## Pick the first non-null element n[!is.null(n)][1] - ## Convert NULL to N + ## Convert NULL to 0 ifelse(is.null(n), "0", as.character(n)) }, - simplify = TRUE) + simplify = TRUE) # vector with as many elements as strata ## Provide indicators to show what columns were added. wasLevelColumnAdded <- FALSE @@ -391,7 +369,7 @@ print.CatTable <- function(x, # CatTable object ## Add p-values when requested and available if (test == TRUE & !is.null(attr(CatTable, "pValues"))) { - ## nVariables x 2 (pNormal,pNonNormal) data frame + ## nVarsiables x 2 (pNormal,pNonNormal) data frame pValues <- attr(CatTable, "pValues") ## Pick ones specified in exact (a vector with 1s(approx) and 2s(exact)) diff --git a/R/print.ContTable.R b/R/print.ContTable.R index 7d3d98b..6701335 100644 --- a/R/print.ContTable.R +++ b/R/print.ContTable.R @@ -102,38 +102,16 @@ print.ContTable <- function(x, # ContTable object posFirstNonNullElement <- which(!sapply(ContTable, is.null))[1] ## Save variable names using the first non-null element varNames <- rownames(ContTable[[posFirstNonNullElement]]) - ## Check the number of rows - nRows <- length(varNames) + ## Check the number of variables + nVars <- length(varNames) - ## If null, do normal print/test - if (is.null(nonnormal)) { - ## Give one as many as there are rows - nonnormal <- rep(1, nRows) - } else { - ## If not null, it needs checking. - - ## Check the nonnormal argument - if (!is.logical(nonnormal) & !is.character(nonnormal)) { - stop("nonnormal argument has to be FALSE/TRUE or character.") - } - ## Extend if it is a logitcal vector with one element. - if (is.logical(nonnormal)) { - - if (length(nonnormal) != 1) { - stop("nonormal has to be a logical vector of length 1") - } - - nonnormal <- rep(nonnormal, nRows) - } - ## Convert to a logical vector if it is a character vector - if (is.character(nonnormal)) { - nonnormal <- varNames %in% nonnormal - } - ## Convert to numeric (1 for normal, 2 for nonnormal) - nonnormal <- as.numeric(nonnormal) + 1 - } + ## Returns a numeric vector: 1 for normal variable; 2 for nonnormal variable + nonnormal <- ModuleHandleDefaultOrAlternative(switchVec = nonnormal, + nameOfSwitchVec = "nonnormal", + varNames = varNames) + ## Check the statistics. If necessary statistics are lacking abort statNames <- colnames(ContTable[[posFirstNonNullElement]]) funcDefault <- c("n","miss","mean","sd","median","p25","p75") @@ -148,15 +126,15 @@ print.ContTable <- function(x, # ContTable object ## Added as the top row later strataN <- sapply(ContTable, FUN = function(stratum) { # loop over strata - ## Just the first available element may be enough. - ## Obtain n from all variables and all levels, and get the mean + ## each strutum is a data frame with one row for each variable + ## Obtain n from all variables n <- stratum[,"n"] ## Pick the first non-null element n[!is.null(n)][1] - ## Convert NULL to N + ## Convert NULL to 0 ifelse(is.null(n), "0", as.character(n)) }, - simplify = TRUE) + simplify = TRUE) # vector with as many elements as strata ## Provide indicators to show what columns were added. wasPValueColumnAdded <- FALSE @@ -179,7 +157,7 @@ print.ContTable <- function(x, # ContTable object ## Create a list of these two functions listOfFunctions <- list(normal = ConvertNormal, nonnormal = ConvertNonNormal) - ## Take functions from the 2-element list, and convert to an nRows-length list + ## Take functions from the 2-element list, and convert to an nVars-length list listOfFunctions <- listOfFunctions[nonnormal] ## Loop over strata (There may be just one) @@ -188,14 +166,14 @@ print.ContTable <- function(x, # ContTable object ## In an empty stratum, return empty if (is.null(stratum)) { - out <- rep("-", nRows) + out <- rep("-", nVars) ## Give NA to the width of the mean/median column in characters nCharMeanOrMedian <- NA } else { ## Apply row by row within each non-empty stratum ## This row-by-row operation is necessary to handle mean (sd) and median [IQR] - out <- sapply(seq_len(nRows), + out <- sapply(seq_len(nVars), FUN = function(i) { ## Choose between normal or nonnormal function From f6d6c49935cb88d3e1a8aa3d739a5490f75e39e1 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Sun, 16 Feb 2014 22:04:11 -0500 Subject: [PATCH 21/31] Made firstRowInd to keep var,n,miss,pmiss non-redundant (one instance after if/else if/else) --- R/print.CatTable.R | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/R/print.CatTable.R b/R/print.CatTable.R index 68d93d2..7d2b8b0 100644 --- a/R/print.CatTable.R +++ b/R/print.CatTable.R @@ -117,7 +117,7 @@ print.CatTable <- function(x, # CatTable object exact <- ModuleHandleDefaultOrAlternative(switchVec = exact, nameOfSwitchVec = "exact", varNames = varNames) - + ## Check format argument. If it is broken, choose "fp" for frequency (percent) if (!length(format) == 1 | !format %in% c("fp","f","p","pf")) { @@ -130,7 +130,7 @@ print.CatTable <- function(x, # CatTable object strataN <- sapply(CatTable, FUN = function(stratum) { # loop over strata ## each stratum is a list of one data frame for each variable - ## Obtain n from all variables and all levels + ## Obtain n from all variables and all levels (list of data frames) n <- unlist(sapply(stratum, getElement, "n")) ## Pick the first non-null element n[!is.null(n)][1] @@ -195,23 +195,21 @@ print.CatTable <- function(x, # CatTable object ## If showAllLevels is FALSE AND there are only ONE levels, ## change variable name to "var = level" DF$var <- with(DF, paste0(var, " = ", level)) - ## Add first row indicator (used to add (%)) - DF[1,"firstRowInd"] <- "first" } else if (!showAllLevels & nRow == 2) { - ## cram results in one row if requested + ## cram two levels in one row if requested if (unique(DF$var) %in% cramVars) { - ## If cramVars is true. Cram in one line + ## If cramVars includes var, cram into one line ## Cram two freq and count with / in between DF$freq <- paste0(DF$freq, collapse = "/") DF$percent <- paste0(DF$percent, collapse = "/") + ## change variable name, and delete the first level. DF$var <- paste0(DF$var, " = ", paste0(DF$level, collapse = "/")) ## Delete the first row DF <- DF[-1, , drop = FALSE] - ## Add first row indicator (used to add (%)) - DF[1,"firstRowInd"] <- "first" + ## Add crammed row indicator (used for formatting) DF[1,"crammedRowInd"] <- "crammed" } else { @@ -220,8 +218,6 @@ print.CatTable <- function(x, # CatTable object ## change variable name, and delete the first level. DF$var <- with(DF, paste0(var, " = ", level)) DF <- DF[-1, , drop = FALSE] - ## Add first row indicator (used to add (%)) - DF[1,"firstRowInd"] <- "first" } } else if (!showAllLevels & nRow > 2) { @@ -236,16 +232,15 @@ print.CatTable <- function(x, # CatTable object secondToLastRows <- seq(from = 2,to = nrow(DF), by = 1) DF[secondToLastRows, "var"] <- paste0(" ", DF[secondToLastRows, "level"]) # preceding spaces - ## Add first row indicator (used to add (%)) - DF[1,"firstRowInd"] <- "first" } else if (showAllLevels) { - ## If showAllLevels is TRUE clear names - DF[-1, c("var","n","miss")] <- "" - ## Add first row indicator (used to add (%)) - DF[1,"firstRowInd"] <- "first" + ## If showAllLevels is TRUE, clear these except in 1st row + DF[-1, c("var","n","miss","p.miss")] <- "" } + ## Add first row indicator (used to add (%)) + DF[1,"firstRowInd"] <- "first" + ## Return a data frame DF }, From 291b9aeac0445dafa76cd168e9dc03ffd47216e6 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Sun, 16 Feb 2014 22:35:26 -0500 Subject: [PATCH 22/31] cleaning code (browser on for justification checking) --- R/print.CatTable.R | 161 ++++++++++++++++++++++---------------------- R/print.ContTable.R | 2 +- 2 files changed, 81 insertions(+), 82 deletions(-) diff --git a/R/print.CatTable.R b/R/print.CatTable.R index 7d2b8b0..5dfce9e 100644 --- a/R/print.CatTable.R +++ b/R/print.CatTable.R @@ -159,94 +159,93 @@ print.CatTable <- function(x, # CatTable object if (!is.null(LIST)) { ## Returns an empty list if the stratum is null (empty). - LIST <- sapply(X = seq_along(LIST), # Loop over variables (list element is DF) - FUN = function(i) { - - ## Extract the data frame (list element) - DF <- LIST[[i]] - - ## Extract the variable name - varName <- names(LIST)[i] - - ## Check number of rows (levels) - nRow <- nrow(DF) + LIST <- + sapply(X = seq_along(LIST), # Loop over variables (list element is DF) + FUN = function(i) { + + ## Extract the data frame (list element) + DF <- LIST[[i]] + + ## Extract the variable name + varName <- names(LIST)[i] + + ## Check number of rows (levels) + nRow <- nrow(DF) + + ## Add a variable name to the left as a character vector + DF <- cbind(var = rep(varName, nRow), + DF) + + ## Format percent and cum.percent as strings + DF[c("p.miss","percent","cum.percent")] <- + lapply(X = DF[c("p.miss","percent","cum.percent")], + FUN = sprintf, + fmt = fmt1) + + + ## Make all variables strings (freq is an integer, so direct convert ok) + DF[] <- lapply(X = DF, FUN = as.character) + + ## Add first row indicator column + DF$firstRowInd <- "" + ## Add crammed row indicator column + DF$crammedRowInd <- "" + + ## Format based on the number of levels + if (!showAllLevels & nRow == 1) { + ## If showAllLevels is FALSE AND there are only ONE levels, + ## change variable name to "var = level" + DF$var <- with(DF, paste0(var, " = ", level)) + + } else if (!showAllLevels & nRow == 2) { + ## If showAllLevels is FALSE AND there are only TWO levels, + ## cram two levels in one row if requested + if (unique(DF$var) %in% cramVars) { + ## If cramVars includes var, cram into one line + ## Cram two freq and count with / in between + DF$freq <- paste0(DF$freq, collapse = "/") + DF$percent <- paste0(DF$percent, collapse = "/") + ## change variable name, and delete the first level. + DF$var <- paste0(DF$var, " = ", + paste0(DF$level, collapse = "/")) + ## Delete the first row + DF <- DF[-1, , drop = FALSE] + ## Add crammed row indicator (used for formatting) + DF[1,"crammedRowInd"] <- "crammed" + } else { + ## Otherwise, keep the second level only + ## change variable name, and delete the first level. + DF$var <- with(DF, paste0(var, " = ", level)) + DF <- DF[-1, , drop = FALSE] + } - ## Add a variable name to the left as a character vector - DF <- cbind(var = rep(varName, nRow), + } else if (!showAllLevels & nRow > 2) { + ## If showAllLevels is FALSE AND there are MORE THAN two levels, + ## add an empty row and put the var name, then levels below. + DF <- rbind(rep("", ncol(DF)), DF) + ## Add variable name in the first row + DF[1,"var"] <- DF[2,"var"] - ## Format percent and cum.percent as strings - DF[c("percent","cum.percent")] <- - lapply(X = DF[c("percent","cum.percent")], - FUN = sprintf, - fmt = fmt1) - - - ## Make all variables strings (freq is an integer, so direct convert ok) - DF[] <- lapply(X = DF, FUN = as.character) - - ## Add first row indicator column - DF$firstRowInd <- "" - ## Add crammed row indicator column - DF$crammedRowInd <- "" - - ## Format based on the number of levels - if (!showAllLevels & nRow == 1) { - ## If showAllLevels is FALSE AND there are only ONE levels, - ## change variable name to "var = level" - DF$var <- with(DF, paste0(var, " = ", level)) - - } else if (!showAllLevels & nRow == 2) { - - ## cram two levels in one row if requested - if (unique(DF$var) %in% cramVars) { - ## If cramVars includes var, cram into one line - ## Cram two freq and count with / in between - DF$freq <- paste0(DF$freq, collapse = "/") - DF$percent <- paste0(DF$percent, collapse = "/") - ## change variable name, and delete the first level. - DF$var <- paste0(DF$var, " = ", - paste0(DF$level, collapse = "/")) - ## Delete the first row - DF <- DF[-1, , drop = FALSE] - ## Add crammed row indicator (used for formatting) - DF[1,"crammedRowInd"] <- "crammed" - - } else { - ## Otherwise, keep the second level only - ## If showAllLevels is FALSE AND there are only TWO levels, - ## change variable name, and delete the first level. - DF$var <- with(DF, paste0(var, " = ", level)) - DF <- DF[-1, , drop = FALSE] - } - - } else if (!showAllLevels & nRow > 2) { - ## If showAllLevels is FALSE AND there are MORE THAN two levels, - ## add an empty row and put the var name, then levels below. - DF <- rbind(rep("", ncol(DF)), - DF) - ## Add variable name in the first row - DF[1,"var"] <- DF[2,"var"] - - ## 2nd to last have level names. (nrow has changed by +1) - secondToLastRows <- seq(from = 2,to = nrow(DF), by = 1) - DF[secondToLastRows, "var"] <- - paste0(" ", DF[secondToLastRows, "level"]) # preceding spaces - - } else if (showAllLevels) { - ## If showAllLevels is TRUE, clear these except in 1st row - DF[-1, c("var","n","miss","p.miss")] <- "" - } + ## 2nd to last have level names. (nrow has changed by +1) + secondToLastRows <- seq(from = 2,to = nrow(DF), by = 1) + DF[secondToLastRows, "var"] <- + paste0(" ", DF[secondToLastRows, "level"]) # preceding spaces - ## Add first row indicator (used to add (%)) - DF[1,"firstRowInd"] <- "first" + } else if (showAllLevels) { + ## If showAllLevels is TRUE, clear these except in 1st row + DF[-1, c("var","n","miss","p.miss")] <- "" + } - ## Return a data frame - DF - }, - simplify = FALSE) + ## Add first row indicator (used to add (%)) + DF[1,"firstRowInd"] <- "first" + ## Return a data frame + DF + }, + simplify = FALSE) # Looped over variables (list element is DF) + browser() ## Collapse DFs within each stratum DF <- do.call(rbind, LIST) diff --git a/R/print.ContTable.R b/R/print.ContTable.R index 6701335..478cffe 100644 --- a/R/print.ContTable.R +++ b/R/print.ContTable.R @@ -127,7 +127,7 @@ print.ContTable <- function(x, # ContTable object strataN <- sapply(ContTable, FUN = function(stratum) { # loop over strata ## each strutum is a data frame with one row for each variable - ## Obtain n from all variables + ## Obtain n from all variables (matrix) n <- stratum[,"n"] ## Pick the first non-null element n[!is.null(n)][1] From 3f55fa066d030a28b13dcf26b7120ea8d4e82f7e Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Mon, 17 Feb 2014 00:23:09 -0500 Subject: [PATCH 23/31] justification scheme corrected for $CatTable --- R/print.CatTable.R | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/R/print.CatTable.R b/R/print.CatTable.R index 5dfce9e..df7d947 100644 --- a/R/print.CatTable.R +++ b/R/print.CatTable.R @@ -245,7 +245,7 @@ print.CatTable <- function(x, # CatTable object }, simplify = FALSE) # Looped over variables (list element is DF) - browser() + ## Collapse DFs within each stratum DF <- do.call(rbind, LIST) @@ -253,34 +253,40 @@ print.CatTable <- function(x, # CatTable object ## Check non-empty rows posNonEmptyRows <- DF$freq != "" - ## Right justify frequency (crammed and non crammed separately) - DF[DF$crammedRowInd == "crammed","freq"] <- - format(DF[DF$crammedRowInd == "crammed","freq"], justify = "right") - DF[DF$crammedRowInd == "","freq"] <- - format(DF[DF$crammedRowInd == "","freq"], justify = "right") + ## Create freq to be added on to the right side within () + DF$freqAddOn <- DF$freq + ## Right justify frequency (crammed and non-crammed at once) + DF$freq <- format(DF$freq, justify = "right") + ## Right justify frequency (non-crammed only) + DF[DF$crammedRowInd == "","freqAddOn"] <- + format(DF[DF$crammedRowInd == "","freqAddOn"], justify = "right") ## Obtain the max width of characters nCharFreq <- max(nchar(DF$freq)) - ## Right justify percent - DF[DF$crammedRowInd == "crammed","percent"] <- - format(DF[DF$crammedRowInd == "crammed","percent"], justify = "right") - DF[DF$crammedRowInd == "","percent"] <- - format(DF[DF$crammedRowInd == "","percent"], justify = "right") + + ## Create percent to be added on to the right side within () + DF$percentAddOn <- DF$percent + ## Right justify percent (crammed and non-crammed at once) + DF$percent <- format(DF$percent, justify = "right") + ## Right justify percent (non-crammed only) + DF[DF$crammedRowInd == "","percentAddOn"] <- + format(DF[DF$crammedRowInd == "","percentAddOn"], justify = "right") ## Obtain the max width of characters nCharPercent <- max(nchar(DF$percent)) + ## Add freq (percent) column (only in non-empty rows) DF$freqPer <- "" DF[posNonEmptyRows,]$freqPer <- sprintf(fmt = "%s (%s) ", DF[posNonEmptyRows,]$freq, - DF[posNonEmptyRows,]$percent) + DF[posNonEmptyRows,]$percentAddOn) ## Add percent (freq) column (only in non-empty rows) DF$perFreq <- "" DF[posNonEmptyRows,]$perFreq <- sprintf(fmt = "%s (%s) ", DF[posNonEmptyRows,]$percent, - DF[posNonEmptyRows,]$freq) + DF[posNonEmptyRows,]$freqAddOn) ## Add aditional attributes attributes(DF) <- c(attributes(DF), @@ -294,7 +300,6 @@ print.CatTable <- function(x, # CatTable object }, simplify = FALSE) - ## browser() ### Obtain the original column width in characters for alignment in print.TableOne ## Name of the column to keep From 6937415ad19ad13be91d799e3f78498515209633 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Mon, 17 Feb 2014 00:26:51 -0500 Subject: [PATCH 24/31] explanation for cramLevels arg fixed in print.CatTable --- R/print.CatTable.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/print.CatTable.R b/R/print.CatTable.R index df7d947..a5050aa 100644 --- a/R/print.CatTable.R +++ b/R/print.CatTable.R @@ -11,7 +11,7 @@ ##' @param printToggle Whether to print the output. If FLASE, no output is created, and a matrix is invisibly returned. ##' @param format The default is "fp" frequency (percentage). You can also choose from "f" frequency only, "p" percentage only, and "pf" percentage (frequency). ##' @param showAllLevels Whether to show all levels. FALSE by default, i.e., for 2-level categorical variables, only the higher level is shown to avoid -##' @param cramVars A character vector to specify the two-level categorical variables, for which both levels should be shown in one row. This should be used via \code{\link{print.TableOne}}. +##' @param cramVars A character vector to specify the two-level categorical variables, for which both levels should be shown in one row. ##' @param test Whether to show the p-values. TRUE by default. If FALSE, only the numerical summaries are shown. ##' @param exact A character vector to specify the variables for which the p-values should be those of exact tests. By default all p-values are from large sample approximation tests (chisq.test). ##' @param CrossTable Whether to show the cross table objects held internally using gmodels::CrossTable function. This will give an output similar to the PROC FREQ in SAS. From e81aeed07617b4894ad601179cfd127295f8d503 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Mon, 17 Feb 2014 00:53:03 -0500 Subject: [PATCH 25/31] man change only --- man/CreateContTable.Rd | 31 ++++++++++++++++--------------- man/print.CatTable.Rd | 3 +-- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/man/CreateContTable.Rd b/man/CreateContTable.Rd index b0d93c5..df99775 100644 --- a/man/CreateContTable.Rd +++ b/man/CreateContTable.Rd @@ -2,10 +2,11 @@ \alias{CreateContTable} \title{Create an object summarizing continous variables} \usage{ -CreateContTable(vars, strata, data, func.names = c("n", "miss", "mean", "sd", - "median", "p25", "p75", "min", "max", "skew", "kurt"), func.additional, - test = TRUE, testNormal = oneway.test, argsNormal = list(var.equal = - TRUE), testNonNormal = kruskal.test, argsNonNormal = list(NULL)) +CreateContTable(vars, strata, data, funcNames = c("n", "miss", "p.miss", + "mean", "sd", "median", "p25", "p75", "min", "max", "skew", "kurt"), + funcAdditional, test = TRUE, testNormal = oneway.test, + argsNormal = list(var.equal = TRUE), testNonNormal = kruskal.test, + argsNonNormal = list(NULL)) } \arguments{ \item{vars}{Variable(s) to be summarized given as a @@ -19,17 +20,17 @@ CreateContTable(vars, strata, data, func.names = c("n", "miss", "mean", "sd", All variables (both vars and strata) must be in this data frame.} - \item{func.names}{The functions to give the group size, - number with missing values, mean, standard deviations, - median, 25th percentile, 75th percentile, minimum, - maximum, skewness (same definition as in SAS), kurtosis - (same definition as in SAS). All of them can be seen in - the summary method output. The print method uses subset - of these. You can choose subset of them or reorder them. - They are all configure to omit NA values (\code{na.rm = - TRUE}).} - - \item{func.additional}{Additional functions can be given + \item{funcNames}{The functions to give the group size, + number with missing values, proportion with missing + values, mean, standard deviations, median, 25th + percentile, 75th percentile, minimum, maximum, skewness + (same definition as in SAS), kurtosis (same definition as + in SAS). All of them can be seen in the summary method + output. The print method uses subset of these. You can + choose subset of them or reorder them. They are all + configure to omit NA values (\code{na.rm = TRUE}).} + + \item{funcAdditional}{Additional functions can be given as a named list. For example, \code{list(sum = sum)}.} \item{test}{If TRUE, as in the default and there are more diff --git a/man/print.CatTable.Rd b/man/print.CatTable.Rd index dabd35f..1264428 100644 --- a/man/print.CatTable.Rd +++ b/man/print.CatTable.Rd @@ -41,8 +41,7 @@ \item{cramVars}{A character vector to specify the two-level categorical variables, for which both levels - should be shown in one row. This should be used via - \code{\link{print.TableOne}}.} + should be shown in one row.} \item{test}{Whether to show the p-values. TRUE by default. If FALSE, only the numerical summaries are From 6086b95295ab498b5c83b79691c82e62facda34b Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Mon, 17 Feb 2014 20:21:21 -0500 Subject: [PATCH 26/31] Stratified by: addition became modular --- R/modules.R | 32 +++++++++++++++++++++++++++----- R/print.CatTable.R | 17 ++--------------- R/print.ContTable.R | 18 +++--------------- 3 files changed, 32 insertions(+), 35 deletions(-) diff --git a/R/modules.R b/R/modules.R index 4b14928..73b5c24 100644 --- a/R/modules.R +++ b/R/modules.R @@ -151,12 +151,12 @@ ModuleCreateStrataVarAsFactor <- function(result, strata) { ## Create a single variable representing all strata strataLevels <- apply(X = dfStrataLevels, MARGIN = 1, FUN = paste0, collapse = ":") ## The length is the number of potential combinations. Used for the levels argument in the next part. - + ## Create the actual variable from the observed levels. NA if any one of the variables is NA. strataVar <- as.character(interaction(strata, sep = ":")) ## Make it a factor (kruskal.test requires it). Use levels not to drop defined nonexisting levels. strataVar <- factor(strataVar, levels = strataLevels) - + ## Return stratifying variable. The length is the number of observations in the dataset. ## NA for subjects with NA for any of the stratifying variables. return(strataVar) @@ -181,12 +181,12 @@ ModuleConvertNormal <- function(rowMat, digits) { ## Define a function to format a nonnormal variable ModuleConvertNonNormal <- function(rowMat, digits, minMax = FALSE) { - + ## Format for [p25, p75] fmt <- paste0(" [%.", digits,"f, %.",digits,"f]") if (minMax == FALSE) { - ## Create a DF with numeric median column and character [p25, p75] column + ## Create a DF with numeric median column and character [p25, p75] column out <- data.frame(col1 = rowMat[,"median"], col2 = sprintf(fmt = fmt, rowMat[,"p25"], rowMat[,"p75"]), stringsAsFactors = FALSE) @@ -209,7 +209,7 @@ ModuleHandleDefaultOrAlternative <- function(switchVec, nameOfSwitchVec, varName ## Check the number of variables nVars <- length(varNames) - + ## If null, do default print/test for all variables if (is.null(switchVec)) { ## Give one as many as there are variables @@ -243,6 +243,28 @@ ModuleHandleDefaultOrAlternative <- function(switchVec, nameOfSwitchVec, varName } +## Module to return the dimention headers added to the out 2d matrix +ModuleReturnDimHeaders <- function(TableObject) { + + ## Add stratification information to the column header + if (length(TableObject) > 1 ) { + ## Create strata string + strataString <- paste0("Stratified by ", + paste0(names(attr(TableObject, "dimnames")), collapse = ":")) + + ## Name the row dimension with it. 1st dimension name should be empty. + dimHeaders <- c("", strataString) + + } else { + ## If no stratification, no name for the second dimension + dimHeaders <- c("", "") + } + + ## Return the dim header a vector of length 2 + return(dimHeaders) +} + + ### Modules by both print and summary methods ## ModuleQuoteAndPrintMat() ## Takes an matrix object format, print, and (invisibly) return it diff --git a/R/print.CatTable.R b/R/print.CatTable.R index a5050aa..be420b6 100644 --- a/R/print.CatTable.R +++ b/R/print.CatTable.R @@ -443,21 +443,8 @@ print.CatTable <- function(x, # CatTable object ## Put back the column names (overkill for non-multivariable cases) colnames(out) <- outColNames - ## Add stratification information to the column header (This is also in the constructor) - if (length(CatTable) > 1 ) { - ## Combine variable names with : in between - strataVarName <- attributes(CatTable)$strataVarName - - ## Create strata string - strataString <- paste0("Stratified by ", strataVarName) - - ## Name the row dimension with it. 1st dimension name should be empty. - names(dimnames(out)) <- c("", strataString) - } else { - - names(dimnames(out)) <- c("", "") - } - + ## Add stratification information to the column header depending on the dimension + names(dimnames(out)) <- ModuleReturnDimHeaders(CatTable) ## Modular version of quote/print toggle. out <- ModuleQuoteAndPrintMat(matObj = out, diff --git a/R/print.ContTable.R b/R/print.ContTable.R index 478cffe..e51f003 100644 --- a/R/print.ContTable.R +++ b/R/print.ContTable.R @@ -111,7 +111,7 @@ print.ContTable <- function(x, # ContTable object nameOfSwitchVec = "nonnormal", varNames = varNames) - + ## Check the statistics. If necessary statistics are lacking abort statNames <- colnames(ContTable[[posFirstNonNullElement]]) funcDefault <- c("n","miss","mean","sd","median","p25","p75") @@ -294,7 +294,6 @@ print.ContTable <- function(x, # ContTable object ## Add mean (sd) or median [IQR]/median [range] explanation if requested if (explain) { - ## Create a vector of explanations to be pasted if (minMax == FALSE) { what <- c(" (mean (sd))"," (median [IQR])")[nonnormal] @@ -316,19 +315,8 @@ print.ContTable <- function(x, # ContTable object ## Put back the column names (overkill for non-multivariable cases) colnames(out) <- outColNames - ## Add stratification information to the column header - if (length(ContTable) > 1 ) { - ## Create strata string - strataString <- paste0("Stratified by ", - paste0(names(attr(ContTable, "dimnames")), collapse = ":")) - - ## Name the row dimension with it. 1st dimension name should be empty. - names(dimnames(out)) <- c("", strataString) - } else { - - names(dimnames(out)) <- c("", "") - } - + ## Add stratification information to the column header depending on the dimension + names(dimnames(out)) <- ModuleReturnDimHeaders(ContTable) ## (module) Takes an matrix object format, print if requested out <- ModuleQuoteAndPrintMat(matObj = out, From bbda8c4833094f343a0f35ab4cd7ef0c8b703386 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Mon, 17 Feb 2014 20:43:59 -0500 Subject: [PATCH 27/31] Made p value pick and format part modular. Always have test type column --- R/modules.R | 33 +++++++++++++++++++++++++++++ R/print.CatTable.R | 51 ++++++++++++--------------------------------- R/print.ContTable.R | 51 ++++++++++++--------------------------------- 3 files changed, 59 insertions(+), 76 deletions(-) diff --git a/R/modules.R b/R/modules.R index 73b5c24..ec7e889 100644 --- a/R/modules.R +++ b/R/modules.R @@ -243,6 +243,39 @@ ModuleHandleDefaultOrAlternative <- function(switchVec, nameOfSwitchVec, varName } +## p-value picker/formatter +ModulePickAndFormatPValues <- function(TableObject, switchVec, pDigits) { + + ## nVarsiables x 2 (pNormal,pNonNormal) data frame + pValues <- attr(TableObject, "pValues") + + ## Pick ones specified in exact (a vector with 1s(approx) and 2s(exact)) + pValues <- sapply(seq_along(switchVec), # loop over exact + FUN = function(i) { + ## Pick from a matrix i-th row, exact[i]-th column + ## Logical NA must be converted to a numeric + as.numeric(pValues[i, switchVec[i]]) + }, + simplify = TRUE) + + ## Format p value + fmt <- paste0("%.", pDigits, "f") + pVec <- sprintf(fmt = fmt, pValues) + + ## Create a string like <0.001 + smallPString <- paste0("<0.", paste0(rep("0", pDigits - 1), collapse = ""), "1") + ## Check positions where it is all zero like 0.000 + posAllZeros <- grepl("^0\\.0*$", pVec) + ## Put the string where it is all zero like 0.000 + pVec[posAllZeros] <- smallPString + ## Put a preceding space where it is not like 0.000 + pVec[!posAllZeros] <- paste0(" ", pVec[!posAllZeros]) + + ## Return formatted p-values (as many as there are variables) + return(pVec) +} + + ## Module to return the dimention headers added to the out 2d matrix ModuleReturnDimHeaders <- function(TableObject) { diff --git a/R/print.CatTable.R b/R/print.CatTable.R index be420b6..c94068e 100644 --- a/R/print.CatTable.R +++ b/R/print.CatTable.R @@ -368,57 +368,32 @@ print.CatTable <- function(x, # CatTable object ## Add p-values when requested and available if (test == TRUE & !is.null(attr(CatTable, "pValues"))) { - ## nVarsiables x 2 (pNormal,pNonNormal) data frame - pValues <- attr(CatTable, "pValues") - - ## Pick ones specified in exact (a vector with 1s(approx) and 2s(exact)) - pValues <- sapply(seq_along(exact), # loop over exact - FUN = function(i) { - ## Pick from a matrix i-th row, exact[i]-th column - ## Logical NA must be converted to a numeric - as.numeric(pValues[i, exact[i]]) - }, - simplify = TRUE) - - ## Pick test types used + ## Pick test types used (used for annonation) testTypes <- c("","exact")[exact] - ## Format p value - fmt <- paste0("%.", pDigits, "f") - p <- sprintf(fmt = fmt, pValues) - - ## Create a string like <0.001 - smallPString <- paste0("<0.", paste0(rep("0", pDigits - 1), collapse = ""), "1") - ## Check positions where it is all zero like 0.000 - posAllZeros <- grepl("^0\\.0*$", p) - ## Put the string where it is all zero like 0.000 - p[posAllZeros] <- smallPString - ## Put a preceding space where it is not like 0.000 - p[!posAllZeros] <- paste0(" ", p[!posAllZeros]) + ## Pick the p-values requested, and format like <0.001 + pVec <- ModulePickAndFormatPValues(TableObject = CatTable, + switchVec = exact, + pDigits = pDigits) ## Create an empty p-value column and test column out <- cbind(out, p = rep("", nrow(out))) # Column for p-values ## Put the values at the non-empty positions - out[logiNonEmptyRowNames,"p"] <- p + out[logiNonEmptyRowNames,"p"] <- pVec ## Change the indicator wasPValueColumnAdded <- TRUE - ## If exact test is used at least onece, add a test type indicator. - ## if (any(exact == 2)) { - if (TRUE) { - ## Create an empty test type column - out <- cbind(out, - test = rep("", nrow(out))) # Column for test types - - ## Put the test types at the non-empty positions - out[logiNonEmptyRowNames,"test"] <- testTypes + ## Create an empty test type column, and add test types + out <- cbind(out, + test = rep("", nrow(out))) # Column for test types + ## Put the test types at the non-empty positions (all rows in continuous!) + out[logiNonEmptyRowNames,"test"] <- testTypes - ## Change the indicator - wasExactColumnAdded <- TRUE - } + ## Change the indicator + wasExactColumnAdded <- TRUE } diff --git a/R/print.ContTable.R b/R/print.ContTable.R index e51f003..386873c 100644 --- a/R/print.ContTable.R +++ b/R/print.ContTable.R @@ -241,54 +241,29 @@ print.ContTable <- function(x, # ContTable object ## Add p-values when requested and available if (test == TRUE & !is.null(attr(ContTable, "pValues"))) { - ## nVariables x 2 (pNormal,pNonNormal) data frame - pValues <- attr(ContTable, "pValues") - - ## Pick ones specified in nonnormal (a vector with 1s(normal) and 2s(nonnormal)) - pValues <- sapply(seq_along(nonnormal), # loop over nonnormal - FUN = function(i) { - ## Pick from a matrix i-th row, nonnormal[i]-th column - ## Logical NA must be converted to a numeric - as.numeric(pValues[i, nonnormal[i]]) - }, - simplify = TRUE) - - ## Pick test types used + ## Pick test types used (used for annonation) testTypes <- c("","nonnorm")[nonnormal] - ## Format - fmt <- paste0("%.", pDigits, "f") - p <- sprintf(fmt = fmt, pValues) - - ## Create a string like <0.001 - smallPString <- paste0("<0.", paste0(rep("0", pDigits - 1), collapse = ""), "1") - ## Check positions where it is all zero like 0.000 - posAllZeros <- grepl("^0\\.0*$", p) - ## Put the string where it is all zero like 0.000 - p[posAllZeros] <- smallPString - ## Put a preceding space where it is not like 0.000 - p[!posAllZeros] <- paste0(" ", p[!posAllZeros]) + ## Pick the p-values requested, and format like <0.001 + pVec <- ModulePickAndFormatPValues(TableObject = ContTable, + switchVec = nonnormal, + pDigits = pDigits) ## Column combine with the output - out <- cbind(out, p = p) + out <- cbind(out, p = pVec) ## Change the indicator wasPValueColumnAdded <- TRUE - ## If nonormal test is used at least onece, add a test type indicator. - ## if (any(nonormal == 2)) { - if (TRUE) { - ## Create an empty test type column - out <- cbind(out, - test = rep("", nrow(out))) # Column for test types - - ## Put the test types at the non-empty positions (all rows in continuous!) - out[ ,"test"] <- testTypes + ## Create an empty test type column, and add test types + out <- cbind(out, + test = rep("", nrow(out))) # Column for test types + ## Put the test types at the non-empty positions (all rows in continuous!) + out[ ,"test"] <- testTypes - ## Change the indicator - wasNonNormalColumnAdded <- TRUE - } + ## Change the indicator + wasNonNormalColumnAdded <- TRUE } From 0c6f4666c21e7d9d83abb3efa44292cf32e49614 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Mon, 17 Feb 2014 21:32:31 -0500 Subject: [PATCH 28/31] column names (strata names) made modular --- R/modules.R | 13 +++++++++++++ R/print.CatTable.R | 7 ++----- R/print.ContTable.R | 6 +----- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/R/modules.R b/R/modules.R index ec7e889..50af9db 100644 --- a/R/modules.R +++ b/R/modules.R @@ -243,6 +243,19 @@ ModuleHandleDefaultOrAlternative <- function(switchVec, nameOfSwitchVec, varName } +## Column name formatter (strata names. "Overvall" if only one preset) +ModuleCreateStrataNames <- function(TableObject) { + + ## Create all combinations and collapse as strings + strataNames <- apply(expand.grid(attr(TableObject, "dimnames")), + MARGIN = 1, + paste0, collapse = ":") + + ## Return the names as a vector + return(strataNames) +} + + ## p-value picker/formatter ModulePickAndFormatPValues <- function(TableObject, switchVec, pDigits) { diff --git a/R/print.CatTable.R b/R/print.CatTable.R index c94068e..1cf03fd 100644 --- a/R/print.CatTable.R +++ b/R/print.CatTable.R @@ -342,13 +342,10 @@ print.CatTable <- function(x, # CatTable object ## Add column names if multivariable stratification is used. (No column names added automatically) if (length(attr(CatTable, "dimnames")) > 1) { - colnames(out) <- - ## Create all combinations and collapse as strings. 1st variable cycles fastest. - apply(expand.grid(attr(CatTable, "dimnames")), - MARGIN = 1, - paste0, collapse = ":") + colnames(out) <- ModuleCreateStrataNames(CatTable) } + ## Set the variables names rownames(out) <- CatTableCollapsed[[posFirstNonNullElement]][,"var"] ## Get positions of rows with variable names diff --git a/R/print.ContTable.R b/R/print.ContTable.R index 386873c..495415c 100644 --- a/R/print.ContTable.R +++ b/R/print.ContTable.R @@ -230,11 +230,7 @@ print.ContTable <- function(x, # ContTable object ## Add column names if multivariable stratification is used. if (length(attr(ContTable, "dimnames")) > 1) { - colnames(out) <- - ## Create all combinations and collapse as strings - apply(expand.grid(attr(ContTable, "dimnames")), - MARGIN = 1, - paste0, collapse = ":") + colnames(out) <- ModuleCreateStrataNames(ContTable) } From 03c347b60ed233b03adb24c8e4f2f2f5c2bab59f Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Mon, 17 Feb 2014 21:48:35 -0500 Subject: [PATCH 29/31] annotated modules.R --- R/modules.R | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/R/modules.R b/R/modules.R index 50af9db..79afb72 100644 --- a/R/modules.R +++ b/R/modules.R @@ -7,12 +7,14 @@ ### Modules intended for the constructors ################################################################################ +## Check if the data given is a data frame ModuleStopIfNotDataFrame <- function(data) { if (is.data.frame(data) == FALSE) { stop("The data argument needs to be a data frame (no quote).") } } + ## Extract variables that exist in the data frame ModuleReturnVarsExist <- function(vars, data) { @@ -28,11 +30,13 @@ ModuleReturnVarsExist <- function(vars, data) { ## Return existing variables return(vars) } + ## Stop if not vars are left ModuleStopIfNoVarsLeft <- function(vars) { if (length(vars) < 1) {stop("No valid variables.")} } -## + +## Toggle test FALSE if no strata are given ModuleReturnFalseIfNoStrata <- function(strata, test) { # Give strata variable names if(missing(strata)) { @@ -41,7 +45,8 @@ ModuleReturnFalseIfNoStrata <- function(strata, test) { # Give strata variable n } return(test) } -## Check statra variables and conditionally create + +## Check statra variables and conditionally create strata data frame ModuleReturnStrata <- function(strata, data, dat) { # Give strata variable names if(missing(strata)) { @@ -65,6 +70,7 @@ ModuleReturnStrata <- function(strata, data, dat) { # Give strata variable n ## return DF with strata variable(s) return(strata) } + ## Module to create a table for one categorical variable ## Taken from Deducer::frequencies() ModuleCreateTableForOneVar <- function(x) { # Give a vector @@ -100,7 +106,7 @@ ModuleCreateTableForOneVar <- function(x) { # Give a vector return(freq) } -## Create StrataVarName from multiple dimension headers +## Create StrataVarName from multiple dimension headers, for example sex:trt ModuleCreateStrataVarName <- function(obj) { ## Combine variable names with : in between paste0(names(attr(obj, "dimnames")), collapse = ":") @@ -120,10 +126,8 @@ ModuleTryCatchWE <- function(expr) { } ## Function to perform non-failing tests (obj should be xtabs or formula) -## Function has to have $p.value element -## Consider additional options by do.call() ModuleTestSafe <- function(obj, testFunction, testArgs = NULL) { - + ## Result from a function has to have $p.value element out <- ModuleTryCatchWE(do.call(testFunction, args = c(list(obj), testArgs))$p.value) ## If it returns a numeric value, return it. Otherwise, return NA. ifelse(is.numeric(out$value), out$value, NA) @@ -142,7 +146,7 @@ ModuleSasKurtosis <- function(x) { } -## Create a single variable representation of multivariable stratification +## Create a single variable representation of multivariable stratification for individuals ## result: by object; strata: data frame of stratifying variable(s) ModuleCreateStrataVarAsFactor <- function(result, strata) { @@ -243,7 +247,7 @@ ModuleHandleDefaultOrAlternative <- function(switchVec, nameOfSwitchVec, varName } -## Column name formatter (strata names. "Overvall" if only one preset) +## Column name formatter ModuleCreateStrataNames <- function(TableObject) { ## Create all combinations and collapse as strings @@ -328,7 +332,6 @@ ModuleQuoteAndPrintMat <- function(matObj, quote = FALSE, printToggle = TRUE) { names(dimnames(matObj))[1] <- paste0(" ", names(dimnames(matObj))[1]) } - ## print if required and return if (printToggle) { From 25adf33d50f20c3595c245c2add6602fad2ecca7 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Mon, 17 Feb 2014 21:50:38 -0500 Subject: [PATCH 30/31] at_param all made one line (easier for grep) --- R/CreateCatTable.R | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/R/CreateCatTable.R b/R/CreateCatTable.R index 4d27973..2a428b6 100644 --- a/R/CreateCatTable.R +++ b/R/CreateCatTable.R @@ -6,22 +6,12 @@ ##' also \code{\link{print.CatTable}} and \code{\link{summary.CatTable}}. ##' ##' @param vars Variable(s) to be summarized given as a character vector. -##' @param strata Stratifying (grouping) variable name(s) given as a character -##' vector. If omitted, the overall results are returned. -##' @param data A data frame in which these variables exist. All variables -##' (both vars and strata) must be in this data frame. -##' @param test If TRUE, as in the default and there are more than two groups, -##' groupwise comparisons are performed. Both tests that require the large -##' sample approximation and exact tests are performed. Either one of the -##' result can be obtained from the print method. -##' @param testApprox A function used to perform the large sample approximation -##' based tests. The default is \code{\link{chisq.test}}. This is not recommended when some -##' of the cell have small counts like fewer than 5. +##' @param strata Stratifying (grouping) variable name(s) given as a character vector. If omitted, the overall results are returned. +##' @param data A data frame in which these variables exist. All variables (both vars and strata) must be in this data frame. +##' @param test If TRUE, as in the default and there are more than two groups, groupwise comparisons are performed. Both tests that require the large sample approximation and exact tests are performed. Either one of the result can be obtained from the print method. +##' @param testApprox A function used to perform the large sample approximation based tests. The default is \code{\link{chisq.test}}. This is not recommended when some of the cell have small counts like fewer than 5. ##' @param argsApprox A named list of arguments passed to the function specified in testApprox. The default is \code{list(correct = TRUE)}, which turns on the continuity correction for \code{\link{chisq.test}}. -##' @param testExact A function used to perform the exact tests. The default is -##' fisher.test. If the cells have large numbers, it will fail because of -##' memory limitation. In this situation, the large sample approximation based -##' should suffice. +##' @param testExact A function used to perform the exact tests. The default is fisher.test. If the cells have large numbers, it will fail because of memory limitation. In this situation, the large sample approximation based should suffice. ##' @param argsExact A named list of arguments passed to the function specified in testExact. The default is \code{list(workspace = 2*10^5)}, which specifies the memory space allocated for \code{\link{fisher.test}}. ##' @return An object of class \code{CatTable}, which really is a \code{\link{by}} object with additional attributes. Each element of the \code{\link{by}} part is a matrix with rows representing variables, and columns representing summary statistics. ##' @author Kazuki Yoshida (based on \code{Deducer::frequencies()}) From b8e13e7ab4d343c9e1c4ff48d6da87f7afd89251 Mon Sep 17 00:00:00 2001 From: kaz-yos Date: Mon, 17 Feb 2014 22:15:05 -0500 Subject: [PATCH 31/31] cramVars added to examples --- DESCRIPTION | 2 +- R/CreateCatTable.R | 3 +++ R/print.CatTable.R | 3 +++ R/print.TableOne.R | 5 +++-- R/tableone-package.R | 4 ++-- man/CreateCatTable.Rd | 3 +++ man/print.CatTable.Rd | 3 +++ man/print.TableOne.Rd | 5 +++-- man/tableone-package.Rd | 4 ++-- 9 files changed, 23 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0e31080..46a4e78 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: tableone Type: Package Title: Create "Table 1" to describe baseline characteristics Version: 0.3.0 -Date: 2014-02-16 +Date: 2014-02-17 Author: Kazuki Yoshida, Justin Bohn Maintainer: Kazuki Yoshida Description: This package creates "Table 1", i.e., description of baseline diff --git a/R/CreateCatTable.R b/R/CreateCatTable.R index 2a428b6..1f30163 100644 --- a/R/CreateCatTable.R +++ b/R/CreateCatTable.R @@ -40,6 +40,9 @@ ##' ## unless the variables are specified in the cramVars argument. ##' catTableOverall ##' +##' ## If you need to show both levels for some 2-level factors, use cramVars +##' print(catTableOverall, cramVars = "hepato") +##' ##' ## Use the showAllLevels argument to see all levels for all variables. ##' print(catTableOverall, showAllLevels = TRUE) ##' diff --git a/R/print.CatTable.R b/R/print.CatTable.R index 1cf03fd..fd59144 100644 --- a/R/print.CatTable.R +++ b/R/print.CatTable.R @@ -42,6 +42,9 @@ ##' ## For 2-level variables, only the higher level is shown for simplicity. ##' catTableOverall ##' +##' ## If you need to show both levels for some 2-level factors, use cramVars +##' print(catTableOverall, cramVars = "hepato") +##' ##' ## Use the showAllLevels argument to see all levels for all variables. ##' print(catTableOverall, showAllLevels = TRUE) ##' diff --git a/R/print.TableOne.R b/R/print.TableOne.R index a729cb4..7b7a3d1 100644 --- a/R/print.TableOne.R +++ b/R/print.TableOne.R @@ -50,9 +50,10 @@ ##' ##' ## Specifying nonnormal variables will show the variables appropriately, ##' ## and show nonparametric test p-values. Specify variables in the exact -##' ## argument to obtain the exact test p-values. +##' ## argument to obtain the exact test p-values. cramVars can be used to +##' ## show both levels for a 2-level categorical variables. ##' print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), -##' exact = c("status","stage")) +##' exact = c("status","stage"), cramVars = "hepato") ##' ##' ## Use the summary.TableOne method for detailed summary ##' summary(tableOne) diff --git a/R/tableone-package.R b/R/tableone-package.R index 9eb0551..1961439 100644 --- a/R/tableone-package.R +++ b/R/tableone-package.R @@ -44,7 +44,7 @@ ##' factorVars <- c("status","trt","ascites","hepato","spiders","edema","stage") ##' ##' ## Create a variable list -##' dput(names(pbc)) +##' dput(names(pbc)) # This shows a character vector-creating syntax. ##' vars <- c("time","status","age","sex","ascites","hepato", ##' "spiders","edema","bili","chol","albumin", ##' "copper","alk.phos","ast","trig","platelet", @@ -81,6 +81,6 @@ ##' ## you may benefit from the quote argument. This will quote everything so that ##' ## Excel does not mess up the cells. ##' print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), -##' exact = c("status","stage"), quote = TRUE) +##' exact = c("status","stage"), cramVars = "sex", quote = TRUE) ##' NULL diff --git a/man/CreateCatTable.Rd b/man/CreateCatTable.Rd index 72c6fc4..ec231f5 100644 --- a/man/CreateCatTable.Rd +++ b/man/CreateCatTable.Rd @@ -80,6 +80,9 @@ catTableOverall <- CreateCatTable(vars = catVars, data = pbc) ## unless the variables are specified in the cramVars argument. catTableOverall +## If you need to show both levels for some 2-level factors, use cramVars +print(catTableOverall, cramVars = "hepato") + ## Use the showAllLevels argument to see all levels for all variables. print(catTableOverall, showAllLevels = TRUE) diff --git a/man/print.CatTable.Rd b/man/print.CatTable.Rd index 1264428..9224605 100644 --- a/man/print.CatTable.Rd +++ b/man/print.CatTable.Rd @@ -87,6 +87,9 @@ catTableOverall <- CreateCatTable(vars = catVars, data = pbc) ## For 2-level variables, only the higher level is shown for simplicity. catTableOverall +## If you need to show both levels for some 2-level factors, use cramVars +print(catTableOverall, cramVars = "hepato") + ## Use the showAllLevels argument to see all levels for all variables. print(catTableOverall, showAllLevels = TRUE) diff --git a/man/print.TableOne.Rd b/man/print.TableOne.Rd index d17bbf3..15a6761 100644 --- a/man/print.TableOne.Rd +++ b/man/print.TableOne.Rd @@ -99,9 +99,10 @@ tableOne ## Specifying nonnormal variables will show the variables appropriately, ## and show nonparametric test p-values. Specify variables in the exact -## argument to obtain the exact test p-values. +## argument to obtain the exact test p-values. cramVars can be used to +## show both levels for a 2-level categorical variables. print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), - exact = c("status","stage")) + exact = c("status","stage"), cramVars = "hepato") ## Use the summary.TableOne method for detailed summary summary(tableOne) diff --git a/man/tableone-package.Rd b/man/tableone-package.Rd index 4447ef0..11081de 100644 --- a/man/tableone-package.Rd +++ b/man/tableone-package.Rd @@ -49,7 +49,7 @@ head(pbc) factorVars <- c("status","trt","ascites","hepato","spiders","edema","stage") ## Create a variable list -dput(names(pbc)) +dput(names(pbc)) # This shows a character vector-creating syntax. vars <- c("time","status","age","sex","ascites","hepato", "spiders","edema","bili","chol","albumin", "copper","alk.phos","ast","trig","platelet", @@ -86,7 +86,7 @@ summary(tableOne$ContTable) ## you may benefit from the quote argument. This will quote everything so that ## Excel does not mess up the cells. print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), - exact = c("status","stage"), quote = TRUE) + exact = c("status","stage"), cramVars = "sex", quote = TRUE) } \author{ Kazuki Yoshida, Justin Bohn