Ready for 0.5.0

koenderks · Jan 4, 2021 · f6f4b87 · f6f4b87
1 parent a242c96
commit f6f4b87
Show file tree

Hide file tree

Showing 41 changed files with 4,360 additions and 188 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -1,4 +1,4 @@
-^\.travis\.yml$
+^.yml$
 ^cran-comments.md$
 ^codecov.yml$
 ^.github$

diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,2 @@
-doc
-Meta
 docs
+Meta
diff --git a/.travis.yml b/.travis.yml
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: jfa
 Title: Bayesian and Classical Audit Sampling
 Version: 0.5.0
-Date: 2020-11-07
+Date: 2021-01-04
 Authors@R: 
     person(given = "Koen",
            family = "Derks",

diff --git a/Meta/vignette.rds b/Meta/vignette.rds
diff --git a/R/auditPrior.R b/R/auditPrior.R
@@ -129,8 +129,11 @@ auditPrior <- function(confidence = 0.95, likelihood = "binomial", method = "non
 	if(likelihood == "hypergeometric" && (is.null(N) || N <= 0))
 		stop("The hypergeometric likelihood requires that you specify a positive value for the populatin size N.")
 
-	if(expectedError >= 1 || expectedError < 0)
-		stop("The expected errors must be entered as a proportion.")
+	if(expectedError < 0)
+		stop("The expected errors must be zero or larger than zero.")
+
+	if(expectedError >= 1 && method != "none")
+		stop("The expected errors must be entered as a proportion to use this prior construction method.")
 
 	# Create the prior distribution depending on the specified method
 	if(method == "none"){

diff --git a/R/evaluation.R b/R/evaluation.R
@@ -1,6 +1,6 @@
 #' Evaluation of Audit Samples using Confidence / Credible Bounds
 #'
-#' @description This function takes a data frame (using \code{sample}, \code{bookValue}, and \code{auditValues}) or summary statistics (using \code{nSumstats} and \code{kSumstats}) and evaluates the audit sample according to the specified method. The returned object is of class \code{jfaEvaluation} and can be used with associated \code{print()} and \code{plot()} methods.
+#' @description This function takes a data frame (using \code{sample}, \code{bookValues}, and \code{auditValues}) or summary statistics (using \code{nSumstats} and \code{kSumstats}) and evaluates the audit sample according to the specified method. The returned object is of class \code{jfaEvaluation} and can be used with associated \code{print()} and \code{plot()} methods.
 #'
 #' For more details on how to use this function see the package vignette:
 #' \code{vignette("jfa", package = "jfa")}
@@ -14,7 +14,7 @@
 #'             csA = 1, csB = 3, csMu = 0.5) 
 #'
 #' @param confidence    the required confidence level for the bound. Default is 0.95 for 95\% confidence.
-#' @param method        the method that is used to evaluate the sample. This can be either one of \code{poisson}, \code{binomial}, \code{hypergeometric}, \code{stringer}, \code{stringer-meikle}, \code{stringer-lta}, \code{stringer-pvz}, \code{rohrbach}, \code{moment}, \code{direct}, \code{difference}, \code{quotient}, or \code{regression}. 
+#' @param method        the method that is used to evaluate the sample. This can be either one of \code{poisson}, \code{binomial}, \code{hypergeometric}, \code{mpus}, \code{stringer}, \code{stringer-meikle}, \code{stringer-lta}, \code{stringer-pvz}, \code{rohrbach}, \code{moment}, \code{direct}, \code{difference}, \code{quotient}, or \code{regression}. 
 #' @param N             an integer specifying the total number of units (transactions or monetary units) in the population.
 #' @param sample        a data frame containing at least a column of Ist values and a column of Soll (true) values.
 #' @param bookValues    a character specifying the column name for the Ist values in the sample.
@@ -40,6 +40,7 @@
 #'  \item{\code{poisson}:          The confidence bound taken from the Poisson distribution. If combined with \code{prior = TRUE}, performs Bayesian evaluation using a \emph{gamma} prior and posterior.}
 #'  \item{\code{binomial}:         The confidence bound taken from the binomial distribution. If combined with \code{prior = TRUE}, performs Bayesian evaluation using a \emph{beta} prior and posterior.}
 #'  \item{\code{hypergeometric}:   The confidence bound taken from the hypergeometric distribution. If combined with \code{prior = TRUE}, performs Bayesian evaluation using a \emph{beta-binomial} prior and posterior.}
+#'	\item{\code{mpu}}:			   Mean per unit estimator using the observed sample taints.
 #'  \item{\code{stringer}:         The Stringer bound (Stringer, 1963).}
 #'  \item{\code{stringer-meikle}:  Stringer bound with Meikle's correction for understatements (Meikle, 1972).}
 #'  \item{\code{stringer-lta}:     Stringer bound with LTA correction for understatements (Leslie, Teitlebaum, and Anderson, 1979).}
@@ -187,13 +188,13 @@ evaluation <- function(confidence = 0.95, method = "binomial", N = NULL,
 	if(!is.null(minPrecision) && minPrecision == 0)
 		stop("The minimum required precision cannot be zero.")
 
-	if(!(method %in% c("poisson", "binomial", "hypergeometric", "stringer", "stringer-meikle", "stringer-lta", "stringer-pvz", "rohrbach", "moment", "coxsnell", "direct", "difference", "quotient", "regression")) || length(method) != 1)
+	if(!(method %in% c("poisson", "binomial", "hypergeometric", "stringer", "stringer-meikle", "stringer-lta", "stringer-pvz", "rohrbach", "moment", "coxsnell", "direct", "difference", "quotient", "regression", "mpu")) || length(method) != 1)
 		stop("Specify a valid method for the evaluation.")
 
 	if(!is.null(counts) && any(counts < 1))
 		stop("When specified, your 'counts' must all be equal to, or larger than, 1.")          
 
-	if(((class(prior) == "logical" && prior == TRUE) || class(prior) == "jfaPrior") && method %in% c("stringer", "stringer-meikle", "stringer-lta", "stringer-pvz", "rohrbach", "moment", "direct", "difference", "quotient", "regression"))
+	if(((class(prior) == "logical" && prior == TRUE) || class(prior) == "jfaPrior") && method %in% c("stringer", "stringer-meikle", "stringer-lta", "stringer-pvz", "rohrbach", "moment", "direct", "difference", "quotient", "regression", "mpu"))
 		stop("To use a prior distribution, you must use either the poisson, the binomial, or the hypergeometric method.")  
 
  	if((class(prior) == "logical" && prior == TRUE) && kPrior < 0 || nPrior < 0)
@@ -208,7 +209,7 @@ evaluation <- function(confidence = 0.95, method = "binomial", N = NULL,
       		stop("Specify one value for nSumstat and kSumstat")
     	if(kSumstats > nSumstats)
       		stop("The sum of the errors is higher than the sample size")
-    	if(method %in% c("stringer", "stringer-meikle", "stringer-lta", "stringer-pvz", "coxsnell", "rohrbach", "moment", "direct", "difference", "quotient", "regression"))
+    	if(method %in% c("stringer", "stringer-meikle", "stringer-lta", "stringer-pvz", "coxsnell", "rohrbach", "moment", "direct", "difference", "quotient", "regression", "mpu"))
       		stop("The selected method requires raw observations, and does not accomodate summary statistics")
 
 		n <- nSumstats
@@ -220,10 +221,11 @@ evaluation <- function(confidence = 0.95, method = "binomial", N = NULL,
 		if(is.null(bookValues) || is.null(auditValues) || length(bookValues) != 1 || length(auditValues) != 1)
 			stop("Specify a valid book value column name and a valid audit value column name when using a sample")
 
-    	sample <- stats::na.omit(sample)
-		n <- nrow(sample)
-		if(n == 0)
+		missingValues <- unique(c(which(is.na(sample[, bookValues])), which(is.na(sample[, auditValues]))))
+		if(length(missingValues) == nrow(sample))
 			stop("Your sample has 0 rows after removing missing values.")
+		sample <- stats::na.omit(sample)
+		n <- nrow(sample)
 		if(!is.null(counts))
 			n <- sum(counts)
 		bv <- sample[, bookValues]
@@ -319,6 +321,11 @@ evaluation <- function(confidence = 0.95, method = "binomial", N = NULL,
 		bound 		<- out[["confBound"]]
 		mle 		<- out[["mle"]]
 		precision 	<- out[["precision"]]
+	} else if(method == "mpu"){
+		out 		<- .mpuMethod(taints, confidence, n)
+		bound 		<- out[["confBound"]]
+		mle 		<- out[["mle"]]
+		precision 	<- out[["precision"]]		
 	} else if(method == "direct"){
 		out 		<- .directMethod(bv, av, confidence, N, n, populationBookValue)
 		mle 		<- out[["pointEstimate"]]

diff --git a/R/methods.R b/R/methods.R
@@ -116,6 +116,14 @@
 	return(result)
 }
 
+.mpuMethod <- function(taints, confidence, n){
+	result 					<- list()
+	result[["confBound"]] 	<- mean(taints) + stats::qnorm(p = confidence) * (stats::sd(taints) / sqrt(n))
+	result[["mle"]] 		<- sum(taints) / n
+	result[["precision"]] 	<- result[["confBound"]] - result[["mle"]]
+	return(result)	
+}
+
 .directMethod <- function(bookValues, auditValues, confidence, N = NULL, n, populationBookValue = NULL, correction = FALSE){
 	if(is.null(N))
 		stop("The direct method requires that you specify the population size N")

diff --git a/R/planning.R b/R/planning.R
@@ -1,6 +1,6 @@
 #' Frequentist and Bayesian Planning for Audit Sampling
 #'
-#' @description This function calculates the required sample size for an audit, based on the poisson, binomial, or hypergeometric likelihood. A prior can be specified to perform Bayesian planning. The returned object is of class \code{jfaPlanning} and can be used with associated \code{print()} and \code{plot()} methods.
+#' @description This function calculates the required sample size for an audit, based on the Poisson, binomial, or hypergeometric likelihood. A prior can be specified to perform Bayesian planning. The returned object is of class \code{jfaPlanning} and can be used with associated \code{print()} and \code{plot()} methods.
 #'
 #' For more details on how to use this function see the package vignette:
 #' \code{vignette("jfa", package = "jfa")}

diff --git a/R/report.R b/R/report.R
@@ -9,7 +9,7 @@
 #'
 #' @param object an object of class 'jfaEvaluation' as returned by the \code{evaluation()} function.
 #' @param file a string that gives the desired name of the file (e.g. \code{"report.html"}). The report is created in your current working directory.          
-#' @param format can be either one of \code{"html_document"} or \code{"pdf_document"} (required MikTex).
+#' @param format can be either one of \code{"html_document"} or \code{"pdf_document"} (compiling to pdf requires MikTex).
 #'
 #' @return A html or pdf report containing the results of the evaluation.
 #'

diff --git a/README.Rmd b/README.Rmd
@@ -30,6 +30,7 @@ knitr::opts_chunk$set(
 * [Getting started](#getting-started)
 * [Benchmarks](#benchmarks)
 * [Contributing](#contributing) 
+* [Cheatsheet](#cheatsheet)
 * [Functions](#functions)
 * [References](#references)
 * [Package statistics](#package-statistics) 
@@ -40,7 +41,7 @@ The `jfa` package can be used to set up the entire audit sampling workflow.
   <img src="man/figures/readme/banner/jfaBanner.png" alt="banner"/>
 </p>
 
-For complete documentation of the package, see the [package website](https://koenderks.github.io/jfa/) or the [package manual](https://cran.r-project.org/web/packages/jfa/jfa.pdf).
+For complete documentation of the package, see the [package website](https://koenderks.github.io/jfa/) or the [package manual](https://cran.r-project.org/package=jfa/jfa.pdf).
 
 ### Authors
 
@@ -102,6 +103,14 @@ The package vignettes contain explanations about the functionality of `jfa` illu
 
 If you are willing to contribute to the improvement of the package by adding a benchmark, please check out the Wiki page on [how to contribute a benchmark to jfa](https://github.com/koenderks/jfa/wiki/Benchmarks). If you are willing to contribute to the improvement of the package by adding a new statistical method, please check the Wiki page on [how to contribute a new method to jfa](https://github.com/koenderks/jfa/wiki/Methods). 
 
+## Cheatsheet
+
+The cheatsheet can help you get started with the `jfa` package and its workflow. You can download a pdf version [here](https://github.com/koenderks/jfa/raw/master/man/figures/cheatsheet/cheatsheet.pdf).
+
+<p align="center">
+  <img src="man/figures/cheatsheet/cheatsheet.png" alt="cheatsheet"/>
+</p>
+
 ## Functions
 
 Below is a list of the available functions in the current version of `jfa`, sorted by their occurrence in the standard audit sampling workflow.
@@ -213,7 +222,7 @@ The `report()` function takes an object of class `jfaEvaluation` as returned by
 
 `report(object = NULL, file = NULL, format = "html_document")`
 
-For an example report, see the following [link](https://github.com/koenderks/jfa/tree/master/man/figures/readme/report/report.pdf).
+For an example report, see the following [link](https://github.com/koenderks/jfa/raw/master/man/figures/readme/report/report.pdf).
 
 ## References
 
@@ -244,8 +253,8 @@ xBreaks           <- plotData[["date"]]
 yBreaks           <- pretty(c(0, plotData[["count"]], max(plotData[["count"]]) + 200), n = 6)
 
 # Releases 
-releases          <- c("2020-01-01", "2020-08-01", "2020-09-01", "2020-11-01")
-releaseLabs       <- c("v0.1.0", "v0.2.0", "v0.3.0", "v0.4.0")
+releases          <- c("2020-01-01", "2020-08-01", "2020-09-01", "2020-11-01", "2021-01-01")
+releaseLabs       <- c("v0.1.0", "v0.2.0", "v0.3.0", "v0.4.0", "v0.5.0")
 
 p <- ggplot2::ggplot(plotData, ggplot2::aes(x = date, y = count)) +
       	ggplot2::geom_bar(stat = "identity", fill = rgb(65, 104, 195, maxColorValue = 255), 

diff --git a/README.md b/README.md
@@ -29,6 +29,7 @@ prior probability distribution for use in these functions.
   - [Getting started](#getting-started)
   - [Benchmarks](#benchmarks)
   - [Contributing](#contributing)
+  - [Cheatsheet](#cheatsheet)
   - [Functions](#functions)
   - [References](#references)
   - [Package statistics](#package-statistics)
@@ -44,7 +45,7 @@ workflow.
 
 For complete documentation of the package, see the [package
 website](https://koenderks.github.io/jfa/) or the [package
-manual](https://cran.r-project.org/web/packages/jfa/jfa.pdf).
+manual](https://cran.r-project.org/package=jfa/jfa.pdf).
 
 ### Authors
 
@@ -126,6 +127,18 @@ adding a new statistical method, please check the Wiki page on [how to
 contribute a new method to
 jfa](https://github.com/koenderks/jfa/wiki/Methods).
 
+## Cheatsheet
+
+The cheatsheet can help you get started with the `jfa` package and its
+workflow. You can download a pdf version
+[here](https://github.com/koenderks/jfa/raw/master/man/figures/cheatsheet/cheatsheet.pdf).
+
+<p align="center">
+
+<img src="man/figures/cheatsheet/cheatsheet.png" alt="cheatsheet"/>
+
+</p>
+
 ## Functions
 
 Below is a list of the available functions in the current version of
@@ -275,7 +288,7 @@ interpretation, and saves the report to your local computer.
 `report(object = NULL, file = NULL, format = "html_document")`
 
 For an example report, see the following
-[link](https://github.com/koenderks/jfa/tree/master/man/figures/readme/report/report.pdf).
+[link](https://github.com/koenderks/jfa/raw/master/man/figures/readme/report/report.pdf).
 
 ## References
 

diff --git a/doc/jfa.R b/doc/jfa.R
@@ -0,0 +1,43 @@
+## ---- eval=FALSE--------------------------------------------------------------
+#  install.packages("jfa")
+
+## ---- eval=FALSE--------------------------------------------------------------
+#  devtools::install_github("koenderks/jfa")
+
+## -----------------------------------------------------------------------------
+library(jfa)
+
+data("BuildIt")
+BuildIt <- BuildIt[, c("ID", "bookValue")] # Let's remove the auditValue column for this example
+head(BuildIt, n = 10)
+
+## -----------------------------------------------------------------------------
+planning(confidence = 0.95, expectedError = 0, likelihood = "poisson", N = 3500, materiality = 0.05)
+
+## -----------------------------------------------------------------------------
+planning(confidence = 0.95, expectedError = 0, likelihood = "poisson", N = 3500, minPrecision = 0.02)
+
+## -----------------------------------------------------------------------------
+selection(population = BuildIt, sampleSize = 150, units = "records", algorithm = "random")
+
+## -----------------------------------------------------------------------------
+selection(population = BuildIt, sampleSize = 150, units = "mus", algorithm = "interval", 
+          bookValues = "bookValue")
+
+## -----------------------------------------------------------------------------
+result <- selection(population = BuildIt, sampleSize = 150, units = "mus", algorithm = "interval", 
+                    bookValues = "bookValue")
+
+sample <- result$sample
+head(sample, n = 10)
+
+## -----------------------------------------------------------------------------
+evaluation(confidence = 0.95, method = "binomial", N = 3500, nSumstats = 60, kSumstats = 1, materiality = 0.05)
+
+## -----------------------------------------------------------------------------
+sample$auditValue <- sample$bookValue
+
+## -----------------------------------------------------------------------------
+evaluation(confidence = 0.95, method = "stringer", N = 3500, materiality = 0.05,
+           sample = sample, bookValues = "bookValue", auditValues = "auditValue", counts = sample$count)
+
-Original file line number
+Diff line change
@@ -1,3 +1,2 @@
-    doc
-    Meta
     docs
+    Meta