diff --git a/CITATION.cff b/CITATION.cff index d1aecba..c00fd22 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -2,6 +2,6 @@ message: "If you use this software, please cite it as below." authors: - family-names: "Cook County Assessor's Office" title: "AssessR" -version: 0.6.0 +version: 1.0.0 date-released: 2019-01-01 url: "https://github.com/ccao-data/assessr" diff --git a/DESCRIPTION b/DESCRIPTION index 23ad8f3..253643e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,13 +1,14 @@ Package: assessr Type: Package Title: Measure Property Assessment Performance -Version: 0.6.0 +Version: 1.0.0 Authors@R: c( person(given = "Dan", family = "Snow", email="daniel.snow@cookcountyil.gov", role=c("aut", "cre")), person(given = "William", family = "Ridgeway", email="william.ridgeway@cookcountyil.gov", role=c("ctb")), person(given = "Rob", family = "Ross", role=c("ctb")), person(given = "Nathan", family = "Dignazio", role=c("ctb")), - person(given = "Damon", family = "Major", role=c("ctb")) + person(given = "Damon", family = "Major", role=c("ctb")), + person(given = "Jean", family = "Cochrane", role=c("ctb")) ) Date: 2023-08-07 Description: An R package to measure the performance of property assessments @@ -19,7 +20,7 @@ Encoding: UTF-8 LazyData: true Imports: stats -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 Suggests: covr, devtools, @@ -37,5 +38,5 @@ Suggests: testthat, tibble, tidyr -Depends: +Depends: R (>= 3.5.0) diff --git a/NAMESPACE b/NAMESPACE index be398f2..96d3996 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,8 +4,8 @@ export(boot_ci) export(cod) export(cod_ci) export(cod_met) -export(detect_chasing) export(is_outlier) +export(is_sales_chased) export(ki) export(mki) export(mki_met) diff --git a/R/ci.R b/R/ci.R index f6c6edb..9c1a1a3 100644 --- a/R/ci.R +++ b/R/ci.R @@ -4,14 +4,17 @@ #' for a given numeric input and a chosen function. #' #' @param FUN Function to bootstrap. Must return a single value. -#' @param nboot Default 100. Number of iterations to use to estimate +#' @param estimate A numeric vector of estimated values. Must be the same +#' length as \code{sale_price}. +#' @param sale_price A numeric vector of sale prices. Must be the same +#' length as \code{estimate}. +#' @param nboot Default 1000. Number of iterations to use to estimate #' the output statistic confidence interval. #' @param alpha Default 0.05. Numeric value indicating the confidence #' interval to return. 0.05 will return the 95\% confidence interval. #' @param na.rm Default FALSE. A boolean value indicating whether or not to #' remove NA values. If missing values are present but not removed the #' function will output NA. -#' @param ... Named arguments passed on to \code{FUN}. #' #' @return A two-long numeric vector containing the bootstrapped confidence #' interval of the input vector(s). @@ -19,30 +22,39 @@ #' @examples #' #' # Calculate COD confidence interval -#' boot_ci(cod, nboot = 100, ratio = ratios_sample$ratio) +#' boot_ci(cod, ratios_sample$estimate, ratios_sample$sale_price, nboot = 100) #' #' # Calculate PRD confidence interval #' boot_ci( #' prd, #' nboot = 100, -#' assessed = ratios_sample$assessed, +#' estimate = ratios_sample$estimate, #' sale_price = ratios_sample$sale_price, #' na.rm = FALSE #' ) #' @export -boot_ci <- function(FUN = NULL, nboot = 100, alpha = 0.05, na.rm = FALSE, ...) { # nolint +boot_ci <- function( + FUN, + estimate, + sale_price, + nboot = 1000, + alpha = 0.05, + na.rm = FALSE) { + # Input checking and error handling + check_inputs(estimate, sale_price) # Check that the input function returns a numeric vector - est <- FUN(...) + est <- FUN(estimate, sale_price) stopifnot( length(est) == 1, is.numeric(est), - is.logical(na.rm) + is.logical(na.rm), + nboot > 0 ) # Create an index of missing values, where TRUE when missing. # If na.rm is FALSE and index contains TRUE, return NA - missing_idx <- index_na(...) + missing_idx <- index_na(estimate, sale_price) if (any(missing_idx) && !na.rm) { return(NA_real_) } @@ -62,7 +74,10 @@ boot_ci <- function(FUN = NULL, nboot = 100, alpha = 0.05, na.rm = FALSE, ...) { # For each of the input vectors to FUN, subset by first removing any # index positions that have a missing value, then take a random sample of # each vector using the sample index - sampled <- lapply(list(...), function(x) x[!missing_idx][idx]) + sampled <- lapply( + list(estimate, sale_price), + function(x) x[!missing_idx][idx] + ) # For each bootstrap sample, apply the function and output an estimate for # that sample @@ -85,16 +100,21 @@ boot_ci <- function(FUN = NULL, nboot = 100, alpha = 0.05, na.rm = FALSE, ...) { #' @examples #' #' # Calculate COD confidence interval -#' cod_ci(ratios_sample$ratio) +#' cod_ci(ratios_sample$estimate, ratios_sample$sale_price) #' @export -cod_ci <- function(ratio, nboot = 100, alpha = 0.05, na.rm = FALSE) { # nolint - +cod_ci <- function( + estimate, + sale_price, + nboot = 1000, + alpha = 0.05, + na.rm = FALSE) { cod_ci <- boot_ci( cod, + estimate = estimate, + sale_price = sale_price, nboot = nboot, alpha = alpha, - na.rm = na.rm, - ratio = ratio + na.rm = na.rm ) return(cod_ci) @@ -108,17 +128,21 @@ cod_ci <- function(ratio, nboot = 100, alpha = 0.05, na.rm = FALSE) { # nolint #' @examples #' #' # Calculate PRD confidence interval -#' prd_ci(ratios_sample$assessed, ratios_sample$sale_price) +#' prd_ci(ratios_sample$estimate, ratios_sample$sale_price) #' @export -prd_ci <- function(assessed, sale_price, nboot = 100, alpha = 0.05, na.rm = FALSE) { # nolint - +prd_ci <- function( + estimate, + sale_price, + nboot = 1000, + alpha = 0.05, + na.rm = FALSE) { prd_ci <- boot_ci( prd, + estimate = estimate, + sale_price = sale_price, nboot = nboot, alpha = alpha, - na.rm = na.rm, - assessed = assessed, - sale_price = sale_price + na.rm = na.rm ) return(prd_ci) @@ -132,25 +156,24 @@ prd_ci <- function(assessed, sale_price, nboot = 100, alpha = 0.05, na.rm = FALS #' @examples #' #' # Calculate PRD confidence interval -#' prb_ci(ratios_sample$assessed, ratios_sample$sale_price) +#' prb_ci(ratios_sample$estimate, ratios_sample$sale_price) #' @export -prb_ci <- function(assessed, sale_price, alpha = 0.05, na.rm = FALSE) { # nolint - +prb_ci <- function(estimate, sale_price, alpha = 0.05, na.rm = FALSE) { # Input checking and error handling - check_inputs(assessed, sale_price) + check_inputs(estimate, sale_price) # Remove NAs from input vectors. Otherwise, return NA if the input vectors # contain any NA values - idx <- index_na(assessed, sale_price) + idx <- index_na(estimate, sale_price) if (na.rm) { - assessed <- assessed[!idx] + estimate <- estimate[!idx] sale_price <- sale_price[!idx] } else if (any(idx) && !na.rm) { return(NA_real_) } # Calculate PRB model - prb_model <- calc_prb(assessed, sale_price) + prb_model <- calc_prb(estimate, sale_price) # Extract PRB CI from model prb_ci <- stats::confint(prb_model, level = (1 - alpha))[2, ] diff --git a/R/data.R b/R/data.R index 550efdd..8f2824f 100644 --- a/R/data.R +++ b/R/data.R @@ -3,13 +3,30 @@ #' This sample was take from Evanston and New Trier in 2019. Ratios are #' calculated using assessor certified (post-appeal) fair market values. #' -#' @format A data frame with 979 observation and 4 variables: +#' @format A data frame with 979 observation and 3 variables: #' \describe{ -#' \item{assessed}{The fair market assessed value predicted by CCAO assessment +#' \item{estimate}{The fair market assessed value predicted by CCAO assessment #' models, including any successful appeals} #' \item{sale_price}{The recorded sale price of this property} -#' \item{ratio}{Sales ratio representing fair market value / sale price} #' \item{town}{Township name the property is in} #' } #' "ratios_sample" + +#' Sample of sales and estimated market values provided by Quintos in the +#' following MKI papers: +#' +#' @references +#' Quintos, C. (2020). A Gini measure for vertical equity in property +#' assessments. +#' +#' Quintos, C. (2021). A Gini decomposition of the sources of inequality in +#' property assessments. +#' +#' @format A data frame with 30 observation and 2 variables: +#' \describe{ +#' \item{estimate}{Assessed fair market value} +#' \item{sale_price}{Recorded sale price of this property} +#' } +#' +"quintos_sample" diff --git a/R/formulas.R b/R/formulas.R index 3df4495..50a8954 100644 --- a/R/formulas.R +++ b/R/formulas.R @@ -20,9 +20,10 @@ #' \href{https://www.iaao.org/media/standards/Standard_on_Ratio_Studies.pdf}{IAAO Standard on Ratio Studies} #' Appendix B.1. #' -#' @param ratio A numeric vector of ratios centered around 1, where the -#' numerator of the ratio is the estimated fair market value and the -#' denominator is the actual sale price. +#' @param estimate A numeric vector of assessed values. Must be the same +#' length as \code{sale_price}. +#' @param sale_price A numeric vector of sale prices. Must be the same length +#' as \code{estimate}. #' @param na.rm Default FALSE. A boolean value indicating whether or not to #' remove NA values. If missing values are present but not removed the #' function will output NA. @@ -32,13 +33,15 @@ #' #' @examples #' # Calculate COD -#' cod(ratios_sample$ratio) +#' cod(ratios_sample$estimate, ratios_sample$sale_price) #' @family formulas #' @export -cod <- function(ratio, na.rm = FALSE) { +cod <- function(estimate, sale_price, na.rm = FALSE) { # nolint end - check_inputs(ratio) + check_inputs(estimate, sale_price) + ratio <- estimate / sale_price + if (na.rm) ratio <- stats::na.omit(ratio) # Calculate median ratio @@ -71,10 +74,10 @@ cod <- function(ratio, na.rm = FALSE) { #' as it is extremely sensitive to large outliers. PRD is being deprecated in #' favor of PRB, which is less sensitive to outliers and easier to interpret. #' -#' @param assessed A numeric vector of assessed values. Must be the same +#' @param estimate A numeric vector of assessed values. Must be the same #' length as \code{sale_price}. #' @param sale_price A numeric vector of sale prices. Must be the same length -#' as \code{assessed}. +#' as \code{estimate}. #' #' @inheritParams cod #' @describeIn prd Returns a numeric vector containing the PRD of the @@ -83,23 +86,23 @@ cod <- function(ratio, na.rm = FALSE) { #' #' @examples #' # Calculate PRD -#' prd(ratios_sample$assessed, ratios_sample$sale_price) +#' prd(ratios_sample$estimate, ratios_sample$sale_price) #' @family formulas #' @export -prd <- function(assessed, sale_price, na.rm = FALSE) { +prd <- function(estimate, sale_price, na.rm = FALSE) { # nolint end - check_inputs(assessed, sale_price) - idx <- index_na(assessed, sale_price) + check_inputs(estimate, sale_price) + idx <- index_na(estimate, sale_price) if (na.rm) { - assessed <- assessed[!idx] + estimate <- estimate[!idx] sale_price <- sale_price[!idx] } else if (any(idx) && !na.rm) { return(NA_real_) } # Calculate ratio of assessed values to sale price - ratio <- assessed / sale_price + ratio <- estimate / sale_price # Calculate PRD prd <- mean(ratio) / stats::weighted.mean(ratio, sale_price) @@ -112,11 +115,11 @@ prd <- function(assessed, sale_price, na.rm = FALSE) { ##### PRB ##### # Calculate PRB and return model object -calc_prb <- function(assessed, sale_price) { - ratio <- assessed / sale_price +calc_prb <- function(estimate, sale_price) { + ratio <- estimate / sale_price med_ratio <- stats::median(ratio) lhs <- (ratio - med_ratio) / med_ratio - rhs <- log(((assessed / med_ratio) + sale_price) * 0.5) / log(2) + rhs <- log(((estimate / med_ratio) + sale_price) * 0.5) / log(2) prb_model <- stats::lm(formula = lhs ~ rhs) return(prb_model) @@ -146,24 +149,24 @@ calc_prb <- function(assessed, sale_price) { #' #' @examples #' # Calculate PRB -#' prb(ratios_sample$assessed, ratios_sample$sale_price) +#' prb(ratios_sample$estimate, ratios_sample$sale_price) #' @family formulas #' @export -prb <- function(assessed, sale_price, na.rm = FALSE) { +prb <- function(estimate, sale_price, na.rm = FALSE) { # nolint end - check_inputs(assessed, sale_price) + check_inputs(estimate, sale_price) - idx <- index_na(assessed, sale_price) + idx <- index_na(estimate, sale_price) if (na.rm) { - assessed <- assessed[!idx] + estimate <- estimate[!idx] sale_price <- sale_price[!idx] } else if (any(idx) && !na.rm) { return(NA_real_) } # Calculate PRB - prb_model <- calc_prb(assessed, sale_price) + prb_model <- calc_prb(estimate, sale_price) # Extract PRB from model prb <- unname(stats::coef(prb_model)[2]) @@ -176,22 +179,22 @@ prb <- function(assessed, sale_price, na.rm = FALSE) { ##### MKI_KI ##### # Calculate the Gini cofficients needed for KI and MKI -calc_gini <- function(assessed, sale_price) { - df <- data.frame(av = assessed, sp = sale_price) +calc_gini <- function(estimate, sale_price) { + df <- data.frame(av = estimate, sp = sale_price) df <- df[order(df$sp), ] assessed_price <- df$av sale_price <- df$sp n <- length(assessed_price) av_sum <- sum(assessed_price * seq_len(n)) - g_assessed <- 2 * av_sum / sum(assessed_price) - (n + 1L) - gini_assessed <- g_assessed / n + g_estimate <- 2 * av_sum / sum(assessed_price) - (n + 1L) + gini_estimate <- g_estimate / n sale_sum <- sum(sale_price * seq_len(n)) g_sale <- 2 * sale_sum / sum(sale_price) - (n + 1L) gini_sale <- g_sale / n - result <- list(gini_assessed = gini_assessed, gini_sale = gini_sale) + result <- list(gini_estimate = gini_estimate, gini_sale = gini_sale) return(result) } @@ -240,25 +243,25 @@ calc_gini <- function(assessed, sale_price) { #' @examples #' #' # Calculate KI -#' ki(ratios_sample$assessed, ratios_sample$sale_price) +#' ki(ratios_sample$estimate, ratios_sample$sale_price) #' @family formulas #' @export #' @md -ki <- function(assessed, sale_price, na.rm = FALSE) { +ki <- function(estimate, sale_price, na.rm = FALSE) { # nolint end - check_inputs(assessed, sale_price) + check_inputs(estimate, sale_price) - idx <- index_na(assessed, sale_price) + idx <- index_na(estimate, sale_price) if (na.rm) { - assessed <- assessed[!idx] + estimate <- estimate[!idx] sale_price <- sale_price[!idx] } else if (any(idx) && !na.rm) { return(NA_real_) } - g <- calc_gini(assessed, sale_price) - ki <- g$gini_assessed - g$gini_sale + g <- calc_gini(estimate, sale_price) + ki <- g$gini_estimate - g$gini_sale return(ki) } @@ -270,21 +273,21 @@ ki <- function(assessed, sale_price, na.rm = FALSE) { #' #' @examples #' # Calculate MKI -#' mki(ratios_sample$assessed, ratios_sample$sale_price) +#' mki(ratios_sample$estimate, ratios_sample$sale_price) #' @export -mki <- function(assessed, sale_price, na.rm = FALSE) { - check_inputs(assessed, sale_price) +mki <- function(estimate, sale_price, na.rm = FALSE) { + check_inputs(estimate, sale_price) - idx <- index_na(assessed, sale_price) + idx <- index_na(estimate, sale_price) if (na.rm) { - assessed <- assessed[!idx] + estimate <- estimate[!idx] sale_price <- sale_price[!idx] } else if (any(idx) && !na.rm) { return(NA_real_) } - g <- calc_gini(assessed, sale_price) - mki <- g$gini_assessed / g$gini_sale + g <- calc_gini(estimate, sale_price) + mki <- g$gini_estimate / g$gini_sale return(mki) } diff --git a/R/outliers.R b/R/outliers.R index 1abaec3..fe2cdd3 100644 --- a/R/outliers.R +++ b/R/outliers.R @@ -7,22 +7,24 @@ #' outliers. As such, it is often necessary to remove outliers before #' performing a sales ratio study. #' -#' Standard method is to remove outliers that are 3 * IQR. Warnings are thrown +#' The IAAO standard method is to remove outliers that are 3 * IQR. Warnings are thrown #' when sample size is extremely small or when the IQR is extremely narrow. See #' \href{https://www.iaao.org/media/standards/Standard_on_Ratio_Studies.pdf}{IAAO Standard on Ratio Studies} #' Appendix B. Outlier Trimming Guidelines for more information. #' -#' @param x A numeric vector. Must be longer than 2 and not contain -#' \code{Inf} or \code{NaN}. -#' @param method Default "iqr". String indicating outlier detection method. +#' @param x A numeric vector, typically sales ratios. Must be longer than 2 and +#' cannot contain \code{Inf} or \code{NaN}. +#' @param method Default \code{iqr}. String indicating outlier detection method. #' Options are \code{iqr} or \code{quantile}. -#' @param ... Named arguments passed on to methods. +#' @param probs Upper and lower percentiles denoting outlier boundaries for +#' the \code{quantile} method. +#' @param mult Multiplier for IQR to determine outlier boundaries. Default 3. #' #' @return A logical vector this same length as \code{x} indicating whether or #' not each value of \code{x} is an outlier. #' #' @export -is_outlier <- function(x, method = "iqr", ...) { +is_outlier <- function(x, method = "iqr", probs = c(0.05, 0.95), mult = 3) { # nolint end # Check that inputs are well-formed numeric vector @@ -35,10 +37,11 @@ is_outlier <- function(x, method = "iqr", ...) { all(is.finite(x) | is.na(x)) # All values are finite OR are NA }) - out <- switch(method, - "quantile" = quantile_outlier(x, na.rm = TRUE, ...), - "iqr" = iqr_outlier(x, na.rm = TRUE, ...) - ) + if (method == "quantile") { + out <- quantile_outlier(x, probs = probs) + } else { + out <- iqr_outlier(x, mult = mult) + } # Warn about removing data from small samples, as it can severely distort # ratio study outcomes @@ -55,8 +58,7 @@ is_outlier <- function(x, method = "iqr", ...) { #' @describeIn is_outlier Quantile method for identifying outliers. #' @param probs Upper and lower percentiles denoting outlier boundaries. -quantile_outlier <- function(x, probs = c(0.05, 0.95), ...) { # nolint - +quantile_outlier <- function(x, probs = c(0.05, 0.95)) { # Determine valid range of the data range <- stats::quantile(x, probs = probs, na.rm = TRUE) @@ -69,8 +71,7 @@ quantile_outlier <- function(x, probs = c(0.05, 0.95), ...) { # nolint #' @describeIn is_outlier IQR method for identifying outliers. #' @param mult Multiplier for IQR to determine outlier boundaries. -iqr_outlier <- function(x, mult = 3, ...) { # nolint - +iqr_outlier <- function(x, mult = 3) { # Check that inputs are well-formed numeric vector stopifnot(is.numeric(mult), sign(mult) == 1) diff --git a/R/sales_chasing.R b/R/sales_chasing.R index f06ef25..b9b9a59 100644 --- a/R/sales_chasing.R +++ b/R/sales_chasing.R @@ -2,12 +2,18 @@ #' Detect sales chasing in a vector of sales ratios #' #' @description Sales chasing is when a property is selectively reappraised to -#' shift its assessed value toward its actual sale price. Sales chasing is +#' shift its assessed value toward its recent sale price. Sales chasing is #' difficult to detect. This function is NOT a statistical test and does #' not provide the probability of the given result. Rather, it combines two -#' novel methods to roughly estimate if sales chasing has occurred. +#' heuristic methods to roughly estimate if sales chasing has occurred. #' -#' The first method (dist) uses the technique outlined in the +#' The first method (cdf) detects discontinuities in the cumulative +#' distribution function (CDF) of the input vector. Ratios that are not sales +#' chased should have a fairly smooth CDF. Discontinuous jumps in the CDF, +#' particularly around 1, may indicate sales chasing. This can usually be seen +#' visually as a "flat spot" on the CDF. +#' +#' The second method (dist) uses the technique outlined in the #' \href{https://www.iaao.org/media/standards/Standard_on_Ratio_Studies.pdf}{IAAO Standard on Ratio Studies} #' Appendix E, Section 4. It compares the percentage of real data within +-2% #' of the mean ratio to the percentage of data within the same bounds given a @@ -15,23 +21,24 @@ #' The intuition here is that ratios that are sales chased may be more #' "bunched up" in the center of the distribution. #' -#' The second method (cdf) detects discontinuities in the cumulative -#' distribution function (CDF) of the input vector. Ratios that are not sales -#' chased should have a fairly smooth CDF. Discontinuous jumps in the CDF, -#' particularly around 1, may indicate sales chasing. This can usually be seen -#' visually as a "flat spot" on the CDF. -#' -#' @param ratio A numeric vector of ratios centered around 1, where the -#' numerator of the ratio is the estimated fair market value and the -#' denominator is the actual sale price. -#' @param method Default "both". String indicating sales chasing detection +#' @param x A numeric vector. Must be longer than 2 and cannot contain +#' \code{inf} or \code{NA} values. +#' @param method Default \code{both}. String indicating sales chasing detection #' method. Options are \code{cdf}, \code{dist}, or \code{both}. #' @param na.rm Default FALSE. A boolean value indicating whether or not to #' remove NA values. If missing values are present but not removed the #' function will output NA for those values. -#' @param ... Named arguments passed on to methods. +#' @param bounds Default \code{(0.98, 1.02)}. Lower and upper bounds of the +#' range of ratios to consider when detecting sales chasing. Setting this to +#' a narrow band at the center of the ratio distribution prevents detecting +#' false positives at the tails. +#' @param gap Default \code{0.05}. Float tuning factor. For the CDF method, it +#' sets the maximum percentage difference between two adjacent ratios. For the +#' distribution method, it sets the maximum percentage point difference +#' between the percentage of the data between the \code{bounds} in the real +#' distribution compared to the ideal distribution. #' -#' @return A logical value indicating whether or not the input ratios may +#' @return A logical value indicating whether or not the input values may #' have been sales chased. #' #' @examples @@ -42,26 +49,32 @@ #' #' # Plot to view discontinuity #' plot(stats::ecdf(normal_ratios)) -#' detect_chasing(normal_ratios) +#' is_sales_chased(normal_ratios) #' #' plot(stats::ecdf(chased_ratios)) -#' detect_chasing(chased_ratios) +#' is_sales_chased(chased_ratios) #' @export -detect_chasing <- function(ratio, method = "both", na.rm = FALSE, ...) { +is_sales_chased <- function(x, method = "both", bounds = c(0.98, 1.02), gap = 0.03, na.rm = FALSE) { # nolint end # Check that inputs are well-formed numeric vector stopifnot(exprs = { method %in% c("cdf", "dist", "both") - is.vector(ratio) - is.numeric(ratio) - !is.nan(ratio) - length(ratio) > 2 - all(is.finite(ratio) | is.na(ratio)) # All values are finite OR are NA + is.vector(x) + is.numeric(x) + !is.nan(x) + length(x) > 2 + all(is.finite(x) | is.na(x)) # All values are finite OR are NA + is.numeric(gap) + gap > 0 + gap < 1 + is.vector(bounds) + is.numeric(bounds) + bounds[2] > bounds[1] }) # Warn about small sample sizes - if (length(ratio) < 30) { + if (length(x) < 30) { warning(paste( "Sales chasing detection can be misleading when applied to small", "samples (N < 30). Increase N or use a different statistical test." @@ -69,41 +82,22 @@ detect_chasing <- function(ratio, method = "both", na.rm = FALSE, ...) { } # Can't calculate ideal distribution if ratio input contains NA, so output NA - if (any(is.na(ratio)) && !na.rm) { + if (any(is.na(x)) && !na.rm) { return(NA) } out <- switch(method, - "cdf" = detect_chasing_cdf(ratio, ...), - "dist" = detect_chasing_dist(ratio, na.rm = na.rm, ...), - "both" = detect_chasing_cdf(ratio, ...) & - detect_chasing_dist(ratio, na.rm = na.rm, ...) + "cdf" = cdf_sales_chased(x, bounds, gap), + "dist" = dist_sales_chased(x, bounds, gap, na.rm = na.rm), + "both" = cdf_sales_chased(x, bounds, gap) & + dist_sales_chased(x, bounds, gap, na.rm = na.rm) ) return(out) } -#' @describeIn detect_chasing CDF gap method for detecting sales chasing. -#' @param bounds Ratio boundaries to use for detection. The CDF method will -#' return TRUE if the CDF gap exceeding the threshold is found within these -#' bounds. The distribution method will calculate the percentage of ratios -#' within these bounds for the actual data and an ideal normal distribution. -#' Expanding these bounds increases likelihood of detection. -#' @param cdf_gap Ratios that have bunched up around a particular value -#' (typically 1) will appear as a flat spot on the CDF. The longer this flat -#' spot, the worse the potential sales chasing. This variable indicates the -#' length of that flat spot and can be thought of as the proportion of ratios -#' that have the same value. For example, 0.03 means that 3% of ratios share -#' the same value. -detect_chasing_cdf <- function(ratio, bounds = c(0.98, 1.02), cdf_gap = 0.03, ...) { # nolint - - # Check that inputs are well-formed numeric vector - stopifnot( - cdf_gap > 0 & cdf_gap < 1, is.numeric(cdf_gap), - length(bounds) == 2, is.numeric(bounds) - ) - +cdf_sales_chased <- function(ratio, bounds = c(0.98, 1.02), gap = 0.03) { # Sort the ratios AND REMOVE NAs sorted_ratio <- sort(ratio) @@ -117,19 +111,17 @@ detect_chasing_cdf <- function(ratio, bounds = c(0.98, 1.02), cdf_gap = 0.03, .. # Check if the largest different is greater than the threshold and make sure # it's within the specified boundaries diff_loc <- sorted_ratio[which.max(diffs)] - out <- max(diffs) > cdf_gap & (diff_loc > bounds[1] & diff_loc < bounds[2]) + out <- max(diffs) > gap & (diff_loc > bounds[1] & diff_loc < bounds[2]) return(out) } -#' @describeIn detect_chasing Distribution comparison method -#' for detecting sales chasing. -detect_chasing_dist <- function(ratio, bounds = c(0.98, 1.02), na.rm = FALSE, ...) { # nolint - - # Check that inputs are well-formed numeric vector - stopifnot(length(bounds) == 2, is.numeric(bounds)) - +dist_sales_chased <- function( + ratio, + bounds = c(0.98, 1.02), + gap = 0.03, + na.rm = FALSE) { # Return the percentage of x within the specified range pct_in_range <- function(x, min, max) mean(x >= min & x <= max, na.rm = na.rm) @@ -147,5 +139,5 @@ detect_chasing_dist <- function(ratio, bounds = c(0.98, 1.02), na.rm = FALSE, .. # Determine what percentage of the data is actually within the bounds pct_actual <- pct_in_range(ratio, bounds[1], bounds[2]) - return(pct_actual > pct_ideal) + return(abs(pct_actual - pct_ideal) > gap) } diff --git a/README.Rmd b/README.Rmd index 2ff176b..7af8452 100644 --- a/README.Rmd +++ b/README.Rmd @@ -44,7 +44,7 @@ renv::install("ccao-data/assessr") pak::pak("ccao-data/assessr") # Append the @ symbol for a specific version -remotes::install_github("ccao-data/assessr@0.4.4") +remotes::install_github("ccao-data/assessr@1.0.0") ``` Once it is installed, you can use it just like any other package. Simply call `library(assessr)` at the beginning of your script. diff --git a/README.md b/README.md index 6b54d96..25ce620 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ renv::install("ccao-data/assessr") pak::pak("ccao-data/assessr") # Append the @ symbol for a specific version -remotes::install_github("ccao-data/assessr@0.4.4") +remotes::install_github("ccao-data/assessr@1.0.0") ``` Once it is installed, you can use it just like any other package. Simply diff --git a/_pkgdown.yml b/_pkgdown.yml index 07d0562..30e5009 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -58,10 +58,11 @@ reference: desc: Functions for other assessment-related tasks - contents: - boot_ci - - detect_chasing + - is_sales_chased - is_outlier - title: Data desc: Sample data used for testing and demonstrations - contents: - ratios_sample + - quintos_sample diff --git a/data/quintos_sample.rda b/data/quintos_sample.rda new file mode 100644 index 0000000..6e1de34 Binary files /dev/null and b/data/quintos_sample.rda differ diff --git a/data/ratios_sample.rda b/data/ratios_sample.rda index 0d31e20..1589305 100644 Binary files a/data/ratios_sample.rda and b/data/ratios_sample.rda differ diff --git a/man/boot_ci.Rd b/man/boot_ci.Rd index a03c05f..f591022 100644 --- a/man/boot_ci.Rd +++ b/man/boot_ci.Rd @@ -4,12 +4,18 @@ \alias{boot_ci} \title{Calculate bootstrapped confidence intervals} \usage{ -boot_ci(FUN = NULL, nboot = 100, alpha = 0.05, na.rm = FALSE, ...) +boot_ci(FUN, estimate, sale_price, nboot = 1000, alpha = 0.05, na.rm = FALSE) } \arguments{ \item{FUN}{Function to bootstrap. Must return a single value.} -\item{nboot}{Default 100. Number of iterations to use to estimate +\item{estimate}{A numeric vector of estimated values. Must be the same +length as \code{sale_price}.} + +\item{sale_price}{A numeric vector of sale prices. Must be the same +length as \code{estimate}.} + +\item{nboot}{Default 1000. Number of iterations to use to estimate the output statistic confidence interval.} \item{alpha}{Default 0.05. Numeric value indicating the confidence @@ -18,8 +24,6 @@ interval to return. 0.05 will return the 95\% confidence interval.} \item{na.rm}{Default FALSE. A boolean value indicating whether or not to remove NA values. If missing values are present but not removed the function will output NA.} - -\item{...}{Named arguments passed on to \code{FUN}.} } \value{ A two-long numeric vector containing the bootstrapped confidence @@ -32,13 +36,13 @@ Calculate the non-parametric bootstrap confidence interval \examples{ # Calculate COD confidence interval -boot_ci(cod, nboot = 100, ratio = ratios_sample$ratio) +boot_ci(cod, ratios_sample$estimate, ratios_sample$sale_price, nboot = 100) # Calculate PRD confidence interval boot_ci( prd, nboot = 100, - assessed = ratios_sample$assessed, + estimate = ratios_sample$estimate, sale_price = ratios_sample$sale_price, na.rm = FALSE ) diff --git a/man/cod.Rd b/man/cod.Rd index 906bb3e..c48624a 100644 --- a/man/cod.Rd +++ b/man/cod.Rd @@ -6,22 +6,24 @@ \alias{cod_met} \title{Calculate Coefficient of Dispersion (COD)} \usage{ -cod(ratio, na.rm = FALSE) +cod(estimate, sale_price, na.rm = FALSE) -cod_ci(ratio, nboot = 100, alpha = 0.05, na.rm = FALSE) +cod_ci(estimate, sale_price, nboot = 1000, alpha = 0.05, na.rm = FALSE) cod_met(x) } \arguments{ -\item{ratio}{A numeric vector of ratios centered around 1, where the -numerator of the ratio is the estimated fair market value and the -denominator is the actual sale price.} +\item{estimate}{A numeric vector of assessed values. Must be the same +length as \code{sale_price}.} + +\item{sale_price}{A numeric vector of sale prices. Must be the same length +as \code{estimate}.} \item{na.rm}{Default FALSE. A boolean value indicating whether or not to remove NA values. If missing values are present but not removed the function will output NA.} -\item{nboot}{Default 100. Number of iterations to use to estimate +\item{nboot}{Default 1000. Number of iterations to use to estimate the output statistic confidence interval.} \item{alpha}{Default 0.05. Numeric value indicating the confidence @@ -60,10 +62,10 @@ COD is the average absolute percent deviation from the }} \examples{ # Calculate COD -cod(ratios_sample$ratio) +cod(ratios_sample$estimate, ratios_sample$sale_price) # Calculate COD confidence interval -cod_ci(ratios_sample$ratio) +cod_ci(ratios_sample$estimate, ratios_sample$sale_price) } \seealso{ Other formulas: diff --git a/man/detect_chasing.Rd b/man/detect_chasing.Rd deleted file mode 100644 index 70cd37d..0000000 --- a/man/detect_chasing.Rd +++ /dev/null @@ -1,87 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sales_chasing.R -\name{detect_chasing} -\alias{detect_chasing} -\alias{detect_chasing_cdf} -\alias{detect_chasing_dist} -\title{Detect sales chasing in a vector of sales ratios} -\usage{ -detect_chasing(ratio, method = "both", na.rm = FALSE, ...) - -detect_chasing_cdf(ratio, bounds = c(0.98, 1.02), cdf_gap = 0.03, ...) - -detect_chasing_dist(ratio, bounds = c(0.98, 1.02), na.rm = FALSE, ...) -} -\arguments{ -\item{ratio}{A numeric vector of ratios centered around 1, where the -numerator of the ratio is the estimated fair market value and the -denominator is the actual sale price.} - -\item{method}{Default "both". String indicating sales chasing detection -method. Options are \code{cdf}, \code{dist}, or \code{both}.} - -\item{na.rm}{Default FALSE. A boolean value indicating whether or not to -remove NA values. If missing values are present but not removed the -function will output NA for those values.} - -\item{...}{Named arguments passed on to methods.} - -\item{bounds}{Ratio boundaries to use for detection. The CDF method will -return TRUE if the CDF gap exceeding the threshold is found within these -bounds. The distribution method will calculate the percentage of ratios -within these bounds for the actual data and an ideal normal distribution. -Expanding these bounds increases likelihood of detection.} - -\item{cdf_gap}{Ratios that have bunched up around a particular value -(typically 1) will appear as a flat spot on the CDF. The longer this flat -spot, the worse the potential sales chasing. This variable indicates the -length of that flat spot and can be thought of as the proportion of ratios -that have the same value. For example, 0.03 means that 3% of ratios share -the same value.} -} -\value{ -A logical value indicating whether or not the input ratios may - have been sales chased. -} -\description{ -Sales chasing is when a property is selectively reappraised to - shift its assessed value toward its actual sale price. Sales chasing is - difficult to detect. This function is NOT a statistical test and does - not provide the probability of the given result. Rather, it combines two - novel methods to roughly estimate if sales chasing has occurred. - - The first method (dist) uses the technique outlined in the - \href{https://www.iaao.org/media/standards/Standard_on_Ratio_Studies.pdf}{IAAO Standard on Ratio Studies} - Appendix E, Section 4. It compares the percentage of real data within +-2% - of the mean ratio to the percentage of data within the same bounds given a - constructed normal distribution with the same mean and standard deviation. - The intuition here is that ratios that are sales chased may be more - "bunched up" in the center of the distribution. - - The second method (cdf) detects discontinuities in the cumulative - distribution function (CDF) of the input vector. Ratios that are not sales - chased should have a fairly smooth CDF. Discontinuous jumps in the CDF, - particularly around 1, may indicate sales chasing. This can usually be seen - visually as a "flat spot" on the CDF. -} -\section{Functions}{ -\itemize{ -\item \code{detect_chasing_cdf()}: CDF gap method for detecting sales chasing. - -\item \code{detect_chasing_dist()}: Distribution comparison method -for detecting sales chasing. - -}} -\examples{ - -# Generate fake data with normal vs chased ratios -normal_ratios <- c(rnorm(1000, 1, 0.15)) -chased_ratios <- c(rnorm(900, 1, 0.15), rep(1, 100)) - -# Plot to view discontinuity -plot(stats::ecdf(normal_ratios)) -detect_chasing(normal_ratios) - -plot(stats::ecdf(chased_ratios)) -detect_chasing(chased_ratios) -} diff --git a/man/is_outlier.Rd b/man/is_outlier.Rd index 8f21e2e..6db38ec 100644 --- a/man/is_outlier.Rd +++ b/man/is_outlier.Rd @@ -6,21 +6,19 @@ \alias{iqr_outlier} \title{Detect outlier values in a vector using IQR/quantile method} \usage{ -is_outlier(x, method = "iqr", ...) +is_outlier(x, method = "iqr", probs = c(0.05, 0.95), mult = 3) -quantile_outlier(x, probs = c(0.05, 0.95), ...) +quantile_outlier(x, probs = c(0.05, 0.95)) -iqr_outlier(x, mult = 3, ...) +iqr_outlier(x, mult = 3) } \arguments{ -\item{x}{A numeric vector. Must be longer than 2 and not contain -\code{Inf} or \code{NaN}.} +\item{x}{A numeric vector, typically sales ratios. Must be longer than 2 and +cannot contain \code{Inf} or \code{NaN}.} -\item{method}{Default "iqr". String indicating outlier detection method. +\item{method}{Default \code{iqr}. String indicating outlier detection method. Options are \code{iqr} or \code{quantile}.} -\item{...}{Named arguments passed on to methods.} - \item{probs}{Upper and lower percentiles denoting outlier boundaries.} \item{mult}{Multiplier for IQR to determine outlier boundaries.} @@ -36,7 +34,7 @@ Detect outliers in a numeric vector using standard methods. outliers. As such, it is often necessary to remove outliers before performing a sales ratio study. - Standard method is to remove outliers that are 3 * IQR. Warnings are thrown + The IAAO standard method is to remove outliers that are 3 * IQR. Warnings are thrown when sample size is extremely small or when the IQR is extremely narrow. See \href{https://www.iaao.org/media/standards/Standard_on_Ratio_Studies.pdf}{IAAO Standard on Ratio Studies} Appendix B. Outlier Trimming Guidelines for more information. diff --git a/man/is_sales_chased.Rd b/man/is_sales_chased.Rd new file mode 100644 index 0000000..6019728 --- /dev/null +++ b/man/is_sales_chased.Rd @@ -0,0 +1,74 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sales_chasing.R +\name{is_sales_chased} +\alias{is_sales_chased} +\title{Detect sales chasing in a vector of sales ratios} +\usage{ +is_sales_chased( + x, + method = "both", + bounds = c(0.98, 1.02), + gap = 0.03, + na.rm = FALSE +) +} +\arguments{ +\item{x}{A numeric vector. Must be longer than 2 and cannot contain +\code{inf} or \code{NA} values.} + +\item{method}{Default \code{both}. String indicating sales chasing detection +method. Options are \code{cdf}, \code{dist}, or \code{both}.} + +\item{bounds}{Default \code{(0.98, 1.02)}. Lower and upper bounds of the +range of ratios to consider when detecting sales chasing. Setting this to +a narrow band at the center of the ratio distribution prevents detecting +false positives at the tails.} + +\item{gap}{Default \code{0.05}. Float tuning factor. For the CDF method, it +sets the maximum percentage difference between two adjacent ratios. For the +distribution method, it sets the maximum percentage point difference +between the percentage of the data between the \code{bounds} in the real +distribution compared to the ideal distribution.} + +\item{na.rm}{Default FALSE. A boolean value indicating whether or not to +remove NA values. If missing values are present but not removed the +function will output NA for those values.} +} +\value{ +A logical value indicating whether or not the input values may + have been sales chased. +} +\description{ +Sales chasing is when a property is selectively reappraised to + shift its assessed value toward its recent sale price. Sales chasing is + difficult to detect. This function is NOT a statistical test and does + not provide the probability of the given result. Rather, it combines two + heuristic methods to roughly estimate if sales chasing has occurred. + + The first method (cdf) detects discontinuities in the cumulative + distribution function (CDF) of the input vector. Ratios that are not sales + chased should have a fairly smooth CDF. Discontinuous jumps in the CDF, + particularly around 1, may indicate sales chasing. This can usually be seen + visually as a "flat spot" on the CDF. + + The second method (dist) uses the technique outlined in the + \href{https://www.iaao.org/media/standards/Standard_on_Ratio_Studies.pdf}{IAAO Standard on Ratio Studies} + Appendix E, Section 4. It compares the percentage of real data within +-2% + of the mean ratio to the percentage of data within the same bounds given a + constructed normal distribution with the same mean and standard deviation. + The intuition here is that ratios that are sales chased may be more + "bunched up" in the center of the distribution. +} +\examples{ + +# Generate fake data with normal vs chased ratios +normal_ratios <- c(rnorm(1000, 1, 0.15)) +chased_ratios <- c(rnorm(900, 1, 0.15), rep(1, 100)) + +# Plot to view discontinuity +plot(stats::ecdf(normal_ratios)) +is_sales_chased(normal_ratios) + +plot(stats::ecdf(chased_ratios)) +is_sales_chased(chased_ratios) +} diff --git a/man/mki_ki.Rd b/man/mki_ki.Rd index e83f3d1..cf1318e 100644 --- a/man/mki_ki.Rd +++ b/man/mki_ki.Rd @@ -6,18 +6,18 @@ \alias{mki_met} \title{Calculate Kakwani and Modified Kakwani Index} \usage{ -mki(assessed, sale_price, na.rm = FALSE) +mki(estimate, sale_price, na.rm = FALSE) -ki(assessed, sale_price, na.rm = FALSE) +ki(estimate, sale_price, na.rm = FALSE) mki_met(x) } \arguments{ -\item{assessed}{A numeric vector of assessed values. Must be the same +\item{estimate}{A numeric vector of assessed values. Must be the same length as \code{sale_price}.} \item{sale_price}{A numeric vector of sale prices. Must be the same length -as \code{assessed}.} +as \code{estimate}.} \item{na.rm}{Default FALSE. A boolean value indicating whether or not to remove NA values. If missing values are present but not removed the @@ -65,10 +65,10 @@ input vectors. }} \examples{ # Calculate MKI -mki(ratios_sample$assessed, ratios_sample$sale_price) +mki(ratios_sample$estimate, ratios_sample$sale_price) # Calculate KI -ki(ratios_sample$assessed, ratios_sample$sale_price) +ki(ratios_sample$estimate, ratios_sample$sale_price) } \references{ Quintos, C. (2020). A Gini measure for vertical equity in property diff --git a/man/prb.Rd b/man/prb.Rd index 1dbe5f7..6b8e785 100644 --- a/man/prb.Rd +++ b/man/prb.Rd @@ -6,18 +6,18 @@ \alias{prb_met} \title{Calculate Coefficient of Price-Related Bias (PRB)} \usage{ -prb(assessed, sale_price, na.rm = FALSE) +prb(estimate, sale_price, na.rm = FALSE) -prb_ci(assessed, sale_price, alpha = 0.05, na.rm = FALSE) +prb_ci(estimate, sale_price, alpha = 0.05, na.rm = FALSE) prb_met(x) } \arguments{ -\item{assessed}{A numeric vector of assessed values. Must be the same +\item{estimate}{A numeric vector of assessed values. Must be the same length as \code{sale_price}.} \item{sale_price}{A numeric vector of sale prices. Must be the same length -as \code{assessed}.} +as \code{estimate}.} \item{na.rm}{Default FALSE. A boolean value indicating whether or not to remove NA values. If missing values are present but not removed the @@ -31,7 +31,7 @@ against IAAO/Quintos standards.} } \description{ PRB is an index of vertical equity that quantifies the - relationship between ratios and assessed values as a percentage. In + relationship betweem ratios and assessed values as a percentage. In concrete terms, a PRB of 0.02 indicates that, on average, ratios increase by 2\% whenever assessed values increase by 100 percent. @@ -56,10 +56,10 @@ input vectors. }} \examples{ # Calculate PRB -prb(ratios_sample$assessed, ratios_sample$sale_price) +prb(ratios_sample$estimate, ratios_sample$sale_price) # Calculate PRD confidence interval -prb_ci(ratios_sample$assessed, ratios_sample$sale_price) +prb_ci(ratios_sample$estimate, ratios_sample$sale_price) } \seealso{ Other formulas: diff --git a/man/prd.Rd b/man/prd.Rd index 4e2d2f4..23ae09f 100644 --- a/man/prd.Rd +++ b/man/prd.Rd @@ -6,24 +6,24 @@ \alias{prd_met} \title{Calculate Price-Related Differential (PRD)} \usage{ -prd(assessed, sale_price, na.rm = FALSE) +prd(estimate, sale_price, na.rm = FALSE) -prd_ci(assessed, sale_price, nboot = 100, alpha = 0.05, na.rm = FALSE) +prd_ci(estimate, sale_price, nboot = 1000, alpha = 0.05, na.rm = FALSE) prd_met(x) } \arguments{ -\item{assessed}{A numeric vector of assessed values. Must be the same +\item{estimate}{A numeric vector of assessed values. Must be the same length as \code{sale_price}.} \item{sale_price}{A numeric vector of sale prices. Must be the same length -as \code{assessed}.} +as \code{estimate}.} \item{na.rm}{Default FALSE. A boolean value indicating whether or not to remove NA values. If missing values are present but not removed the function will output NA.} -\item{nboot}{Default 100. Number of iterations to use to estimate +\item{nboot}{Default 1000. Number of iterations to use to estimate the output statistic confidence interval.} \item{alpha}{Default 0.05. Numeric value indicating the confidence @@ -60,10 +60,10 @@ input vectors. }} \examples{ # Calculate PRD -prd(ratios_sample$assessed, ratios_sample$sale_price) +prd(ratios_sample$estimate, ratios_sample$sale_price) # Calculate PRD confidence interval -prd_ci(ratios_sample$assessed, ratios_sample$sale_price) +prd_ci(ratios_sample$estimate, ratios_sample$sale_price) } \seealso{ Other formulas: diff --git a/man/quintos_sample.Rd b/man/quintos_sample.Rd new file mode 100644 index 0000000..c089060 --- /dev/null +++ b/man/quintos_sample.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{quintos_sample} +\alias{quintos_sample} +\title{Sample of sales and estimated market values provided by Quintos in the + following MKI papers:} +\format{ +A data frame with 30 observation and 2 variables: +\describe{ + \item{estimate}{Assessed fair market value} + \item{sale_price}{Recorded sale price of this property} +} +} +\usage{ +quintos_sample +} +\description{ +Sample of sales and estimated market values provided by Quintos in the + following MKI papers: +} +\references{ +Quintos, C. (2020). A Gini measure for vertical equity in property +assessments. + +Quintos, C. (2021). A Gini decomposition of the sources of inequality in +property assessments. +} +\keyword{datasets} diff --git a/man/ratios_sample.Rd b/man/ratios_sample.Rd index 9cacd96..ea2e185 100644 --- a/man/ratios_sample.Rd +++ b/man/ratios_sample.Rd @@ -5,12 +5,11 @@ \alias{ratios_sample} \title{Sample of ratio and sales data pulled from CCAO records} \format{ -A data frame with 979 observation and 4 variables: +A data frame with 979 observation and 3 variables: \describe{ - \item{assessed}{The fair market assessed value predicted by CCAO assessment + \item{estimate}{The fair market assessed value predicted by CCAO assessment models, including any successful appeals} \item{sale_price}{The recorded sale price of this property} - \item{ratio}{Sales ratio representing fair market value / sale price} \item{town}{Township name the property is in} } } diff --git a/tests/testthat/data/mki_ki_data.csv b/tests/testthat/data/mki_ki_data.csv deleted file mode 100644 index dc99e5e..0000000 --- a/tests/testthat/data/mki_ki_data.csv +++ /dev/null @@ -1,31 +0,0 @@ -"","Sale_Price","Assessed" -"1",32900,37299 -"2",36000,40166 -"3",54000,56317 -"4",64500,66184 -"5",68000,69487 -"6",70000,71515 -"7",74000,75338 -"8",80000,81036 -"9",84900,85673 -"10",89000,85021 -"11",94250,90046 -"12",99000,94089 -"13",105900,100227 -"14",109000,103157 -"15",115000,108290 -"16",124500,117099 -"17",129900,115347 -"18",135000,119678 -"19",149000,131631 -"20",155800,137321 -"21",163500,143974 -"22",175000,153572 -"23",179000,148457 -"24",185600,153488 -"25",199900,165040 -"26",215000,176940 -"27",235000,192959 -"28",250000,180046 -"29",279000,200240 -"30",295000,211445 diff --git a/tests/testthat/test-ci.R b/tests/testthat/test-ci.R index eecceae..4244ee4 100644 --- a/tests/testthat/test-ci.R +++ b/tests/testthat/test-ci.R @@ -4,18 +4,17 @@ context("load testing data") data("ratios_sample") # Extract the components of the dataframe as vectors -ratio <- ratios_sample$ratio sale_price <- ratios_sample$sale_price -assessed <- ratios_sample$assessed +estimate <- ratios_sample$estimate ##### TEST COD CI ##### context("test cod_ci function") -# Calculate PRB CI -cod_ci_out_95 <- cod_ci(ratio, nboot = 1000) -cod_ci_out_80 <- cod_ci(ratio, nboot = 1000, alpha = 0.2) +# Calculate COD CI +cod_ci_out_95 <- cod_ci(estimate, sale_price, nboot = 1000) +cod_ci_out_80 <- cod_ci(estimate, sale_price, nboot = 1000, alpha = 0.2) test_that("returns expected type", { expect_type(cod_ci_out_95, "double") @@ -30,21 +29,22 @@ test_that("output equal to expected", { test_that("bad input data stops execution", { expect_error(cod_ci(numeric(0))) - expect_error(cod_ci(numeric(10))) - expect_error(cod_ci(c(ratio, Inf))) - expect_error(cod_ci(data.frame(ratio))) - expect_error(cod_ci(c(ratio, NaN))) - expect_error(cod_ci(c(ratio, "2"))) - expect_error(cod_ci(ratio, na.rm = "yes")) + expect_error(cod_ci(numeric(10), numeric(10))) + expect_error(cod_ci(c(estimate, Inf), c(sale_price, 0))) + expect_error(cod_ci(estimate, c(sale_price, 10e5))) + expect_error(cod_ci(data.frame(estimate), sale_price)) + expect_error(cod_ci(c(estimate, NaN), c(sale_price, 1))) + expect_error(cod_ci(c(estimate, "2"), c(sale_price, 1))) + expect_error(cod_ci(estimate, sale_price, na.rm = "yes")) }) test_that("incomplete data returns NAs unless removed", { expect_equal( - cod_ci(c(ratio, NA)), + cod_ci(c(estimate, NA), c(sale_price, 10e5)), NA_real_ ) expect_equivalent( - cod_ci(c(ratio, NA), nboot = 1000, na.rm = TRUE), + cod_ci(c(estimate, NA), c(sale_price, 10e5), na.rm = TRUE), c(16.49595, 18.84529), tolerance = 0.04 ) @@ -54,9 +54,9 @@ test_that("incomplete data returns NAs unless removed", { ##### TEST PRD CI ##### context("test prb_ci function") -# Calculate PRB CI -prd_ci_out_95 <- prd_ci(assessed, sale_price, nboot = 1000) -prd_ci_out_80 <- prd_ci(assessed, sale_price, nboot = 1000, alpha = 0.2) +# Calculate PRD CI +prd_ci_out_95 <- prd_ci(estimate, sale_price, nboot = 1000) +prd_ci_out_80 <- prd_ci(estimate, sale_price, nboot = 1000, alpha = 0.2) test_that("returns expected type", { expect_type(prd_ci_out_95, "double") @@ -72,21 +72,21 @@ test_that("output equal to expected", { test_that("bad input data stops execution", { expect_error(prd_ci(numeric(0))) expect_error(prd_ci(numeric(10), numeric(10))) - expect_error(prd_ci(c(assessed, Inf), c(sale_price, 0))) - expect_error(prd_ci(assessed, c(sale_price, 10e5))) - expect_error(prd_ci(data.frame(assessed), sale_price)) - expect_error(prd_ci(c(assessed, NaN), c(sale_price, 1))) - expect_error(prd_ci(c(assessed, "2"), c(sale_price, 1))) - expect_error(prd_ci(assessed, sale_price, na.rm = "yes")) + expect_error(prd_ci(c(estimate, Inf), c(sale_price, 0))) + expect_error(prd_ci(estimate, c(sale_price, 10e5))) + expect_error(prd_ci(data.frame(estimate), sale_price)) + expect_error(prd_ci(c(estimate, NaN), c(sale_price, 1))) + expect_error(prd_ci(c(estimate, "2"), c(sale_price, 1))) + expect_error(prd_ci(estimate, sale_price, na.rm = "yes")) }) test_that("incomplete data returns NAs unless removed", { expect_equal( - prd_ci(c(assessed, NA), c(sale_price, 10e5)), + prd_ci(c(estimate, NA), c(sale_price, 10e5)), NA_real_ ) expect_equivalent( - prd_ci(c(assessed, NA), c(sale_price, 10e5), na.rm = TRUE), + prd_ci(c(estimate, NA), c(sale_price, 10e5), na.rm = TRUE), c(1.034447, 1.062625), tolerance = 0.04 ) @@ -98,8 +98,8 @@ test_that("incomplete data returns NAs unless removed", { context("test prb_ci function") # Calculate PRB CI -prb_ci_out_95 <- prb_ci(assessed, sale_price) -prb_ci_out_80 <- prb_ci(assessed, sale_price, alpha = 0.2) +prb_ci_out_95 <- prb_ci(estimate, sale_price) +prb_ci_out_80 <- prb_ci(estimate, sale_price, alpha = 0.2) test_that("returns expected type", { expect_type(prb_ci_out_95, "double") @@ -115,21 +115,21 @@ test_that("output equal to expected", { test_that("bad input data stops execution", { expect_error(prb_ci(numeric(0))) expect_error(prb_ci(numeric(10), numeric(10))) - expect_error(prb_ci(c(assessed, Inf), c(sale_price, 0))) - expect_error(prb_ci(assessed, c(sale_price, 10e5))) - expect_error(prb_ci(data.frame(assessed), sale_price)) - expect_error(prb_ci(c(assessed, NaN), c(sale_price, 1))) - expect_error(prb_ci(c(assessed, "2"), c(sale_price, 1))) - expect_error(prb_ci(assessed, sale_price, na.rm = "yes")) + expect_error(prb_ci(c(estimate, Inf), c(sale_price, 0))) + expect_error(prb_ci(estimate, c(sale_price, 10e5))) + expect_error(prb_ci(data.frame(estimate), sale_price)) + expect_error(prb_ci(c(estimate, NaN), c(sale_price, 1))) + expect_error(prb_ci(c(estimate, "2"), c(sale_price, 1))) + expect_error(prb_ci(estimate, sale_price, na.rm = "yes")) }) test_that("incomplete data returns NAs unless removed", { expect_equal( - prb_ci(c(assessed, NA), c(sale_price, 10e5)), + prb_ci(c(estimate, NA), c(sale_price, 10e5)), NA_real_ ) expect_equivalent( - prb_ci(c(assessed, NA), c(sale_price, 10e5), na.rm = TRUE), + prb_ci(c(estimate, NA), c(sale_price, 10e5), na.rm = TRUE), c(-0.01404379, 0.01899536), tolerance = 0.04 ) diff --git a/tests/testthat/test-formulas.R b/tests/testthat/test-formulas.R index 99b1b06..80400df 100644 --- a/tests/testthat/test-formulas.R +++ b/tests/testthat/test-formulas.R @@ -1,20 +1,16 @@ context("load testing data") -# Load the ratios sample dataset for testing +# Load the sample datasets for testing data("ratios_sample") +data("quintos_sample") # Extract the components of the dataframe as vectors -ratio <- ratios_sample$ratio sale_price <- ratios_sample$sale_price -assessed <- ratios_sample$assessed +estimate <- ratios_sample$estimate # Load example data from Quintos article -mki_ki_data <- read.csv( - rprojroot::find_testthat_root_file("data/mki_ki_data.csv") -) - -mki_ki_assessed <- mki_ki_data$Assessed -mki_ki_sale_price <- mki_ki_data$Sale_Price +mki_ki_estimate <- quintos_sample$estimate +mki_ki_sale_price <- quintos_sample$sale_price @@ -22,7 +18,7 @@ mki_ki_sale_price <- mki_ki_data$Sale_Price context("test cod function") # Calculate COD -cod_out <- cod(ratio) +cod_out <- cod(estimate, sale_price) test_that("returns numeric vector", { expect_type(cod_out, "double") @@ -35,17 +31,22 @@ test_that("output equal to expected", { test_that("bad input data stops execution", { expect_error(cod(numeric(0))) - expect_error(cod(numeric(10))) - expect_error(cod(c(cod_out, Inf))) - expect_error(cod(data.frame(ratio))) - expect_error(cod(c(ratio, NaN))) - expect_error(cod(c(ratio, "2"))) - expect_error(cod(ratio, na.rm = "yes")) + expect_error(cod(numeric(10), numeric(10))) + expect_error(cod(c(cod_out, Inf), c(prb_out, 0))) + expect_error(cod(estimate, c(sale_price, 10e5))) + expect_error(cod(data.frame(estimate), sale_price)) + expect_error(cod(c(estimate, NaN), c(sale_price, 1))) + expect_error(cod(c(estimate, "2"), c(sale_price, 1))) + expect_error(cod(estimate, sale_price, na.rm = "yes")) }) test_that("incomplete data returns NAs unless removed", { - expect_equal(cod(c(ratio, NA)), NA_real_) - expect_equal(cod(c(ratio, NA), na.rm = TRUE), 17.81457, tolerance = 0.02) + expect_equal(cod(c(estimate, NA), c(sale_price, 10e5)), NA_real_) + expect_equal( + cod(c(estimate, NA), c(sale_price, 10e5), na.rm = TRUE), + 17.81457, + tolerance = 0.02 + ) }) test_that("standard met function", { @@ -58,7 +59,7 @@ test_that("standard met function", { context("test prd function") # Calculate PRD -prd_out <- prd(assessed, sale_price) +prd_out <- prd(estimate, sale_price) test_that("returns numeric vector", { expect_type(prd_out, "double") @@ -73,20 +74,20 @@ test_that("bad input data stops execution", { expect_error(prd(numeric(0))) expect_error(prd(numeric(10), numeric(10))) expect_error(prd(c(prd_out, Inf), c(prb_out, 0))) - expect_error(prd(assessed, c(sale_price, 10e5))) - expect_error(prd(data.frame(assessed), sale_price)) - expect_error(prd(c(assessed, NaN), c(sale_price, 1))) - expect_error(prd(c(assessed, "2"), c(sale_price, 1))) - expect_error(prd(assessed, sale_price, na.rm = "yes")) + expect_error(prd(estimate, c(sale_price, 10e5))) + expect_error(prd(data.frame(estimate), sale_price)) + expect_error(prd(c(estimate, NaN), c(sale_price, 1))) + expect_error(prd(c(estimate, "2"), c(sale_price, 1))) + expect_error(prd(estimate, sale_price, na.rm = "yes")) }) test_that("incomplete data returns NAs unless removed", { expect_equal( - prd(c(assessed, NA), c(sale_price, 10e5)), + prd(c(estimate, NA), c(sale_price, 10e5)), NA_real_ ) expect_equal( - prd(c(assessed, NA), c(sale_price, 10e5), na.rm = TRUE), + prd(c(estimate, NA), c(sale_price, 10e5), na.rm = TRUE), 1.048419, tolerance = 0.02 ) @@ -102,7 +103,7 @@ test_that("standard met function", { context("test prb function") # Calculate PRB -prb_out <- prb(assessed, sale_price) +prb_out <- prb(estimate, sale_price) test_that("returns expected type", { expect_type(prb_out, "double") @@ -117,20 +118,20 @@ test_that("bad input data stops execution", { expect_error(prb(numeric(0))) expect_error(prb(numeric(10), numeric(10))) expect_error(prb(c(prb_out, Inf), c(prb_out, 0))) - expect_error(prb(assessed, c(sale_price, 10e5))) - expect_error(prb(data.frame(assessed), sale_price)) - expect_error(prb(c(assessed, NaN), c(sale_price, 1))) - expect_error(prb(c(assessed, "2"), c(sale_price, 1))) - expect_error(prb(assessed, sale_price, na.rm = "yes")) + expect_error(prb(estimate, c(sale_price, 10e5))) + expect_error(prb(data.frame(estimate), sale_price)) + expect_error(prb(c(estimate, NaN), c(sale_price, 1))) + expect_error(prb(c(estimate, "2"), c(sale_price, 1))) + expect_error(prb(estimate, sale_price, na.rm = "yes")) }) test_that("incomplete data returns NAs unless removed", { expect_equal( - prb(c(assessed, NA), c(sale_price, 10e5)), + prb(c(estimate, NA), c(sale_price, 10e5)), NA_real_ ) expect_equal( - prb(c(assessed, NA), c(sale_price, 10e5), na.rm = TRUE), + prb(c(estimate, NA), c(sale_price, 10e5), na.rm = TRUE), 0.0024757, tolerance = 0.02 ) @@ -146,7 +147,7 @@ test_that("standard met function", { context("test mki function") # Calculate MKI -mki_out <- mki(mki_ki_assessed, mki_ki_sale_price) +mki_out <- mki(mki_ki_estimate, mki_ki_sale_price) test_that("returns expected type", { expect_type(mki_out, "double") @@ -160,21 +161,21 @@ test_that("output equal to expected", { test_that("bad input data stops execution", { expect_error(mki(numeric(0))) expect_error(mki(numeric(10), numeric(10))) - expect_error(mki(c(mki_ki_assessed, Inf), c(mki_ki_sale_price, 0))) - expect_error(mki(mki_ki_assessed, c(mki_ki_sale_price, 10e5))) - expect_error(mki(data.frame(mki_ki_assessed), mki_ki_sale_price)) - expect_error(mki(c(mki_ki_assessed, NaN), c(mki_ki_sale_price, 1))) - expect_error(mki(c(mki_ki_assessed, "2"), c(mki_ki_sale_price, 1))) - expect_error(mki(mki_ki_assessed, mki_ki_sale_price, na.rm = "yes")) + expect_error(mki(c(mki_ki_estimate, Inf), c(mki_ki_sale_price, 0))) + expect_error(mki(mki_ki_estimate, c(mki_ki_sale_price, 10e5))) + expect_error(mki(data.frame(mki_ki_estimate), mki_ki_sale_price)) + expect_error(mki(c(mki_ki_estimate, NaN), c(mki_ki_sale_price, 1))) + expect_error(mki(c(mki_ki_estimate, "2"), c(mki_ki_sale_price, 1))) + expect_error(mki(mki_ki_estimate, mki_ki_sale_price, na.rm = "yes")) }) test_that("incomplete data returns NAs unless removed", { expect_equal( - mki(c(mki_ki_assessed, NA), c(mki_ki_sale_price, 10e5)), + mki(c(mki_ki_estimate, NA), c(mki_ki_sale_price, 10e5)), NA_real_ ) expect_equal( - mki(c(mki_ki_assessed, NA), c(mki_ki_sale_price, 10e5), na.rm = TRUE), + mki(c(mki_ki_estimate, NA), c(mki_ki_sale_price, 10e5), na.rm = TRUE), 0.79, tolerance = 0.01 ) @@ -190,7 +191,7 @@ test_that("standard met function", { context("test ki function") # Calculate KI -ki_out <- ki(mki_ki_assessed, mki_ki_sale_price) +ki_out <- ki(mki_ki_estimate, mki_ki_sale_price) test_that("returns expected type", { expect_type(ki_out, "double") @@ -204,21 +205,21 @@ test_that("output equal to expected", { test_that("bad input data stops execution", { expect_error(ki(numeric(0))) expect_error(ki(numeric(10), numeric(10))) - expect_error(ki(c(mki_ki_assessed, Inf), c(mki_ki_sale_price, 0))) - expect_error(ki(mki_ki_assessed, c(mki_ki_sale_price, 10e5))) - expect_error(ki(data.frame(mki_ki_assessed), mki_ki_sale_price)) - expect_error(ki(c(mki_ki_assessed, NaN), c(mki_ki_sale_price, 1))) - expect_error(ki(c(mki_ki_assessed, "2"), c(mki_ki_sale_price, 1))) - expect_error(ki(mki_ki_assessed, mki_ki_sale_price, na.rm = "yes")) + expect_error(ki(c(mki_ki_estimate, Inf), c(mki_ki_sale_price, 0))) + expect_error(ki(mki_ki_estimate, c(mki_ki_sale_price, 10e5))) + expect_error(ki(data.frame(mki_ki_estimate), mki_ki_sale_price)) + expect_error(ki(c(mki_ki_estimate, NaN), c(mki_ki_sale_price, 1))) + expect_error(ki(c(mki_ki_estimate, "2"), c(mki_ki_sale_price, 1))) + expect_error(ki(mki_ki_estimate, mki_ki_sale_price, na.rm = "yes")) }) test_that("incomplete data returns NAs unless removed", { expect_equal( - ki(c(mki_ki_assessed, NA), c(mki_ki_sale_price, 10e5)), + ki(c(mki_ki_estimate, NA), c(mki_ki_sale_price, 10e5)), NA_real_ ) expect_equal( - ki(c(mki_ki_assessed, NA), c(mki_ki_sale_price, 10e5), na.rm = TRUE), + ki(c(mki_ki_estimate, NA), c(mki_ki_sale_price, 10e5), na.rm = TRUE), -0.0595, tolerance = 0.003 ) diff --git a/tests/testthat/test-sale_chasing.R b/tests/testthat/test-sale_chasing.R index 7ac15ff..ae50b32 100644 --- a/tests/testthat/test-sale_chasing.R +++ b/tests/testthat/test-sale_chasing.R @@ -4,19 +4,19 @@ context("load testing data") data("ratios_sample") # Extract the components of the dataframe as vectors -sample_ratios <- ratios_sample$ratio +sample_ratios <- ratios_sample$estimate / ratios_sample$sale_price normal_ratios <- c(rnorm(1000, 1, 0.15)) chased_ratios <- c(rnorm(900, 1, 0.15), rep(1, 100)) ##### TEST CHASING DETECTION ##### -context("test detect_chashing function") +context("test is_sales_chased function") # Run detection -sample_out <- detect_chasing(sample_ratios) -normal_out <- detect_chasing(normal_ratios) -chased_out <- detect_chasing(chased_ratios) +sample_out <- is_sales_chased(sample_ratios) +normal_out <- is_sales_chased(normal_ratios) +chased_out <- is_sales_chased(chased_ratios) test_that("returns logical value", { expect_type(sample_out, "logical") @@ -31,20 +31,20 @@ test_that("output equal to expected", { }) test_that("bad input data stops execution", { - expect_error(detect_chasing(numeric(0))) - expect_error(detect_chasing(c(sample_ratios, Inf))) - expect_error(detect_chasing(data.frame(sample_ratios))) - expect_error(detect_chasing(c(sample_ratios, NaN))) - expect_error(detect_chasing(c(sample_ratios, "2"))) - expect_error(detect_chasing(sample_ratios, na.rm = "yes")) + expect_error(is_sales_chased(numeric(0))) + expect_error(is_sales_chased(c(sample_ratios, Inf))) + expect_error(is_sales_chased(data.frame(sample_ratios))) + expect_error(is_sales_chased(c(sample_ratios, NaN))) + expect_error(is_sales_chased(c(sample_ratios, "2"))) + expect_error(is_sales_chased(sample_ratios, na.rm = "yes")) }) test_that("incomplete data returns NAs unless removed", { - expect_equal(detect_chasing(c(sample_ratios, NA)), NA) - expect_false(detect_chasing(c(sample_ratios, NA), na.rm = TRUE)) - expect_true(detect_chasing(c(chased_ratios, NA), na.rm = TRUE)) + expect_equal(is_sales_chased(c(sample_ratios, NA)), NA) + expect_false(is_sales_chased(c(sample_ratios, NA), na.rm = TRUE)) + expect_true(is_sales_chased(c(chased_ratios, NA), na.rm = TRUE)) }) test_that("warnings thrown when expected", { - expect_warning(detect_chasing(rnorm(29))) + expect_warning(is_sales_chased(rnorm(29))) }) diff --git a/vignettes/example-ratio-study.Rmd b/vignettes/example-ratio-study.Rmd index 7e2a670..4b8aeff 100644 --- a/vignettes/example-ratio-study.Rmd +++ b/vignettes/example-ratio-study.Rmd @@ -163,10 +163,10 @@ combined <- combined %>% pivot_longer( mailed_tot:board_tot, names_to = "stage", - values_to = "assessed" + values_to = "estimate" ) %>% - mutate_at(vars(sale_price, assessed), as.numeric) %>% - mutate(ratio = (assessed * 10) / sale_price) + mutate_at(vars(sale_price, estimate), as.numeric) %>% + mutate(ratio = (estimate * 10) / sale_price) ``` ### Sales ratio statistics by township @@ -177,19 +177,19 @@ Cook County has jurisdictions called townships that are important units for asse # For each town and stage, calculate COD, PRD, and PRB, and their respective # confidence intervals then arrange by town name and stage of assessment combined %>% - filter(assessed > 0) %>% + filter(estimate > 0) %>% group_by(township_name, stage) %>% summarise( n = n(), - cod = cod(ratio, na.rm = TRUE), + cod = cod(estimate, sale_price, na.rm = TRUE), cod_ci = paste( - round(cod_ci(ratio, nboot = 1000, na.rm = TRUE), 3), + round(cod_ci(estimate, sale_price, nboot = 1000, na.rm = TRUE), 3), collapse = ", " ), cod_met = cod_met(cod), - prb = prb(assessed, sale_price, na.rm = TRUE), + prb = prb(estimate, sale_price, na.rm = TRUE), prb_ci = paste( - round(prb_ci(assessed, sale_price, na.rm = TRUE), 3), + round(prb_ci(estimate, sale_price, na.rm = TRUE), 3), collapse = ", " ), prb_met = prb_met(prb) @@ -276,20 +276,20 @@ Using the ordered data, you can plot the classic [Lorenz curve](https://en.wikip ```{r} gini_data <- combined %>% - select(sale_price, assessed) %>% + select(sale_price, estimate) %>% arrange(sale_price) sale_price <- gini_data$sale_price -assessed <- gini_data$assessed +estimate <- gini_data$estimate lorenz_data_price <- data.frame( pct = c(0, cumsum(sale_price) / sum(sale_price)), cum_pct = c(0, seq_along(sale_price)) / length(sale_price) ) -lorenz_data_assessed <- data.frame( - pct = c(0, cumsum(assessed) / sum(assessed)), - cum_pct = c(0, seq_along(assessed)) / length(assessed) +lorenz_data_estimate <- data.frame( + pct = c(0, cumsum(estimate) / sum(estimate)), + cum_pct = c(0, seq_along(estimate)) / length(estimate) ) ggplot() + @@ -298,7 +298,7 @@ ggplot() + aes(x = cum_pct, y = pct), color = "blue" ) + geom_line( - data = lorenz_data_assessed, + data = lorenz_data_estimate, aes(x = cum_pct, y = pct), color = "red" ) + geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "green") + @@ -337,27 +337,27 @@ To translate these curves to a single metric, the Kakwani Index (KI) and Modifie - **Modified Kakwani Index:** `Assessed Gini / Sale Price Gini` ```{r} -# Calculate the sum of the n elements of the assessed vector -n <- length(assessed) -g_assessed <- sum(assessed * seq_len(n)) +# Calculate the sum of the n elements of the estimate vector +n <- length(estimate) +g_estimate <- sum(estimate * seq_len(n)) # Compute the Gini coefficient based on the previously calculated sum # and the increasing sum of all elements in the assessed vector -g_assessed <- 2 * g_assessed / sum(assessed) - (n + 1L) +g_estimate <- 2 * g_estimate / sum(estimate) - (n + 1L) # Normalize the Gini coefficient by dividing it by n. -gini_assessed <- g_assessed / n +gini_estimate <- g_estimate / n # Follow the same process for the sale_price vector g_sale <- sum(sale_price * seq_len(n)) g_sale <- 2 * g_sale / sum(sale_price) - (n + 1L) gini_sale <- g_sale / n -MKI <- round(gini_assessed / gini_sale, 4) -KI <- round(gini_assessed - gini_sale, 4) +MKI <- round(gini_estimate / gini_sale, 4) +KI <- round(gini_estimate - gini_sale, 4) ``` -The output for the Modified Kakwani Index is `r MKI`, and the Kakwani Index is `r KI`. According to the following table, this means that the assessments are slightly regressive. +The output for the Modified Kakwani Index is `r MKI`, and the Kakwani Index is `r KI`. According to the following table, this means that the assessments are slightly regressive. | KI Range | MKI Range | Interpretation | |:-------------------|:-------------------|:-------------------| @@ -388,8 +388,8 @@ ggplot() + ```{r} # Detect chasing for each vector tibble( - "Blue Chased?" = detect_chasing(normal_ratios), - "Red Chased?" = detect_chasing(chased_ratios) + "Blue Chased?" = is_sales_chased(normal_ratios), + "Red Chased?" = is_sales_chased(chased_ratios) ) %>% kable(format = "markdown", digits = 3) ```