diff --git a/.RData b/.RData deleted file mode 100644 index c18bcffd3..000000000 Binary files a/.RData and /dev/null differ diff --git a/.Rbuildignore b/.Rbuildignore index 38a2c484d..d82f4d649 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -27,4 +27,8 @@ Spreadsheets ^tic\.R$ rcppExports.cpp stanExports_* -^data-raw$ \ No newline at end of file +^data-raw$ +version_lookup.R +version_lookup.csv +.gitsum +Presentations diff --git a/.gitignore b/.gitignore index 6580fc6a8..75320bd7e 100644 --- a/.gitignore +++ b/.gitignore @@ -19,5 +19,8 @@ Spreadsheets *.h *.o *.so -docs/ +.gitsum .RData +/doc/ +/Meta/ +Presentations diff --git a/DESCRIPTION b/DESCRIPTION index 13d51e14e..e8bb066c2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: DemoTools Type: Package Title: Standardize, Evaluate, and Adjust Demographic Data -Version: 01.13.39 -Date: 2021-03-23 +Version: 01.13.80 +Date: 2023-12-29 Authors@R: c( person("Tim", "Riffe", role = c("aut", "cre"), email = "tim.riffe@gmail.com", comment = c(ORCID = "0000-0002-2673-4622")), @@ -22,7 +22,7 @@ License: file LICENSE LazyLoad: yes LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.1 +RoxygenNote: 7.2.3 Depends: R (>= 3.6), Rcpp (>= 0.12.0), @@ -31,15 +31,16 @@ Suggests: testthat (>= 2.1.0), knitr, rmarkdown, + markdown, DT, ggplot2 RdMacros: Rdpack Imports: data.table (>= 1.13.6), demogR, - DemoToolsData (>= 0.1.1), + DemoToolsData (>= 0.1.5), dplyr, - fertestr (>= 0.0.5), + fertestr (>= 0.10.00), lubridate, magrittr, MortalityLaws (>= 1.7.0), @@ -48,10 +49,11 @@ Imports: rstan (>= 2.18.1), tibble, tidybayes, - ungroup + ungroup (>= 1.4.2) BugReports: https://github.com/timriffe/DemoTools/issues Remotes: - github::josehcms/fertestr, - github::timriffe/DemoToolsData + github::timriffe/fertestr, + github::timriffe/DemoToolsData, + github::timriffe/MortalityLaws Encoding: UTF-8 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index cbc1bafd5..0f09fb057 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export("%>%") export(ADM) export(AGEN) +export(HMD_old_logquad) export(ID) export(IRD) export(OPAG) @@ -71,6 +72,7 @@ export(is_age_redundant) export(is_age_sequential) export(is_single) export(loess_smth1) +export(logquad_augmented) export(lt_a_closeout) export(lt_a_pas) export(lt_a_un) @@ -161,6 +163,7 @@ importFrom(data.table,rbindlist) importFrom(data.table,setDT) importFrom(data.table,uniqueN) importFrom(demogR,cdmltw) +importFrom(dplyr,case_when) importFrom(dplyr,group_by) importFrom(dplyr,mutate) importFrom(dplyr,rename) @@ -192,3 +195,4 @@ importFrom(tibble,tibble) importFrom(tidybayes,gather_draws) importFrom(ungroup,pclm) importFrom(utils,head) +importFrom(utils,tail) diff --git a/NEWS.md b/NEWS.md index fd983f7e1..e1b87183d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,14 +1,17 @@ # Changes in this update -2018-08-15 + +## 2018-08-15 - NEWS.md file added. When a first release comes out, major functionality and features will be listed here at that time. -2019-11-28 +## 2019-11-28 - new function mig_calculate_rc() added - several function names harmonized, see ?DemoTools-renamed - - graduate_sprage() and other graduation functions no longer support matrix inputs + - graduate_sprague() and other graduation functions no longer support matrix inputs +## 2023-12-29 + - minor fixes to pass checks \ No newline at end of file diff --git a/R/AGEINT.R b/R/AGEINT.R index e875e4b16..790cc5c5d 100644 --- a/R/AGEINT.R +++ b/R/AGEINT.R @@ -89,7 +89,8 @@ interpolatePop <- #' @param datesOut vector of dates. 
The desired dates to interpolate to. See details for ways to express it.
#' @param method string. The method to use for the interpolation, either \code{"linear"}, \code{"exponential"}, or \code{"power"}. Default \code{"linear"}.
#' @param power numeric power to interpolate by, if \code{method = "power"}. Default 2.
-#' @param extrap logical. In case \code{datesOut} is out of range of datesIn, do extrapolation using slope in extreme pairwise. Deafult \code{FALSE}.
+#' @param extrap logical. In case \code{datesOut} is out of range of `datesIn`, do extrapolation using the slope of the extreme pairwise points. Default \code{FALSE}.
+#' @param negatives logical. Set to \code{TRUE} if negative outputs are acceptable. Default \code{FALSE}.
#' @param ... arguments passed to \code{stats::approx}. For example, \code{rule}, which controls extrapolation behavior.
#' @details The age group structure of the output is the same as that of the input. Ideally, \code{datesOut} should be within the range of \code{datesIn}. If not, the left-side and right-side output are held constant outside the range if \code{rule = 2} is passed in, otherwise \code{NA} is returned (see examples). Dates can be given in three ways 1) a \code{Date} class object, 2) an unambiguous character string in the format \code{"YYYY-MM-DD"}, or 3) as a decimal date consisting in the year plus the fraction of the year passed as of the given date.
#'
@@ -198,6 +199,7 @@ interp <- function(popmat,
                   method = c("linear", "exponential", "power"),
                   power = 2,
                   extrap = FALSE,
+                  negatives = FALSE,
                   ...) {
  # ... args passed to stats::approx . Can give control over extrap assumptions
  # IW: extrap=T for extrapolate following each slope in extreme pairwise.
@@ -282,10 +284,10 @@ interp <- function(popmat,
    int <- int ^ power
  }

-  # IW: no negatives when extrapolate. Thinking in pop and lt expressions
-  if(all(!is.na(int)) & any(int<0)){
-    cat("Negative values were turned 0. No accepted in population counts, fertility rates or life table functions.\n")
-    int[int<0] <- 0
+  # IW: no negatives when extrapolate. Thinking in pop and lt expressions. Inactive when negatives are explicitly accepted
+  if(!negatives & all(!is.na(int)) & any(int < 0)){
+    cat("Negative values have been replaced with 0s.\nNegatives not accepted in population counts,\n fertility rates or life table functions.\nYou can allow negatives (e.g. interpolating net migration)\n by specifying negatives = TRUE")
+    int[int < 0] <- 0
  }

  int
diff --git a/R/AGESEX.R b/R/AGESEX.R
index 836b4a067..3b5f5634e 100644
--- a/R/AGESEX.R
+++ b/R/AGESEX.R
@@ -19,7 +19,7 @@
#' It is also assumed that the final age group is open, unless \code{ageMax < max(Age)}.
#' Setting \code{OAG = FALSE} will override this and potentially include \code{max(Age)} in calculations.
#' The method argument determines the weighting of numerators and denominators, where the UN method puts
-#' twice the numerator over the sum of the adjacent ages classes, Zelnich does thrice the
+#' twice the numerator over the sum of the adjacent age classes, Zelnik does thrice the
#' numerator over the sum of the whole range from the next lowest to the next highest age, and
#' Ramachandran does four times the numerator over the same sum, but with the central age
#' double-counted in the numerator.
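A minimal usage sketch of the `negatives` argument added above; the net-migration matrix and dates are invented for illustration, and the call assumes `interp()`'s usual `datesIn`/`datesOut` arguments:

library(DemoTools)
# Net migration can be legitimately negative, so we switch off the
# zero-imputation that interp() would otherwise apply (see the cat() message above).
mig <- cbind("1990" = c(-120, 60, -35),
             "2000" = c( -80, 20, -10))
rownames(mig) <- c(0, 5, 10)
interp(mig,
       datesIn   = c(1990, 2000),
       datesOut  = 1995,
       method    = "linear",
       negatives = TRUE)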
diff --git a/R/MAV.R b/R/MAV.R
index f57291a54..659dfca6f 100644
--- a/R/MAV.R
+++ b/R/MAV.R
@@ -1,22 +1,22 @@
-# Author: JG
-# Edited 9-Dec-2017 by TR
-# Edited Aug 2018 by TR
-
-###############################################################################

#' Calculate the moving average (mav) over 3 or 5 years.
#' @description This arithmetic smoothing technique aims to eliminate irregularities of the population pyramid by averaging values in a moving window of user-defined width.
-#' @details The moving window is applied symmetrically. Data endpoints are imputed with \code{NA}s in output: the is nothing under 0 or over the highest closed age group to average with. The open age group is imputed with \code{NA} prior to calculations, since it cannot be averaged into the next lowest group. For example, for \code{n=3}, age 0 will be \code{NA}, as will the open age group and next lowest age. Age intervals are assumed uniform. This function could be used with either single or 5-year age groups.
+#' @details
+#' The moving window is applied symmetrically. By default (`tails = FALSE`) data endpoints are imputed with `NA`s in output: there is nothing under 0 or over the highest closed age group to average with. The open age group is not used in averaging, and it is returned as-is. Age intervals are assumed uniform. This function could be used with either single or 5-year age groups.
+#'
+#' If `tails` is set to `TRUE`, then tails are imputed using moving averages with successively smaller values of `n` (the cascade method).
#' @param Value numeric. A vector of demographic counts in single age groups.
#' @param n integer. A single number, (often 3 or 5), indicating the number of years taken to smooth the population distribution by single ages.
#' @param Age integer. A vector of ages corresponding to the lower integer bound of the counts.
-#' @param OAG logical. Whether or not the top age group is open. Default \code{TRUE}.
-#' @details Ages may be single or grouped, but all age intervals are assumed equal.
+#' @param OAG logical. Whether or not the top age group is open. Default `TRUE`.
+#' @param tails logical. If set to `TRUE`, smaller-n moving averages are applied on both tails
+#' such that all values are non-NA. If `FALSE` (default), tails are set to NA
+#' due to the lag of moving averages.
#' @return Vector with the smoothed demographic counts.
+#'
#' @export
-#' @author Juan Galeano
#' @references
#' \insertRef{GDA1981IREDA}{DemoTools}
@@ -31,9 +31,16 @@
#' 323263,9535,13906,9063,8294,90459,9817,6376,8884,3773,160609)
#'Age <- 0:70
#'# final age group assumed open
-#'mav(Pop, n = 3, Age = Age)
+#' mav(Pop, n = 3, Age = Age)
+#'
#'\dontrun{
-#' nwindows <- sapply(seq(3, 11, by = 2),mav, Value = Pop, Age = Age)
+#' odds <- seq(3, 11, by = 2)
+#' nwindows <- sapply(odds,
+#'                    mav,
+#'                    Value = Pop,
+#'                    Age = Age,
+#'                    OAG = TRUE,
+#'                    tails = FALSE)
#' cols <- gray(seq(.8, 0, length = 5))
#' lwds <- seq(3, 1, length = 5)
#' plot(Age,Pop, col = "red", xlab = "Age", ylab = "The counts", pch=16,
@@ -45,21 +52,92 @@
#'        lwd = lwds,
#'        legend = paste0("n=",seq(3,11,by=2)))
#'}
+#'
+#' # For cascading smoothing on the tails:
+#' mav(Pop, Age, tails = TRUE)
+#'
+#'\dontrun{
+#'# Compare
+#' nwindows_tails <- sapply(odds,
+#'                          mav,
+#'                          Value = Pop,
+#'                          Age = Age,
+#'                          OAG = TRUE,
+#'                          tails = TRUE)
+#'
+#' colnames(nwindows) <- odds
+#' colnames(nwindows_tails) <- odds
+#'
+#' # NA triangles are completed with
+#' # successively smaller ns.
+#' head(nwindows)
+#' head(nwindows_tails)
+#'
+#' tail(nwindows)
+#' tail(nwindows_tails)
+#' }

-mav <- function(Value, Age, n = 3, OAG = TRUE) {
+mav <- function(Value, Age, n = 3, OAG = TRUE, tails = FALSE) {
  In <- Value
  if (missing(Age)) {
    Age <- as.integer(names(Value))
  }
+
+  # save OAG
  if (OAG) {
+    OrigOAGpop <- Value[length(Value)]
    Value[length(Value)] <- NA
  }
-  # TR: not sure why n needs to be hard coded
+
  Out <- ma(Value, n)
+  # apply cascading tails if needed
+  if (tails){
+    Out <- mav_tails(Value = Value,
+                     Age = Age,
+                     MavOut = Out,
+                     n = n,
+                     OAG = OAG)
+  }
+  # plug OAG back in
+  if (OAG){
+    Out[length(Value)] <- OrigOAGpop
+  }
+
  structure(Out, names = Age)
}

+# Not exported; reached by calling mav() with tails = TRUE.
+mav_tails <- function(Value, Age, MavOut, n = 3, OAG = TRUE) {
+  NewMavOut <- MavOut
+
+  # Last should point to last age group to use
+  Last <- length(Age)
+  if (OAG) {
+    Last <- Last - 1
+    NewMavOut[Last+1] <- Value[Last+1]
+  }
+
+  NewMavOut[1] <- Value[1]
+  NewMavOut[Last] <- Value[Last]
+
+  MavLev <- c(1,2,4,6,8)
+
+  if (n >= 2) {
+    for(i in 2:(as.integer(n/2))) {
+
+      # TR: why not just calculate the whole thing at once and pick out
+      # the two values as needed?
+      NewMavOut[i] <- ma(Value[1:(MavLev[i]+1)], n = MavLev[i])[i]
+      # subscripts right and select just the correct age
+      NewMavOut[Last - i + 1] <- ma(Value[(Last - MavLev[i] ):Last],
+                                    n = MavLev[i])[ i ]
+
+    }
+  }
+
+  NewMavOut
+}

#Pop <-c(303583,390782,523903,458546,517996,400630,485606,325423,471481,189710,
diff --git a/R/OGIVE.R b/R/OGIVE.R
index 4b9be88e8..48665b544 100644
--- a/R/OGIVE.R
+++ b/R/OGIVE.R
@@ -182,7 +182,7 @@ poly_smth1 <-
#' @details LOESS (locally weighted smoothing) helps to smooth data over age, preserving the open age group if necessary.
#' It is a popular tool to create a smooth line through a timeplot or scatter plot.
#' Loess smoothness may be tweaked by specifying an optional \code{"span"} argument.
-#' Polynomial fitting is used to mooth data over age or time fitting linear models.
+#' Polynomial fitting is used to smooth data over age or time by fitting linear models.
#' It can be tweaked by changing the degree and by either log or power transforming.
#' The open age group can be kept as-is if desired by specifying \code{OAG = TRUE}.
#' May be used on any age groups, including irregularly spaced, single age, or 5-year age groups.
diff --git a/R/OPAG.R b/R/OPAG.R
index 047b35dd7..0516cd6ce 100644
--- a/R/OPAG.R
+++ b/R/OPAG.R
@@ -2,22 +2,29 @@
# [ ] All choice of Age_fit / AgeInt_fit based on criteria (see PJ Drive folder)
# [ ] ensure age groups are flexible as required.
# [ ] what happens when one input is in a different age group than another?
-# [ ] OPAG_simple() should allow for non-single ages
+# [x] OPAG_simple() should allow for non-single ages
# [ ] add unit tests
# [ ] check for age group commensurability and warn if necessary
# [ ] add more examples to OPAG?
-# [ ] remove rownames message from DownloadLx(), haha
+# [ ] remove rownames message from DownloadLx(), haha I DON'T SEE THIS
# [ ] test OPAG_simple() with non-single ages groups, update documentation if necessary.
# [ ] harmonize args betwenn OPAG_simple and OPAG family.
+# [x] make AgeInt not required +# [ ] document Age_fit better +# [x] change open age formula in warp function +# [x] fix continuous = FALSE formula/ then removed +# [x] ensure Lx is single ages once for use throughout +# [x] change default method to "mono" + # Author: tim ############################################################################### # distribute population in open age group over higher ages. # The PAS implementation uses stable populations, and it will be added -# here in the future, as well as other optiond. The main missing piece +# here in the future, as well as other options. The main missing piece # is a good collection of model lifetables. -#' redistripute an open age group count over higher ages proportional to an arbitrary standard +#' redistribute an open age group count over higher ages proportional to an arbitrary standard #' @description This method could be useful whenever a reasonable standard is available. At present the standard must be supplied by the user. #' @details In this implementation both the original population counts and the standard must be in single ages. #' @param Pop numeric vector of population counts @@ -72,7 +79,7 @@ OPAG_simple <- StPop, StAge, OAnew = max(StAge)) { - # # assume single + # # assume single NOT NEEDED See age concordance # stopifnot(is_single(Age)) # stopifnot(is_single(StAge)) # OAG can be less than or equal to max age @@ -80,13 +87,15 @@ OPAG_simple <- stopifnot(OAnew %in% StAge) # age and pop vectors must match lengths, assume ordered stopifnot(length(Pop) == length(Age)) + stopifnot(length(StPop) == length(StAge)) # age concordance - #stopifnot(all(Age %in% StAge)) + minStAge = min(StAge) + stopifnot(all(Age[Age >= minStAge] %in% StAge)) # group pop down to OAG Pop <- groupOAG(Pop, Age, OAnow) StPop <- groupOAG(StPop, StAge, OAnew) - + # even up lengths N <- length(Pop) Age <- Age[1:N] @@ -94,19 +103,19 @@ OPAG_simple <- # same for standard StN <- length(StPop) StAge <- StAge[1:StN] - - # make stadnard distribution. + + # make standard distribution. standard <- rescale_vector(StPop[StAge >= OAnow], scale = 1) # redistribute OAG PopUpper <- OAtot * standard # keep lower ages of Pop PopLower <- Pop[1:(N - 1)] - + # graft, name, and return out <- c(PopLower, PopUpper) Ageout <- sort(unique(c(Age, StAge))) names(out) <- Ageout - + out } @@ -114,16 +123,13 @@ OPAG_simple <- #' Warps a given stationary population into a stable population #' @description We take `nLx` as indicative of a stationary population age structure, #' then subject the population structure to long-term growth by a constant rate, `r`. -#' @details `nLx` could be any population structure of any scale, as long as you're comfortable +#' @details `Lx1` could be any population structure of any scale, as long as you're comfortable #' assuming it's stationary and can be warped into stable. For the oldest ages, this is probably #' quite often an acceptable and useful approximation. The transformation is applied at the single-age scale, even if the input `nLx` is in wider (e.g. abridged) age groups. When needed, we reduce to single ages using (default) `graduate_uniform()`, then apply the transformation, then group back. This is innocuous if `nLx` is given in single ages. You may want to change `method` to `"mono"` or `"pclm"`. 
#'
-#' @param nLx numeric vector of stationary population age structure in arbitrary integer age groups
-#' @param Age interger vector of lower bounds of age groups of `nLx`
+#' @param Lx1 numeric vector of stationary population age structure in arbitrary integer age groups
+#' @param Age_Lx1 integer vector of lower bounds of age groups of `Lx1`
#' @param r stable growth rate
-#' @param AgeInt optional integer vector of widths of age groups, inferred if not given.
-#' @param continuous logical. If `TRUE` we use the growth adjustment. `e^(-age*r)`. If `FALSE` we assume `r` is geometric growth, and we use `(1+r)^age` for the growth adjustment.
-#' @param method character, graduation method used for intermediate graduation. Default `"uniform"`. Other reasonable choices include `"mono"` or `"pclm"`.
#' @return numeric vector of the transformed `nLx`. Note, this vector sums to `1`.
#' @export
#' @examples
@@ -138,47 +144,40 @@ OPAG_simple <-
#' }
#'

-OPAG_nLx_warp_r <- function(nLx,
-                            Age,
-                            r,
-                            AgeInt = NULL,
-                            continuous = TRUE,
-                            method = "uniform"){
-  # Let's do this in single ages :-)
-  # for now, just uniform, but could pass in args to graduate of course
-  # if that is preferred.
-  Lx1  <- graduate(nLx, Age, method = method, constrain = TRUE)
-  a1   <- names2age(Lx1)
-  if (continuous){
-    wLx <- exp(-r * (a1 + .5)) * Lx1
+OPAG_nLx_warp_r <- function(Lx1,
+                            Age_Lx1,
+                            r
+){
+  a1    <- Age_Lx1
+  w1Lx  <- exp(-r * (a1 + .5)) * Lx1
+  nAges <- length(Age_Lx1)
+
+  if (r == 0 ) {
+    w1Lx[nAges] <- Lx1[nAges]
  } else {
-    # then geometric
-    w <- (1 + r) ^ (a1 + .5)
-    wLx <- w * nLx
-  }
-  wLx <- wLx / sum(wLx)
-  if (is.null(AgeInt)){
-    AgeInt <- age2int(Age, OAvalue = 1)
+    Tlast <- Lx1[nAges]
+    Tprev <- Tlast + Lx1[nAges-1]
+    # PJ's closeout
+    abar <- -((log(Lx1[nAges-1])-r*a1[nAges-1])-log(Tprev*exp(r)-Tlast)) / r
+    w1Lx[nAges] <- exp(-r * abar) * Tlast
  }
-  a12A <- rep(Age, AgeInt)
-  nwLx <- groupAges(wLx, Age = a1, AgeN = a12A)
-  nwLx
+
+  w1Lx <- w1Lx / sum(w1Lx)
+
+  w1Lx
}

#' calculates residual for optimizing growth rate r for OPAG family
#' @description For a given set of age groups to fit against, and a given stable growth rate, $r$,
#' what is the error implied given the current $r$ and stationary standard?
-#' @details This is a utiltiy function for `OPAG()`, which needs to optimize $r$ for a
+#' @details This is a utility function for `OPAG()`, which needs to optimize $r$ for a
#' given population vector and stationary standard.
#' @param r given stable growth rate
#' @param Pop_fit numeric vector of at least two population counts to use for fitting
#' @param Age_fit integer vector of lower bounds for age groups of `Pop_fit`
#' @param AgeInt_fit integer vector of widths of age groups of `Pop_fit`
-#' @param nLx numeric vector of stable population standard
-#' @param Age_nLx integer vector of lower bounds for age groups of `nLx`
-#' @param AgeInt_nLx optional integer vector of widths of age groups of `nLx`, inferred if not given.
-#' @param continuous logical. If `TRUE` we use the growth adjustment. `e^(-age*r)`. If `FALSE` we assume `r` is geometric growth, and we use `(1+r)^age` for the growth adjustment.
-#' @param method character. Graduation method, default `"uniform"`. `"mono"` or `"pclm"` would also be good choices.
+#' @param Lx1 numeric vector of stable population standard by single ages
+#' @param Age_Lx1 integer vector of lower bounds for age groups of `Lx1`
#' @return numeric. A residual that you're presumably trying to minimize.
#' @export @@ -188,95 +187,70 @@ OPAG_nLx_warp_r <- function(nLx, #' Age_fit <- c(70,80) #' AgeInt_fit <- c(10,10) #' nLx <- downloadnLx(NULL, "Spain","female",1971) -#' Age_nLx <- names2age(nLx) +#' # graduate(nLx, Age_nLx, method = method, constrain = TRUE) +#' Ageab <- names2age(nLx) +#' Lx1 <- graduate(c(nLx), Ageab, method = "mono", constrain = TRUE) +#' Age_Lx1 <- 0:100 #' r <- .01 #' #' OPAG_r_min(r, -#' Pop_fit, -#' Age_fit, -#' AgeInt_fit, -#' nLx, -#' Age_nLx) +#' Pop_fit = Pop_fit, +#' Age_fit = Age_fit, +#' AgeInt_fit = AgeInt_fit, +#' Lx1 = Lx1, +#' Age_Lx1 = Age_Lx1) #' #' (r_opt <- optimize(OPAG_r_min, #' Pop_fit = Pop_fit, #' Age_fit = Age_fit, #' AgeInt_fit = AgeInt_fit, -#' nLx = nLx, -#' Age_nLx = Age_nLx, +#' Lx1 = Lx1, +#' Age_Lx1 = Age_Lx1, #' interval = c(-0.05,.05))$min) #' -#' ai <- age2int(Age_nLx) -#' -#' # Note the whole age range is being scaled to 1 here, but in practice -#' # you'd only be doing this in the highest ages. If only two fitting -#' # ages are given, then we can get an r that matches them perfectly, -#' # at least within reasonable bounds. -#' \dontrun{ -#' plot(Age, nLx / sum(nLx) / ai, type = 's') -#' lines(Age,OPAG_nLx_warp_r(Lx,Age,r=r_opt)/ai,type='s',col = "red") -#' } OPAG_r_min <- function(r, - Pop_fit, Age_fit, + Pop_fit, AgeInt_fit, # necessary - nLx, - Age_nLx, - AgeInt_nLx = NULL, - continuous = TRUE, - method = "uniform"){ - if (is.null(AgeInt_nLx)){ - AgeInt_nLx <- age2int(Age_nLx, OAvalue = 1) - } - # This is the standard we want to match to Pop, + Lx1, + Age_Lx1 +){ + AgeInt_nLx <- age2int(Age_Lx1, OAvalue = 1) + + # 1) This is the standard we want to match to Pop, # which has presumably been cut down / grouped to the # ages we want to calibrate to. - wnLx <- OPAG_nLx_warp_r( - nLx = nLx, - Age = Age_nLx, - r = r, - AgeInt = AgeInt_nLx, - continuous = continuous) - - # now need to get it to the same age groups as Pop + w1Lx <- OPAG_nLx_warp_r( + Lx1 = Lx1, + Age_Lx1 = Age_Lx1, + r = r + ) + + # 2) now need to get it to the same age groups as Pop # so that we can get a residual - - # 1) Move stable pop to single ages - w1Lx <- graduate( - wnLx, - Age = Age_nLx, - AgeInt = AgeInt_nLx, - method = method) - a1t <- names2age(w1Lx) - a1t <- as.integer(a1t) - - # 2) which single ages implied by Pop? - N <- length(AgeInt_fit) - a1match <- Age_fit[1]:(max(Age_fit) + AgeInt_fit[N] - 1) - a1match <- as.integer(a1match) - - # 3) select down to just those ages: - ind <- a1t %in% a1match - w1Lx <- w1Lx[ind] - - # 4) group w1Lx to same as Pop_fit - ageN <- rep(Age_fit, times = AgeInt_fit) - stand <- groupAges(w1Lx, Age = a1match, AgeN = ageN) - - # 5) rescale standard and Pop_fit to sum to 1 - stand <- rescale_vector(stand, scale = 1) + + w1Lx_fit <- rep(NA, length(Age_fit)) + + for (i in 1:length(Age_fit)){ + ind <- Age_Lx1 >= Age_fit[i] & Age_Lx1 < (Age_fit[i] + AgeInt_fit[i]) + w1Lx_fit[i] <- sum(w1Lx[ind]) + } + + # 3) rescale standard and Pop_fit to sum to 1 + stand <- rescale_vector(w1Lx_fit, scale = 1) Pop_fit <- rescale_vector(Pop_fit, scale = 1) - - # 6) return the residual + + # 4) return the residual sum(abs(stand - Pop_fit)) } #' creates stable standard based on optimizing the growth rate #' @description The stationary standard, `nLx` is transformed into a stable standard by optimizing a growth rate, `r` such that the stable standard matches observed population counts in selected age groups. Usually the ages used for fitting are wide age groups in older ages preceding the open age group. 
The standard output by this function is used by `OPAG` to create the standard used to redistribute counts over older age groups up to a specified open age group, such as 100.
-#' @details The arguments `method` and `continous` don't have much leverage on the result. In short, the stable population transformation is done by ungrouping `nLx` to single ages (if it isn't already), and `method` controls which graduation method is used for this, where `"uniform"`, `"mono"`, `"pclm"` are the reasonable choices at this writing. In single ages, the difference between using a geometric `r` versus continuous `r` are quite small for this task.
-#'
+#' @details The argument `method` doesn't have much leverage on the result. In short, the stable population transformation is done by ungrouping `nLx` to single ages (if it isn't already), and `method` controls which graduation method is used for this, where `"uniform"`, `"mono"`, `"pclm"` are the reasonable choices at this writing.
+#'
+#'
#' @inheritParams OPAG_r_min
#' @return
#' list constaining
@@ -289,71 +263,64 @@ OPAG_r_min <- function(r,
#' @examples
#' Pop_fit <- c(85000,37000)
#' Age_fit <- c(70,80)
-#' AgeInt_fit <- c(10,10)
#' nLx <- downloadnLx(NULL, "Spain","female",1971)
#' Age_nLx <- names2age(nLx)
-#'
-#' # India Males, 1991
+#' Lx1 <- graduate(nLx,Age=Age_nLx,method = "mono")
+#' Age_Lx1 <- 0:100
+#' # India Males, 1971
#' Pop <- smooth_age_5(pop1m_ind,
#'                     Age = 0:100,
#'                     method = "Arriaga")
#' Pop80 <- groupOAG(Pop, names2age(Pop), 80)
#' Age <- names2age(Pop80)
-#' AgeInt <- age2int(Age, OAvalue = 1)
-#'
-#' nLx <- downloadnLx(NULL, "India","male",1991)
+#'
+#' nLx <- downloadnLx(NULL, "India","male",1971)
#' Age_nLx <- names2age(nLx)
-#' AgeInt_nLx <- age2int(Age_nLx,OAvalue = 1)
-#'
+#'
+#'
#' Pop_fit <- groupAges(Pop80, Age, N = 10)[c("60","70")]
#' Age_fit <- c(60,70)
#' AgeInt_fit <- c(10,10)
-#'
+#'
#' Standard <- OPAG_fit_stable_standard(
-#'              Pop_fit,
-#'              Age_fit,
-#'              AgeInt_fit,
-#'              nLx = nLx,
-#'              Age_nLx = Age_nLx,
-#'              AgeInt_nLx = AgeInt_nLx,
-#'              method = "uniform",
-#'              continuous = TRUE)
-#'
+#'              Pop_fit = Pop_fit,
+#'              Age_fit = Age_fit,
+#'              AgeInt_fit = AgeInt_fit,
+#'              Lx1=Lx1,
+#'              Age_Lx1 = Age_Lx1
+#' )
+#'
#' # A visual comparison:
#' nL60 <- rescale_vector(nLx[Age_nLx >= 60])
-#' St60p <- rescale_vector( Standard$Standard[Age_nLx >= 60] )
+#' St60p <- rescale_vector( Standard$Standard[c(0:100) >= 60] )
#' ages_plot <- seq(60,100,by=5)
#' \dontrun{
-#' plot(ages_plot,nL60, type = 'l')
-#' lines(ages_plot, St60p, col = "blue")
+#' plot(ages_plot,nL60, type = 'l')
+#' lines(60:100, St60p, col = "blue")
#' }
OPAG_fit_stable_standard <- function(Pop_fit,
                                     Age_fit,
                                     AgeInt_fit,
-                                     nLx,
-                                     Age_nLx,
-                                     AgeInt_nLx,
-                                     method = "uniform",
-                                     continuous = TRUE){
-
-
+                                     Lx1,
+                                     Age_Lx1
+){
+
+  # optimize the parameter r
  r_opt <- optimize(OPAG_r_min,
                    Pop_fit = Pop_fit,
                    Age_fit = Age_fit,
                    AgeInt_fit = AgeInt_fit,
-                    nLx = nLx,
-                    Age_nLx = Age_nLx,
-                    interval = c(-0.05, .05))
-
-
-  standard <- OPAG_nLx_warp_r(nLx = nLx,
-                              Age = Age_nLx,
-                              r = r_opt$min,
-                              AgeInt = AgeInt_nLx,
-                              continuous = continuous,
-                              method = method)
+                    Lx1 = Lx1,
+                    Age_Lx1 = Age_Lx1,
+                    interval = c(-0.02, .05)) # changed interval
+
+
+  standard <- OPAG_nLx_warp_r(Lx1 = Lx1,
+                              Age_Lx1 = Age_Lx1,
+                              r = r_opt$min
+  )
  # return both stable standard and the optimization output,
  # which will let us know if r is simply unreasonable or similar.
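  # (stats::optimize() returns `minimum` and `objective`: r_opt$min below
  # picks up `minimum` via partial matching, i.e. the fitted growth rate,
  # while r_opt$objective holds the residual remaining at that r)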
out <- list(Standard = standard,
              r_opt = r_opt)
  out
}
@@ -373,44 +340,39 @@ OPAG_fit_stable_standard <- function(Pop_fit,
#' @details It may be helpful to try more than one fitting possibility,
#' and more than one `Redistribute_from` cut point, as results may vary.
#'
-#' The argument `"method"` refers to which graduation method (see `?graduate`)
-#' is only relevant if input data are in grouped ages. This is innocuous if
-#' ages are single to begin with. The choice of whether to assume
-#' `continuous = TRUE` constant growth versus geometric (`FALSE`) growth
-#' has little leverage.
-#'
#' `Redistribute_from` can be lower than your current open age group,
#' and `OAnew` can be higher, as long as it is within the range of `Age_nLx`.
#' If `Age_nLx` doesn't go high enough for your needs, you can extrapolate
-#' it ahead of time. For this, you'd want the `nMx` the underly it, and you
+#' it ahead of time. For this, you'd want the `nMx` that underlie it, and you
#' can use `lt_abridged()`, specifying a higher open age, and then
#' extracting `nLx` again from it.
#'
#' @inheritParams OPAG_r_min
#' @param Pop numeric vector of population counts
#' @param Age_Pop integer vector of the lower bounds of the population age groups
-#' @param AgeInt_Pop integer vector of the population age group interval widths, using `Inf` for the open age group.
+#' @param nLx numeric vector of stationary population age structure in arbitrary integer age groups
+#' @param Age_nLx integer vector of lower bounds of age groups of `nLx`
#' @param Redistribute_from integer lower age bound that forms the cutoff, above which we redistribute counts using the stable standard.
#' @param OAnew integer. Desired open age group in the output (must being element of `Age_nLx`)
+#' @param method character, graduation method used for intermediate graduation. Default `"mono"`. Other reasonable choices include `"pclm"` or `"uniform"`.
#' @export
+#' @importFrom utils tail
#' @examples
-#' # India Males, 1991
+#' # India Males, 1971
#' Pop <- smooth_age_5(pop1m_ind,
#'                     Age = 0:100,
#'                     method = "Arriaga")
#' Age_Pop <- names2age(Pop)
#' AgeInt_Pop <- age2int(Age_Pop, OAvalue = 1)
#'
-#' nLx <- downloadnLx(NULL, "India","male",1991)
+#' nLx <- downloadnLx(NULL, "India","male",1971)
#' Age_nLx <- names2age(nLx)
#' AgeInt_nLx <- age2int(Age_nLx, OAvalue = 1)
#'
#' Pop_fit <- OPAG(Pop,
#'                 Age_Pop = Age_Pop,
-#'                 AgeInt_Pop = AgeInt_Pop,
#'                 nLx = nLx,
#'                 Age_nLx = Age_nLx,
-#'                 AgeInt_nLx,
#'                 Age_fit = c(60,70),
#'                 AgeInt_fit = c(10,10),
#'                 Redistribute_from = 80)
@@ -428,17 +390,15 @@ OPAG_fit_stable_standard <- function(Pop_fit,
OPAG <- function(Pop,
                 Age_Pop,
-                 AgeInt_Pop,
-                 nLx,
-                 Age_nLx,
-                 AgeInt_nLx = NULL,
+                 nLx,
+                 Age_nLx,
                 Age_fit =  NULL,
                 AgeInt_fit = NULL,
                 Redistribute_from = max(Age_Pop),
                 OAnew = max(Age_nLx),
-                 method = "uniform",
-                 continuous = TRUE){
-
+                 method = "mono"
+){
+
  # ensure OAnew is possible
  stopifnot(OAnew <= max(Age_nLx))
@@ -450,21 +410,38 @@ OPAG <- function(Pop,
  if(!identical(as.integer(unique(diff(Age_Pop))), as.integer(unique(diff(Age_nLx))))){ # put a different
    cat("\nAge_Pop and Age_nLx age intervals are different!\n")
  }
-  # what if one age vec is single and the other isn't? we should warn, but continue.
-  # is_single(Age_Pop)
-  # is_abridged(Age_Pop)
+
+  # PJ adds this. Note final age group not assigned a width
+  AgeInt_Pop <- diff(Age_Pop)
+  # AgeInt_nLx <- diff(Age_Pop)

  # setup, prelims:
  # 0) if Age_fit isn't given assume last two 10-year age groups.
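  # (e.g. with Age_Pop = seq(0, 80, 5) this default picks Age_fit = c(60, 70)
  #  and AgeInt_fit = c(10, 10), the two 10-year groups just below the OAG)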
+ if (is.null(Age_fit)){ OA <- max(Age_Pop) Age_fit <- OA - c(20,10) AgeInt_fit <- c(10,10) stopifnot(Age_fit %in% Age_Pop) } - + if (is.null(AgeInt_fit)){ + # assume age intervals are age differences, and repeat last one + AgeInt_fit <- diff(Age_fit) + AgeInt_fit <- c(AgeInt_fit, tail(AgeInt_fit, n=1)) + # if Age_fit includes pop OA then set last fit age int to Inf + if (tail(Age_fit,1) == tail(Age_Pop,1)) { + AgeInt_fit[length(AgeInt_fit)] <- Inf + } + } + if (any(!Age_fit %in% Age_Pop)){ + ind <- Age_fit %in% Age_Pop + Age_fit <- Age_fit[ind] + AgeInt_fit <- AgeInt_fit[ind] + stopifnot(length(Age_fit) > 1) + } + # 1) get Pop_fit - + # TR: note: this calls for a special age utility function I think # earmarking this code chunk to swap it out in the future. Pop_fit <- rep(NA, length(Age_fit)) @@ -472,47 +449,55 @@ OPAG <- function(Pop, ind <- Age_Pop >= Age_fit[i] & Age_Pop < (Age_fit[i] + AgeInt_fit[i]) Pop_fit[i] <- sum(Pop[ind]) } - - # 2) get the standard + + # 2) make sure Lx is single ages + Lx1 <- graduate(nLx, Age_nLx, method = method, constrain = TRUE) + Age_Lx1 <- as.integer(names(Lx1)) + Stab_stand <- OPAG_fit_stable_standard(Pop_fit, Age_fit, AgeInt_fit, - nLx, - Age_nLx, - AgeInt_nLx, - method = method, - continuous = continuous) + Lx1, + Age_Lx1 + ) StPop <- Stab_stand$Standard - + # 3) get total to redistribute: OAG_total <- sum(Pop[Age_Pop >= Redistribute_from]) - + # 4) select standard in those age groups. - StPop_sel <- StPop[Age_nLx >= Redistribute_from] + StPop_sel <- StPop[Age_Lx1 >= Redistribute_from] StPop_sel <- rescale_vector(StPop_sel, scale = 1) - + # 5) redistribute Pop_redistributed <- StPop_sel * OAG_total - + + # 5a) regroup into original pop age grouping + # TR: possibly not the bes tuse of AgeInt_Pop.. + if (tail(AgeInt_Pop, n=2)[-1] == 5) { + Pop_redistributed <- groupAges(Pop_redistributed, N = 5) + } + # 6) graft together Pop_grafted <- c(Pop[Age_Pop < Redistribute_from], Pop_redistributed) Age_grafted <- c(Age_Pop[Age_Pop < Redistribute_from], Age_nLx[Age_nLx >= Redistribute_from]) - + + names(Pop_grafted) <- Age_grafted # 7) potentially group down OAG Pop_out <- groupOAG(Value = Pop_grafted, Age = Age_grafted, OAnew = OAnew) Age_out <- names2age(Pop_out) - + # 8) compose list for output out <- list( - Pop_out = Pop_out, - Age_out = Age_out, - Pop_in = Pop, - Standard = StPop, - r_opt = Stab_stand$r_opt) - + Pop_out = Pop_out, + Age_out = Age_out, + Pop_in = Pop, + Standard = StPop, + r_opt = Stab_stand$r_opt) + out } diff --git a/R/basepop.R b/R/basepop.R index 46d1e27fd..aa5078522 100644 --- a/R/basepop.R +++ b/R/basepop.R @@ -141,8 +141,8 @@ #' * `nLxm` numeric matrix of male `nLx`, abridged ages in rows and (potentially interpolated) time in columns. Potentially downloaded. #' * `Asfr` numeric matrix of age specific fertility in 5-year age groups ages 15-19 until 45-49 in rows, and (potentially interpolated) time in columns. Potentially downloaded. #' * `Exposure_female` numeric matrix of approximated age-specific exposure in 5-year age groups ages 15-19 until 45-49 in rows, and (potentially interpolated) time in columns. -#' * `Bt` births at three time points prior to census corrsponding to the midpoints of the cohorts entering ages 0, 1-4, and 5-9. -#' * `SRB` sex ratio at birth at three time points prior to census corrsponding to the midpoints of the cohorts entering ages 0, 1-4, and 5-9. Potentially downloaded. +#' * `Bt` births at three time points prior to census corresponding to the midpoints of the cohorts entering ages 0, 1-4, and 5-9. 
+#' * `SRB` sex ratio at birth at three time points prior to census corresponding to the midpoints of the cohorts entering ages 0, 1-4, and 5-9. Potentially downloaded. #' * `Age` age groups of the input population counts. #' # #' `basepop_single` is used, the return value is a numeric vector with diff --git a/R/check_heaping.R b/R/check_heaping.R index 5fa8949d1..9f80e5eae 100644 --- a/R/check_heaping.R +++ b/R/check_heaping.R @@ -504,7 +504,7 @@ check_heaping_spoorenberg <- function(Value, #' @param Agei integer. The age on which the index is centered. #' @param pow either \code{"exp"} (default) or a power such as 2. See details #' -#' @details The index looks down two ages and up two ages, so the data must accommodate that range. The denominator is a mean of the counts in the sourrounding 5 single ages. The kind of mean can be controlled with the \code{pow} argument. By default, this takes the antilog of the arithmetic mean of the natural log of the five denominator counts. That will fail if one of the counts is equal to 0. In such cases, another power, such as 2 or 10 or 100 may be used, which is more robust to 0s. The higher the power, the closer the result will resemble the default output. If \code{pow=="exp"} but a 0 is detected among the denominator ages, then \code{pow} is assigned a value of 1000. \code{pow=1} would imply an arithmetic mean in the denominator. +#' @details The index looks down two ages and up two ages, so the data must accommodate that range. The denominator is a mean of the counts in the surrounding 5 single ages. The kind of mean can be controlled with the \code{pow} argument. By default, this takes the antilog of the arithmetic mean of the natural log of the five denominator counts. That will fail if one of the counts is equal to 0. In such cases, another power, such as 2 or 10 or 100 may be used, which is more robust to 0s. The higher the power, the closer the result will resemble the default output. If \code{pow=="exp"} but a 0 is detected among the denominator ages, then \code{pow} is assigned a value of 1000. \code{pow=1} would imply an arithmetic mean in the denominator. #' @return The value of the index. #' #' @references @@ -679,13 +679,13 @@ heapify <- function(Value, #' Detect if heaping is worse on terminal digits 0s than on 5s #' -#' @description Ages ending in 0 often have higher apparent heaping than ages ending in 5. In this case, data in 5-year age bins might show a sawtooth pattern. If heaping ocurrs in roughly the same amount on 0s and 5s, then it may be sufficient to group data into 5-year age groups and then graduate back to single ages. However, if heaping is worse on 0s, then this procedure tends to produce a wavy pattern in count data, with 10-year periodicity. In this case it is recommended to use one of the methods of \code{smooth_age_5()} as an intermediate step before graduation. +#' @description Ages ending in 0 often have higher apparent heaping than ages ending in 5. In this case, data in 5-year age bins might show a sawtooth pattern. If heaping occurs in roughly the same amount on 0s and 5s, then it may be sufficient to group data into 5-year age groups and then graduate back to single ages. However, if heaping is worse on 0s, then this procedure tends to produce a wavy pattern in count data, with 10-year periodicity. In this case it is recommended to use one of the methods of \code{smooth_age_5()} as an intermediate step before graduation. #' #' @details Data is grouped to 5-year age bins. 
The ratio of each value to the average of its neighboring values is calculated. If 0s have stronger attraction than 5s then we expect these ratios to be >1 for 0s and <1 for 5s. Ratios are compared within each 10-year age group in the evaluated age range. If in the evaluated range there are at most two exceptions to this rule (0s>5s), then the ratio of the mean of these ratios is returned, and it is recommended to use a smoother method. Higher values suggest use of a more aggressive method. This approach is only slightly different from that of Feeney, as implemented in the \code{smooth_age_5_zigzag_inner()} functions. This is not a general measure of roughness, but rather an indicator of this particular pattern of age attraction. #' @export #' @inheritParams heapify #' @param ageMin integer evenly divisible by 10. Lower bound of evaluated age range, default 40. -#' @param ageMax integer evently divisibly by 5. Upper bound of evaluated age range, defaults to highest age evenly divisible by 10. +#' @param ageMax integer evenly divisible by 5. Upper bound of evaluated age range, defaults to highest age evenly divisible by 10. #' @return numeric, ratio of 0s to 5s. If > 1 then the pattern is present. #' @references #' \insertRef{feeney1979}{DemoTools} @@ -762,7 +762,7 @@ check_heaping_sawtooth <- } #' Evaluate roughness of data in 5-year age groups -#' @description For a given age-structured vector of counts, how rough is data after grouping to 5-year age bins? Data may require smoothing even if there is no detectable sawtooth pattern. It is best to use the value in this method together with visual evidence to gauage whether use of \code{smooth_age_5()} is recommended. +#' @description For a given age-structured vector of counts, how rough is data after grouping to 5-year age bins? Data may require smoothing even if there is no detectable sawtooth pattern. It is best to use the value in this method together with visual evidence to gauge whether use of \code{smooth_age_5()} is recommended. #' @details First we group data to 5-year age bins. Then we take first differences (d1) of these within the evaluated age range. Then we smooth first differences (d1s) using a generic smoother (\code{ogive()}). Roughness is defined as the mean of the absolute differences between \code{mean(abs(d1 - d1s) / abs(d1s))}. Higher values indicate rougher data, and may suggest more aggressive smoothing. Just eyeballing, one could consider smoothing if the returned value is greater than ca 0.2, and values greater than 0.5 already highly recommend it (pending visual verification). #' @export #' @inheritParams check_heaping_sawtooth diff --git a/R/data.R b/R/data.R index 6717eb73e..5bfbf3126 100644 --- a/R/data.R +++ b/R/data.R @@ -21,9 +21,9 @@ -#' Indian male population 1991 +#' Indian male population 1971 #' -#' Indian male population 1991 +#' Indian male population 1971 #' @docType data #' @format #' A numeric vector of length 101 @@ -200,7 +200,7 @@ #' A data frame with: #' \describe{ #' \item{Date}{Reference time for the rates estimate.} -#' \item{Age}{Inferior age for abridged groups. Carefull: last age 100 is not an OAG} +#' \item{Age}{Inferior age for abridged groups. 
Careful: last age 100 is not an OAG} #' \item{Sex}{Male \code{m} and female \code{m}.} #' \item{nMx}{Mortality rates.} #' } @@ -230,7 +230,7 @@ #' A matrix of dimensions 21 x 21 #' #' @source -#' Migration residual PAS spreadhseet +#' Migration residual PAS spreadsheet "pop_m_mat_five" #' Population matrix for females five year age groups between 1950 and 2050 @@ -242,7 +242,7 @@ #' A matrix of dimensions 21 x 21 #' #' @source -#' Migration residual PAS spreadhseet +#' Migration residual PAS spreadsheet "pop_f_mat_five" #' Survival rates matrix for males five year age groups between 1950 and 2045 @@ -254,7 +254,7 @@ #' A matrix of dimensions 21 x 20 #' #' @source -#' Migration residual PAS spreadhseet +#' Migration residual PAS spreadsheet "sr_m_mat_five" #' Survival rates matrix for females five year age groups between 1950 and 2045 @@ -266,7 +266,7 @@ #' A matrix of dimensions 21 x 20 #' #' @source -#' Migration residual PAS spreadhseet +#' Migration residual PAS spreadsheet "sr_f_mat_five" #' Age-specific fertility rates for age groups 15 to 45 between 1950 and 2045 @@ -278,7 +278,7 @@ #' A matrix of dimensions 7 x 20 #' #' @source -#' Migration residual PAS spreadhseet +#' Migration residual PAS spreadsheet "asfr_mat_five" #' Sex ratio at birth between 1950 and 2045 @@ -289,7 +289,7 @@ #' A vector of length 20 #' #' @source -#' Migration residual PAS spreadhseet +#' Migration residual PAS spreadsheet "srb_vec_five" #' Ages between 0 and 100 abridged in five year age groups @@ -301,7 +301,7 @@ #' A vector of length 21 #' #' @source -#' Migration residual PAS spreadhseet +#' Migration residual PAS spreadsheet "ages_five" #' Ages between 15 and 45 in five year age groups @@ -313,7 +313,7 @@ #' A vector of length 7 #' #' @source -#' Migration residual PAS spreadhseet +#' Migration residual PAS spreadsheet "ages_asfr_five" #' Population matrix for males single ages between 1999 and 2019 @@ -325,7 +325,7 @@ #' A matrix of dimensions 101 x 21 #' #' @source -#' Migration residual PAS spreadhseet +#' Migration residual PAS spreadsheet "pop_m_mat_single" #' Population matrix for females single ages between 1999 and 2019 @@ -337,7 +337,7 @@ #' A matrix of dimensions 101 x 21 #' #' @source -#' Migration residual PAS spreadhseet +#' Migration residual PAS spreadsheet "pop_f_mat_single" #' Survival rates matrix for males single ages between 1999 and 2019 @@ -350,7 +350,7 @@ #' A matrix of dimensions 101 x 20 #' #' @source -#' Migration residual PAS spreadhseet +#' Migration residual PAS spreadsheet "sr_m_mat_single" #' Survival rates matrix for females single ages between 1999 and 2019 @@ -363,7 +363,7 @@ #' A matrix of dimensions 101 x 20 #' #' @source -#' Migration residual PAS spreadhseet +#' Migration residual PAS spreadsheet "sr_f_mat_single" #' Age-specific fertility rates for single ages 15 to 49 between 1999 and 2018 @@ -376,7 +376,7 @@ #' A matrix of dimensions 35 x 20 #' #' @source -#' Migration residual PAS spreadhseet +#' Migration residual PAS spreadsheet "asfr_mat_single" #' Sex ratio at birth between 1999 and 2019 @@ -388,7 +388,7 @@ #' A vector of length 20 #' #' @source -#' Migration residual PAS spreadhseet +#' Migration residual PAS spreadsheet "srb_vec_single" #' Single ages between 0 and 100 @@ -399,7 +399,7 @@ #' A vector of length 101 #' #' @source -#' Migration residual PAS spreadhseet +#' Migration residual PAS spreadsheet "ages_single" #' Single ages between 15 and 49 @@ -410,7 +410,7 @@ #' A vector of length 36 #' #' @source -#' Migration residual PAS spreadhseet +#' 
Migration residual PAS spreadsheet
"ages_asfr_single"

#' Parameters for considered migration profiles
diff --git a/R/extra_mortality.R b/R/extra_mortality.R
index 6e8ed4d4d..9f374178e 100644
--- a/R/extra_mortality.R
+++ b/R/extra_mortality.R
@@ -26,12 +26,13 @@
#' @param opt.method character. Default `"LF2"`, see `MortalityLaws::MortalityLaw` for a description of choices.
#' @param ... Other arguments to be passed on to the
#' \code{\link[MortalityLaws]{MortalityLaw}} function.
+#' @details If fitting fails to converge, then we refit assuming Gompertz mortality with explicit starting parameters of `parS = c(A = 0.005, B = 0.13)` and a warning is issued.
#' @seealso
#' \code{\link[MortalityLaws]{MortalityLaw}}
#' \code{\link[MortalityLaws]{predict.MortalityLaw}}
#' @return An object of class \code{lt_rule_m_extrapolate} with the following components:
#' \item{input}{List with arguments provided in input. Saved for convenience.}
-#' \item{call}{An unevaluated function call, that is, an unevaluated expressionwhich consists of the named function applied to the given arguments.}
+#' \item{call}{An unevaluated function call, that is, an unevaluated expression that consists of the named function applied to the given arguments.}
#' \item{fitted.model}{An object of class \code{\link[MortalityLaws]{MortalityLaw}}. Here one can find fitted values, residuals, goodness of fit measures etc.}
#' \item{values}{A vector or matrix containing the complete mortality data, that is the modified input data following the extrapolation procedure.}
#'
@@ -53,7 +54,8 @@
#' f2 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "kannisto_makeham")
#' f3 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "gompertz")
#' f4 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "ggompertz")
-#' f5 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "makeham")
+#' # makeham falls back to gompertz for this data
+#' suppressWarnings(f5 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "makeham"))
#' f6 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "beard")
#' f7 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "beard_makeham")
#' f8 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "quadratic")
@@ -125,17 +127,7 @@
#'            lty = c(NA, NA, 1:8), pch = c(16, 16, rep(NA, 8)),
#'            col = c(1, 4, 2:9), lwd = 2, pt.cex = 2)
#'}
-#' # ----------------------------------------------
-#' # Example 3 - Extrapolate mortality for multiple years at once
-#'
-#' # Create some data
-#' mx_matrix <- matrix(rep(mx1, 3), ncol = 3) %*% diag(c(1, 1.05, 1.1))
-#' dimnames(mx_matrix) <- list(age = x1, year = c("year1", "year2", "year3"))
-#'
-#' F1 <- lt_rule_m_extrapolate(mx_matrix, x = x1, x_fit, x_extr, law = "kannisto")
-#' F1
-#' ls(F1)
-#' coef(F1)
+
#' @author Marius D. Pascariu
#' @export
lt_rule_m_extrapolate <- function(mx,
                                  x,
                                  x_fit,
                                  x_extr,
                                  law = "kannisto",
                                  opt.method = "LF2",
                                  ...) {
-
+  dm <- dim(mx)
+  if (length(dm) == 1 | (length(dm) == 2 & any(dm == 1))){
+    mx <- c(mx)
+  }
  all_the_laws_we_care_about <- c("kannisto",
                                  "kannisto_makeham",
                                  "makeham",
@@ -171,10 +166,36 @@
    opt.method = opt.method,
    ...
  )
-
+
+  if (!is.null(M$opt.diagnosis)){
+    if( M$opt.diagnosis$convergence != 0){
+      warning("Extrapolation failed to converge\nFalling back to Gompertz with starting parameters:\n parS = c(A = 0.005, B = 0.13)",
+              immediate.
= TRUE)
+
+      parS <- c(A = 0.005, B = 0.13)
+      law  <- "gompertz"
+      M <- MortalityLaw(
+        x = x,
+        mx = mx,
+        fit.this.x = x_fit,
+        law = law,
+        parS = parS,
+        ...)
+    }
+  }
+
+  # TR: this will fail if a matrix is given where we only have x_extr for 1 age.
+  chop <- FALSE
+  if (length(x_extr) == 1){
+    chop <- TRUE
+    x_extr <- c(x_extr, x_extr + 1)
+  }
  pv <- predict(object = M, x = x_extr)
-
+  if (chop){
+    pv <- pv[1]
+    x_extr <- x_extr[1]
+  }
  # which ages are not to be replaced with fitted values?
  L <- !(x %in% x_extr)
diff --git a/R/graduate.R b/R/graduate.R
index b5da87b99..cc95772c0 100644
--- a/R/graduate.R
+++ b/R/graduate.R
@@ -11,7 +11,7 @@
#'
#' @return Numeric vector of counts for single year age groups.
#'
-#' @details Assumes that the population is uniformly distributed across each age interval, and that initial age intervals are integers greater than or equal to 1. If \code{AgeInt} is given, its final value is used as the interval for the final age group. If \code{AgeInt} is missing, then \code{Age} must be given, and the open age group is by default preserved \code{OAvalue} rather than split. To instead split the final age group into, e.g., a 5-year age class, either give \code{AgeInt}, *or* give \code{Age}, \code{OAG = TRUE}, and \code{OAvalue = 5}.
+#' @details Assumes that the population is uniformly distributed across each age interval, and that initial age intervals are integers greater than or equal to 1. If \code{AgeInt} is given, its final value is used as the interval for the final age group. If \code{AgeInt} is missing, then \code{Age} must be given, and the open age group is by default preserved \code{OAvalue} rather than split. To instead split the final age group into, e.g., a 5-year age class, either give \code{AgeInt}, *or* give \code{Age}, \code{OAG = TRUE}, and \code{OAvalue = 5}. `Age` can be any age range; it does not need to start at 0.
#'
#' @export
#' @examples
@@ -52,7 +52,7 @@ graduate_uniform <-
#' @description This method is used to interpolate counts based on the Sprague formula. It is based on the first stage of the Sprague R script prepared by Thomas Buettner and Patrick Gerland, itself based on the description in Siegel and Swanson, 2004, p. 727.
#'
#' @inheritParams graduate
-#' @details Ages should refer to lower age bounds, ending in the open age group in the last row (not a closed terminal age). Dimension labelling is necessary. There must be at least six age groups (including the open group). One year of data will work as well, as long as it's given as or coercible to a single-column matrix. This method may produce negative values, most likely in the youngest or oldest ages. This case is dealt with in the \code{graduate()} wrapper function but not in this function.
+#' @details Ages should refer to lower age bounds, ending in the open age group in the last row (not a closed terminal age). Dimension labeling is necessary. There must be at least six age groups (including the open group). One year of data will work as well, as long as it's given as or coercible to a single-column matrix. This method may produce negative values, most likely in the youngest or oldest ages. This case is dealt with in the \code{graduate()} wrapper function but not in this function.
#'
#' If the highest age does not end in a 0 or 5, and \code{OAG == TRUE}, then the open age will be grouped down to the next highest age ending in 0 or 5. If the highest age does not end in a 0 or 5, and \code{OAG == FALSE}, then results extend to single ages covering the entire 5-year age group.
#' @@ -203,6 +203,8 @@ graduate_sprague_expand <- function( # get the split coefficients # block for ages 0-9 + + # TR: 5-5-2021, this assumes ages start at 0... g1g2 <- matrix( c( 0.3616, -0.2768, 0.1488, -0.0336, 0.0000, 0.2640, -0.0960, 0.0400, @@ -387,7 +389,7 @@ graduate_grabill_expand <- function(Value, Age, OAG = TRUE) { #' Sprague estimated single-age population counts for the first and final ten ages. Open age groups are preserved, as are annual totals. #' #' @inheritParams graduate -#' @details Dimension labelling is necessary. There must be at least six age groups (including the open group). One year of data will work as well, as long as it's given as a single-column matrix. Data may be given in either single or grouped ages. If the highest age does not end in a 0 or 5, and \code{OAG == TRUE}, then the open age will be grouped down to the next highest age ending in 0 or 5. If the highest age does not end in a 0 or 5, and \code{OAG == FALSE}, then results extend to single ages covering the entire 5-year age group. +#' @details Dimension labeling is necessary. There must be at least six age groups (including the open group). One year of data will work as well, as long as it's given as a single-column matrix. Data may be given in either single or grouped ages. If the highest age does not end in a 0 or 5, and \code{OAG == TRUE}, then the open age will be grouped down to the next highest age ending in 0 or 5. If the highest age does not end in a 0 or 5, and \code{OAG == FALSE}, then results extend to single ages covering the entire 5-year age group. #' #' @return numeric vector in single ages. #' @@ -716,7 +718,7 @@ graduate_beers_expand <- function(Value, #' @inheritParams graduate #' @param method character. Valid values are `"ord"` or `"mod"`. Default `"ord"`. #' @param johnson logical. Whether or not to adjust young ages according to the \href{https://www.census.gov/data/software/dapps.html}{Demographic Analysis & Population Projection System Software} method. Default `FALSE.` -#' @details Ages should refer to lower age bounds. `Value` must be labelled with ages unless `Age` is given separately. There must be at least six 5-year age groups (including the open group, 5 otherwise). If you want the `johnson` adjustment then `Value` must contain a single-year estimate of the population count in age 0. That means `Value` must come either as standard abridged or single age data. +#' @details Ages should refer to lower age bounds. `Value` must be labeled with ages unless `Age` is given separately. There must be at least six 5-year age groups (including the open group, 5 otherwise). If you want the `johnson` adjustment then `Value` must contain a single-year estimate of the population count in age 0. That means `Value` must come either as standard abridged or single age data. #' #' `method` option `"ord"` conserves sums in 5-year age groups, whereas `"mod"` does some smoothing between 5-year age groups too, and is not constrained. #' @@ -893,6 +895,8 @@ graduate_beers_johnson <- function(Age0, pop5, pop1) { #' @details The PCLM method can also be used to graduate rates using an offset if both numerators and denominators are available. In this case \code{Value} is the event count and \code{offset} is person years of exposure. The denominator must match the length of \code{Value} or else the length of the final single age result \code{length(min(Age):OAnew)}. This method can be used to redistribute counts in the open age group if \code{OAnew} gives sufficient space. 
Likewise, it can give a rate extrapolation beyond the open age.
#'
#' If there are 0s in `Value`, these are replaced with a small value prior to fitting. If negatives result from the pclm fit, we retry after multiplying `Value` by 10, 100, or 1000, as sometimes a temporary rescale for fitting can help performance.
+#'
+#' `Age` can be any age range; it does not need to start at 0.
#'
#' @inheritParams graduate
#' @param ... further arguments passed to \code{ungroup::pclm()}
@@ -1004,7 +1008,7 @@ graduate_pclm <- function(Value, Age, AgeInt, OAnew = max(Age), OAG = TRUE, ...)
#' Graduate age groups using a monotonic spline.
#' @description Take the cumulative sum of \code{Value} and then run a monotonic spline through it. The first differences split back single-age estimates of \code{Value}. Optionally keep the open age group untouched.
#'
-#' @details The \code{"hyman"} method of \code{stats::splinefun()} is used to fit the spline because 1) it passes exactly through the points, 2) it is monotonic and therefore guarantees positive counts, and 3) it seems to be a bit less wiggly (lower average first differences of split counts). Single-age data is returned as-is. If you want to use this function as a smoother you first need to group to non-single ages.
+#' @details The \code{"hyman"} method of \code{stats::splinefun()} is used to fit the spline because 1) it passes exactly through the points, 2) it is monotonic and therefore guarantees positive counts, and 3) it seems to be a bit less wiggly (lower average first differences of split counts). Single-age data is returned as-is. If you want to use this function as a smoother you first need to group to non-single ages. `Age` can be any age range; it does not need to start at 0.
#' @inheritParams graduate
#' @return Numeric. vector of single smoothed age counts.
#' @importFrom stats splinefun
@@ -1069,9 +1073,9 @@ graduate_mono <- function(

  # if the final age is Open, then we should remove it and then
  # stick it back on
-  AgePred <- c(min(Age), cumsum(AgeInt))
+  AgePred <- c(min(Age), cumsum(AgeInt) + min(Age))
  y <- c(0, cumsum(Value))
-  AgeS <- min(Age):sum(AgeInt)
+  AgeS <- min(Age):(sum(AgeInt)+ min(Age))
  # TR: changed from monoH.FC to hyman 3.3.2021
  y1 <- splinefun(y ~ AgePred, method = "hyman")(AgeS)
  out <- diff(y1)
@@ -1119,7 +1123,7 @@ graduate_mono <- function(
#' # one may wish to instead rescale results colSums() of
#' # popg at age pivotAge and higher.
#' sum(grabill.closed.out) - sum(popvec)
-#' # also works on an age-labelled vector of data
+#' # also works on an age-labeled vector of data
#' closed.vec <- graduate_mono_closeout(popvec, Age = a5, OAG = TRUE)
#' # let's compare this one with sprague()
@@ -1218,7 +1222,7 @@ graduate_mono_closeout <-
#'
#' \code{OAnew} cannot be higher than \code{max(Age)+4} for \code{"sprague"} or \code{"beers"} methods. For \code{"uniform","mono","pclm"} it can be higher than this, and in each case the open age group is completely redistributed within this range, meaning it's not really open anymore.
#'
-#' For all methods, negative values are detected in output. If present, we deal with these in the following way: we take the geometric mean between the given output (with negative imputed with 0s) and the output of \code{graduate_mono()}, which is guaranteed non-negative. This only affects age groups where negatives were produced in the first pass. In our experience this only arises when using sprague, beers, or grabill methods, whereas all others are guarateed non-negative.
+#' For all methods, negative values are detected in output. If present, we deal with these in the following way: we take the geometric mean between the given output (with negative imputed with 0s) and the output of \code{graduate_mono()}, which is guaranteed non-negative. This only affects age groups where negatives were produced in the first pass. In our experience this only arises when using Sprague, Beers, or Grabill methods, whereas all others are guaranteed non-negative. #' #' For any case where input data are in single ages, constraining results to sum to values in the original age groups will simply return the original input data, which is clearly not your intent. This might arise when using graduation as an implicit two-step smoother (group + graduate). In this case, separate the steps, first group using \code{groupAges()} then use \code{graduate(..., constrain = TRUE)}. #' @@ -1420,7 +1424,7 @@ graduate <- function(Value, } n <- length(out) - a1 <- min(Age):(n - 1) + a1 <- min(Age):(min(Age) + n - 1) # detect negatives. Have default option to replace. # Favor quick over perfect, since this only can arise diff --git a/R/interp_coh.R b/R/interp_coh.R index b23df2a76..78fd02c4f 100644 --- a/R/interp_coh.R +++ b/R/interp_coh.R @@ -27,34 +27,34 @@ shift_census_ages_to_cohorts <- function(pop, date, censusYearOpt = "frac", OAG = TRUE){ - - + + stopifnot(is_single(age)) - + date <- dec.date(date) yr <- floor(date) f1 <- date - yr - + if (OAG){ N <- length(pop) pop <- pop[-N] age <- age[-N] } - + if (is.na(censusYearOpt)){ censusYearOpt <- "NA" } - + upper_part_of_cohort <- pop * f1 lower_part_of_cohort <- pop * (1 - f1) - + shift <- ceiling(f1) pop_out <- shift.vector(lower_part_of_cohort,shift) + upper_part_of_cohort - + cohorts <- yr - age - 1 + shift - + age_out <- round(f1) + age - + if (censusYearOpt == "drop"){ pop_out <- pop_out[-1] age_out <- age_out[-1] @@ -68,7 +68,7 @@ shift_census_ages_to_cohorts <- function(pop, if (censusYearOpt == "NA"){ pop_out[1] <- NA_real_ } - + list(cohort_size = pop_out, birth_year = cohorts, age = age_out, @@ -76,8 +76,8 @@ shift_census_ages_to_cohorts <- function(pop, f1 = f1) } -#' component-free intercensal cohort interpolation -#' @description Cohorts between two censuses are interpolated flexibly using linear, exponential, or power rules. The lower and upper intercensal triangles are filled using within-age interpolation. This function is experimental and still in development. +#' Cohort component intercensal interpolation +#' @description Cohorts between two censuses are interpolated using a cohort component approach. #' @seealso interp #' @param c1 numeric vector. The first (left) census in single age groups #' @param c2 numeric vector. The second (right) census in single age groups @@ -96,6 +96,7 @@ shift_census_ages_to_cohorts <- function(pop, #' @param midyear logical. `FALSE` means all Jan 1 dates between `date1` and `date2` are returned. `TRUE` means all July 1 intercensal dates are returned. #' @param verbose logical. Shall we send informative messages to the console? #' @param ... 
optional arguments passed to +#' @details The basic approach is to i) align the censuses to single-year cohorts by blending adjacent ages assuming that the birthdays in each age group are uniformly distributed through the year ii) decrement the first census forward within cohorts using period-cohort survival probabilities calculated from (supplied or downloaded) `l(x)` values, iii) redistribute the residual at the time of the second census uniformly over time within cohorts. These steps are always done on Jan 1 reference dates. If `midyear = TRUE`, then we do within-age band arithmetic interpolation to July 1 reference dates. #' @export #' @importFrom data.table := as.data.table melt data.table dcast between #' @examples @@ -113,29 +114,29 @@ shift_census_ages_to_cohorts <- function(pop, #' ) #' } interp_coh <- function( - c1, - c2, - date1, - date2, - age1 = 1:length(c1) - 1, - age2 = 1:length(c2) - 1, - dates_out = NULL, - lxMat = NULL, - age_lx = NULL, - dates_lx = NULL, - births = NULL, - years_births = NULL, - location = NULL, - sex = "both", - midyear = FALSE, - verbose = TRUE, - ... - ) { - + c1, + c2, + date1, + date2, + age1 = 1:length(c1) - 1, + age2 = 1:length(c2) - 1, + dates_out = NULL, + lxMat = NULL, + age_lx = NULL, + dates_lx = NULL, + births = NULL, + years_births = NULL, + location = NULL, + sex = "both", + midyear = FALSE, + verbose = TRUE, + ... +) { + # convert the dates into decimal numbers date1 <- dec.date(date1) date2 <- dec.date(date2) - + res_list <- rup( c1 = c1, c2 = c2, @@ -155,78 +156,78 @@ interp_coh <- function( verbose = verbose, ... = ... ) - + pop_jan1 <- res_list$pop_jan1 dates_out <- res_list$dates_out - + . <- NULL age <- NULL discount <- NULL pop_jan1_pre <- NULL resid <- NULL year <- NULL - + # add "cumulative" residual to the RUP (pop_jan1_pre) - pop_jan1[, `:=`(pop_jan1 = pop_jan1_pre + resid * discount)] + pop_jan1[, `:=`(pop_jan1 = pop_jan1 + resid * discount)] pop_jan1 <- pop_jan1[!is.na(cohort)] - + # TR: to get residualmigbeta prelim result, one takes the cumulative # resid (resid * discount), then decumulates it (within cohorts!), # then sum over age. boo ya Lexis - + PopAP <- pop_jan1 %>% .[, list(age, year, pop_jan1)] %>% data.table::dcast(age ~ year, value.var = "pop_jan1") %>% .[order(age)] - - + + matinterp <- PopAP[age <= max(age1), -1] %>% as.matrix() rownames(matinterp) <- age1 - + # Handle NAs perhaps c1 needs OPAG beforehand?) ind <- is.na(matinterp) if (any(ind) & verbose){ cat("\n",sum(ind),"NA detected in output.\nThese have been imputed with 0s.\nThis could happen in the highest ages,\nand you may consider extending the open ages of the census inputs?\n") matinterp[ind] <- 0 } - + # Handle negatives (small pops, or large negative residuals relative to pop size) ind <- matinterp < 0 if (any(ind) & verbose){ cat("\n",sum(ind),"negatives detected in output.\nThese have been imputed with 0s.\n") matinterp[ind] <- 0 } - + yrsIn <- as.numeric(colnames(matinterp)) if (all(yrsIn > date1)){ matinterp <- cbind(c1, matinterp) yrsIn <- c(date1, yrsIn) } - + if (all(yrsIn < date2)){ matinterp <- cbind(matinterp, c2[1:length(c2)]) yrsIn <- c(yrsIn, date2) } - + colnames(matinterp) <- yrsIn # now we either return Jan1 dates or July 1 dates. 
- + out <- interp( matinterp, datesIn = yrsIn, datesOut = as.numeric(dates_out), rule = 1 ) - + if (any(out < 0)) { if (verbose) { cat("\nSome of the interpolated values resulted to be negative, replacing with zeroes\n") #nolintr } - + out[out < 0] <- 0 } - + out } @@ -629,6 +630,8 @@ lt_a2s_chunk <- function(chunk, OAnew, ...){ # dplyr::bind_cols() %>% # as.matrix()) +# ZZZ FLAG for possible depreation + interp_coh_lxMat_pxt <- function(lxMat, dates_lx, age_lx, @@ -641,23 +644,23 @@ interp_coh_lxMat_pxt <- function(lxMat, # fixed. date1 <- dec.date(date1) date2 <- dec.date(date2) - + year1 <- floor(date1) + 1 year2 <- floor(date2) - + year_seq <- year1:year2 - + dates_out <- c(dec.date(date1), year_seq) - + # get ndx andnLx from lt_abridged() - + a1 <- 0:OAnew qx1 <- matrix(ncol = ncol(lxMat), nrow = length(a1), dimnames = list(a1, dates_lx)) for (i in 1:ncol(lxMat)){ - + if (is_abridged(age_lx)){ # LTA <- lt_abridged(Age = age_lx, # lx = lxMat[, i], @@ -671,46 +674,52 @@ interp_coh_lxMat_pxt <- function(lxMat, qx1[, i] <- LT1$nqx } else { qx <- lt_id_l_q(lxMat[, i]) - + LT1 <- lt_single_qx(nqx = qx, Age=1:length(qx)-1, OAnew = OAnew, ...) - - + + qx1[, i] <- LT1$nqx } - + } - + # We do linear interpolation of the logit-transformed qx. logit_qx <- log(qx1 / (1 - qx1)) - + logit_qx_interp <- interp( popmat = logit_qx, datesIn = dates_lx, datesOut = dates_out, - rule = 2) + rule = 2, + negatives = TRUE) # transform back QX <- exp(logit_qx_interp) / (1 + exp(logit_qx_interp)) - QX[nrow(QX), ] <- 1 - - + f1 <- diff(dates_out)[1] f2 <- date2 - floor(date2) - + + # get Sx (keep PX name) + PX <- apply(QX,2,function(q){lt_single_qx(nqx = q, + Age=a1, + OAnew = OAnew, + ...)$Sx}) + rownames(PX) <- rownames(QX) # assume linear px change within age class - PX <- 1 - QX + # PX <- 1 - QX + # IW: PX^f1 is not bad. Other option is PX=(1-QX*f). One assumes linear on d_x, the other constant mu_x. PX[,1] <- PX[, 1] ^f1 PX[,ncol(PX)] <- PX[, ncol(PX)] ^f2 - - + + PX } - +### ZZZ FLAG for redux and/or deprecation / name change transform_pxt <- function(lxMat, location, sex, @@ -721,20 +730,22 @@ transform_pxt <- function(lxMat, age_lx, age1, ...) { - + # get the lexis surface of survival probabilities if (is.null(lxMat)){ - pxt <- suppressMessages( + # ZZZ Note this already returns Sx. that was an easy fix. + pxt <- suppressMessages( interp_coh_download_mortality(location = location, sex = sex, date1 = date1, date2 = date2, - OAnew = max(age1) + 1, + OAnew = max(age1), verbose = verbose) ) } else { - + + ### FLAG This all needs to change. if (is.null(dates_lx)){ # if lx dates not given we assume dates evenly distributed from date1 to date2? dates_lx <- seq(date1,date2,length.out = ncol(lxMat)) @@ -742,10 +753,10 @@ transform_pxt <- function(lxMat, cat("lxMat specified, but not dates_lx\nAssuming:",paste(dates_lx,collapse=", "),"\n") } } - + available_dates <- data.table::between(dates_lx, date1, date2) if (!all(available_dates)) stop("All `dates_lx` must be within the range of `date1` and `date2`") - + # if the shortest distance from dates_lx to date1 or date2 is greater than 7 # warn dates_df <- expand.grid(dates_lx = dates_lx, dates = c(date1, date2)) @@ -761,20 +772,20 @@ transform_pxt <- function(lxMat, ") is greater than 7 years. Be wary." 
) } - - ic_period <- date2 - date1 - lx_mm <- range(dates_lx) - overlap <- min(c(lx_mm[2], date2)) - c(max(lx_mm[1], date1)) - extrap_low <- lx_mm[1] - min(lx_mm[1],date1) + + ic_period <- date2 - date1 + lx_mm <- range(dates_lx) + overlap <- min(c(lx_mm[2], date2)) - c(max(lx_mm[1], date1)) + extrap_low <- lx_mm[1] - min(lx_mm[1],date1) extrap_high <- max(lx_mm[2],date2) - lx_mm[2] t1 <- overlap / ic_period < .25 t2 <- extrap_low > 6 t3 <- extrap_high > 6 if (any(c(t1, t2, t3))) cat("\nRange between `date1` and `date2` must overlap with `lx_dates` for at least 25% of the range or 6 years.\n") - + if (is.null(age_lx)){ if (nrow(lxMat) < 26){ - + N <- nrow(lxMat) age_lx <- c(0,1,seq(5,5*(N-2),by=5)) } else { @@ -784,7 +795,7 @@ transform_pxt <- function(lxMat, cat("lxMat specified, but Age_lx missing\nAssuming:",paste(age_lx,collapse=", "),"\n") } } - + # ensure lx fills timepoints. # would like to pass ... here for the lifetable part pxt <- interp_coh_lxMat_pxt( @@ -793,11 +804,11 @@ transform_pxt <- function(lxMat, age_lx = age_lx, date1 = date1, date2 = date2, - OAnew = max(age1) + 1, + OAnew = max(age1), control = list(deg = 3, lambda = 100), ...) } - + pxt } @@ -807,12 +818,12 @@ check_args <- function(lxMat, births, location, age1, age2, c1, c2, verbose) { stopifnot(length(age2) == length(c2)) stopifnot(is_single(age1)) stopifnot(is_single(age2)) - + if (length(age1) != length(age2) & verbose){ cat("\nFYI: age ranges are different for c1 and c2\nWe'll still get intercensal estimates,\nbut returned data will be chopped off after age", max(age1), "\n") } - - + + # If lxMat or births are missing -- message requiring location and sex if (is.null(lxMat) & is.null(location)) { stop("lxMat not specified, please specify location and sex\n") @@ -820,22 +831,22 @@ check_args <- function(lxMat, births, location, age1, age2, c1, c2, verbose) { if (is.null(births) & is.null(location)) { stop("births not specified, please specify location and sex\n") } - + if (!is.null(lxMat) && ncol(lxMat) == 1) { stop("lxMat should have at least two or more dates as columns. lxMat contains only one column") #nolintr } - + if (any(c1 < 0)) stop("No negative values allowed in `c1`") if (any(c2 < 0)) stop("No negative values allowed in `c2`") if (any(lxMat < 0)) stop("No negative values allowed in `lxMat`") - + } # If dates_out not given, then we resolve using the midyear argument. # If FALSE (default) we return intermediate Jan 1, not including c1 and c2 # If TRUE we return intermediate July 1 (.5) dates, not including c1 and c2 transform_datesout <- function(dates_out, date1, date2, midyear) { - + if (is.null(dates_out)){ if (! midyear){ # jan 1 dates @@ -854,28 +865,29 @@ transform_datesout <- function(dates_out, date1, date2, midyear) { dates_out <- dates_out[dates_out_lgl] } } - + dates_out } +### ZZZ FLAG for redux or deprecation reshape_pxt <- function( - pxt, - births, - c1, - c2, - age1, - age2, - date1, - date2, - f1, - f2, - yrs_births - ) { - + pxt, + births, + c1, + c2, + age1, + age2, + date1, + date2, + f1, + f2, + yrs_births +) { + # Since we're using data.table, we need to create these empty # variables to avoid having R CMD checks with no visible binding # for global variable. - + age <- NULL year <- NULL px <- NULL @@ -893,146 +905,150 @@ reshape_pxt <- function( discount <- NULL .N <- NULL . 
<- NULL - + px_triangles <- pxt %>% data.table::as.data.table(keep.rownames = "age") %>% data.table::melt( - id.vars = "age", - variable.name = "year", - value.name = "px", - variable.factor = FALSE - ) - + id.vars = "age", + variable.name = "year", + value.name = "px", + variable.factor = FALSE + ) + # No need for assignment: data.table assigns without creating a copy px_triangles[, `:=`(age = as.numeric(age), year = as.numeric(year), lower = magrittr::raise_to_power(px, 0.5), upper = magrittr::raise_to_power(px, 1 - 0.5))] - + px_triangles <- px_triangles[, list(age, year, lower, upper)] %>% data.table::melt( - id.vars = c("age", "year"), - measure.vars = c("lower", "upper"), - variable.name = "triangle", - value.name = "value", - variable.factor = FALSE - ) - + id.vars = c("age", "year"), + measure.vars = c("lower", "upper"), + variable.name = "triangle", + value.name = "value", + variable.factor = FALSE + ) + px_triangles[, `:=`(adj = ifelse(triangle == "upper", 1, 0))] px_triangles[, `:=`(cohort = magrittr::subtract(year, age) %>% magrittr::subtract(adj) %>% floor())] - + # cohort changes over the whole period # px_cum1 <- px_triangles[, list(n_triangles = .N, coh_p = prod(value)), # keyby = list(cohort)] - + # adjust the census population vectors c1c <- shift_census_ages_to_cohorts(c1, age1, date1, censusYearOpt = "frac") c2c <- shift_census_ages_to_cohorts(c2, age2, date2, censusYearOpt = "frac") - + # correction for the first year age 0 -- only take first for the remaining of # the year births[1] <- births[1] * (1 - f1) - + # correction for the last year age 0 n_yrs <- length(births) births[n_yrs] <- births[n_yrs] * f2 - + cohort_dt <- data.table::data.table( - cohort = yrs_births, - pop = births - ) - + cohort = yrs_births, + pop = births + ) + input <- data.table::data.table(cohort = c1c$birth_year, pop = c1c$cohort_size) %>% .[order(cohort)] %>% rbind(cohort_dt) %>% .[, list(pop = sum(pop)), keyby = list(cohort)] - + # population c2 observed pop_c2 <- data.frame( cohort = c2c$birth_year, pop_c2_obs = c2c$cohort_size ) - + pop_jan1_pre <- px_triangles %>% .[, list(n_triangles = .N, coh_p = prod(value)), keyby = list(year, cohort)] %>% .[order(cohort, year)] - + pop_jan1_pre[, `:=`(coh_lx = cumprod(coh_p)), keyby = list(cohort)] - + pop_jan1_pre <- pop_jan1_pre[input, on = "cohort"] - + pop_jan1_pre[, `:=`( - pop_jan1_pre = pop * coh_lx, + pop_jan1 = pop * coh_lx, age = floor(year) - cohort, year = floor(year) + 1 )] - + pop_jan1_pre[, `:=`(year = ifelse(year == max(year), year + f2 - 1, year))] - # calculate the discrepancy (migration) -- to be disrtibuted uniformly in # cohorts resid <- pop_jan1_pre %>% .[year == max(year)] %>% .[pop_c2, on = "cohort"] - - resid[, `:=`(resid = pop_c2_obs - pop_jan1_pre)] + + resid[, `:=`(resid = pop_c2_obs - pop_jan1)] # Only used in the process for diagnostics # resid[, `:=`(rel_resid = resid / pop_c2_obs)] resid <- resid[, list(cohort, resid)] - + # This should just be one value per cohort. 
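Before the discount step that follows, a hedged sketch of the arithmetic: the single residual per cohort (observed census 2 minus projection) is spread linearly between the census dates, so the cumulative adjustment is 0 at `date1` and the full residual at `date2`. Dates and the residual value are invented for illustration:

```r
# Sketch (not part of the diff): the uniform residual discount.
date1 <- 2002.5
date2 <- 2010.3
yrs_births <- 2003:2010

discount <- stats::approx(x = c(date1, date2),
                          y = c(0, 1),
                          xout = yrs_births)$y
resid_cohort <- -140                    # hypothetical cohort residual
cum_adjust <- resid_cohort * discount   # what gets added to pop_jan1 each year
round(cum_adjust, 1)
```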
- + # determine uniform error discounts: resid_discounts <- stats::approx( - x = c(date1, date2), - y = c(0, 1), - xout = yrs_births - ) %>% + x = c(date1, date2), + y = c(0, 1), + xout = yrs_births + ) %>% data.table::as.data.table() %>% .[, list(year = x, discount = y)] - + # output pop_jan1 <- pop_jan1_pre %>% merge(resid, by = "cohort", all = TRUE) %>% merge(resid_discounts, by = "year", all = TRUE) - + # for the residual discount, account for boundaries pop_jan1[, `:=`( resid = ifelse(is.na(resid), 0, resid), discount = ifelse(year == max(year), 1, discount) )] - - + + pop_jan1 } +### ZZZ FLAG for redux +### after the under-the hood changes are redone to back out Sx, +### the top level args could expand it optionally let the user +### also provide nLxMat rup <- function( - c1, - c2, - date1, - date2, - age1, - age2, - dates_out, - lxMat, - age_lx, - dates_lx, - births, - years_births, - location, - sex, - midyear, - verbose, - ... - ) { + c1, + c2, + date1, + date2, + age1, + age2, + dates_out, + lxMat, + age_lx, + dates_lx, + births, + years_births, + location, + sex, + midyear, + verbose, + ... +) { + check_args( lxMat = lxMat, births = births, @@ -1043,11 +1059,11 @@ rup <- function( c2 = c2, verbose = verbose ) - + if (is.na(date1) | is.na(date2)){ stop("\nCensus dates didn't parse\n") } - + # TR: resolve dates_out # if some dates were given, let's coerce to numeric and ensure valid if (!is.null(dates_out)){ @@ -1059,15 +1075,15 @@ rup <- function( if (length(dates_out) == 0){ stop("\nno valid dates to interpolate to\n") } - + # if we still have valid dates, then check we're not extrapolating dates_out_keep <- data.table::between(dates_out, date1, date2, incbounds = FALSE) - + dates_out_for_real <- dates_out[dates_out_keep] - + # warn about any dates lost due to extrap request: if (length(dates_out_for_real) != length(dates_out) & verbose){ cat("\nFollowing dates requested, but not returned\nbecause they'd require extrapolation:\n",paste(dates_out[!dates_out_keep],collapse = ", "),"\n") @@ -1076,22 +1092,23 @@ rup <- function( stop("\nuh oh! This method is strictly for cohort component interpolation\nYour requested dates_out didn't have anything between date1 and date2\n") } } - + # If dates_out not given, then we resolve using the midyear argument. # If FALSE (default) we return intermediate Jan 1, not including c1 and c2 # If TRUE we return intermediate July 1 (.5) dates, not including c1 and c2 dates_out <- transform_datesout(dates_out, date1, date2, midyear) - + DD <- date2 - date1 if (DD >= 15 & verbose){ cat("\nFYI, there are",DD,"years between c1 and c2\nBe wary.\n") } - + # let's store the proportions separately f1 <- date1 %>% magrittr::subtract(date1 %>% floor) f2 <- date2 %>% magrittr::subtract(date2 %>% floor) - + # And download if needed + # ZZZ FLAG: this step may pxt <- transform_pxt( lxMat = lxMat, location = location, @@ -1104,7 +1121,7 @@ rup <- function( age1 = age1, ... = ... ) - + yrs_births <- seq(floor(date1), floor(date2), 1) # TR: if right-side is jan 1 then we can cut it off of pxt. 
if (f2 == 0){ @@ -1112,7 +1129,7 @@ rup <- function( yrs_births <- yrs_births[-length(yrs_births)] f2 <- 1 } - + # Download wpp births if needed births <- fetch_wpp_births( @@ -1122,23 +1139,26 @@ rup <- function( sex = sex, verbose = verbose ) - + # check length of births, also filter using provided dates if necessary if (!is.null(years_births)){ stopifnot(length(births) == length(years_births)) - + years_births <- floor(years_births) yrs_keep <- data.table::between(years_births, min(yrs_births), max(yrs_births), incbounds = TRUE) - + births <- births[yrs_keep] } - + # now that births should be available we can do this check. stopifnot(length(births) == length(yrs_births)) - + + + # ZZZ FLAG this may or may not stay same. pxt could just be different values, + # and perhaps this would work. However, edge conditions need exa pop_jan1 <- reshape_pxt( pxt = pxt, births = births, @@ -1152,7 +1172,7 @@ rup <- function( f2 = f2, yrs_births = yrs_births ) - + list( pop_jan1 = pop_jan1, dates_out = dates_out diff --git a/R/interp_lc_lim.R b/R/interp_lc_lim.R index 281cbf434..da6899462 100644 --- a/R/interp_lc_lim.R +++ b/R/interp_lc_lim.R @@ -1,10 +1,10 @@ #' Lee-Carter method with limited data. #' -#' @description Given a data frame with dates, sex and mortality data by age (rates, conditionated probabilities of death +#' @description Given a data frame with dates, sex and mortality data by age (rates, conditioned probabilities of death #' or survival function), this function interpolate/extrapolate life tables #' using the method for limited data suggested by Li et. al (2004) (at least three observed years). #' -#' @details Based on spreedsheet "Li_2018_Limited_Lee-Carter-v4.xlsm" from UN. +#' @details Based on spreadsheet "Li_2018_Limited_Lee-Carter-v4.xlsm" from UN. #' Useful for abridged or single ages, and allows output in both formats also. #' One option is the use of non-divergent method for sex coherency (Li & Lee, 2005). #' The other is the possibility of fitting `"k"` to replicate `"e_0"` at some given dates. @@ -13,7 +13,7 @@ #' #' @param input data.frame with cols: Date, Sex, Age, nMx (opt), nqx (opt), lx (opt) #' @param dates_out numeric. Vector of decimal years to interpolate or extrapolate. -#' @param Single logical. Wheter or not the lifetable output is by single ages. +#' @param Single logical. Whether or not the lifetable output is by single ages. #' @param dates_e0 numeric. Vector of decimal years where `"e_0"` should be fitted when apply method. #' @param e0_Males numeric. Vector of life expectancy by year to be fitted. Same length than `"dates_e0"`. #' @param e0_Females numeric. Vector of life expectancy by year to be fitted. Same length than `"dates_e0"`. @@ -287,13 +287,22 @@ interp_lc_lim <- function(input = NULL, e0_Males), dates_e0, dates_out, - extrap = TRUE)[1, ] + extrap = TRUE) e0f <- interp(rbind(e0_Females, e0_Females), dates_e0, dates_out, - extrap = TRUE)[1, ] + extrap = TRUE) + + # IW: issue with dimension in case the interpolation is for 1 date only + if(ndates_out==1){ + e0m = e0m[1] + e0f = e0f[1] + }else{ + e0m = e0m[1,] + e0f = e0f[1,] + } # avoid divergence: same bx but not kt. 
if (prev_divergence){
@@ -304,7 +313,7 @@ interp_lc_lim <- function(input = NULL,
     ktm_star = ktf_star = c()
     for (j in 1:ndates_out){
       ktm_star[j] <- optimize(f = interp_lc_lim_kt_min,
-                              interval = c(-20, 20),
+                              interval = c(-50, 50),
                               ax = axm,
                               bx = bxm,
                               age = Age,
@@ -312,7 +321,7 @@ interp_lc_lim <- function(input = NULL,
                               e0_target = e0m[j],
                               ...)$minimum
       ktf_star[j] <- optimize(f = interp_lc_lim_kt_min, # TR: add ...
-                              interval = c(-20, 20),
+                              interval = c(-50, 50),
                               ax = axf,
                               bx = bxf,
                               age = Age,
@@ -414,13 +423,13 @@ interp_lc_lim_abk_m <- function(k,ax,bx){
 }
 
 # estimate LC for limited data
-#' Estimate LC with limited data params
+#' Estimate LC with limited data parameters
 #' @description Estimate LC with limited data from a matrix of rates (age by dates).
-#' @details SVD for ax and bx. Fit a simmple linear model for k and interp/extrapolate for objective dates.
-#' @param M numeric. Matrix with many rows as ages and columns as dates_in.
+#' @details SVD for `ax` and `bx`. Fit a simple linear model for `k` and interpolate/extrapolate to the objective dates.
+#' @param M numeric. Matrix with as many rows as ages and as many columns as `dates_in`.
 #' @param dates_in numeric. Vector of dates with input rates.
 #' @param dates_out numeric. Vector of dates for estimate a set of rates.
-#' @param SVD logical. Use Singular Value Decomposition for estimate b and k or Maximum Likelihood Estimation. Default `FALSE` for Maximum Likelihood Estimation.
+#' @param SVD logical. Use Singular Value Decomposition to estimate `b` and `k`, or Maximum Likelihood Estimation. Default `FALSE` for Maximum Likelihood Estimation.
 #' @references
 #' \insertRef{Li2004}{DemoTools}
 #' @export
@@ -452,7 +461,7 @@ interp_lc_lim_estimate <- function(M, dates_in, dates_out, SVD = F){
 
 # smooth rule previous to solve ambiguous
 #' Smooth and apply lt_ambiguous
 #' @description Considering different mortality input for each sex/year data,
-#' smooth olders with makeham or kannisto in case no law was specified,
+#' smooth older ages with Makeham or Kannisto in case no law was specified,
 #' and return a data.frame with standard LT.
 #' @details Makeham is chosen if last age is less than 90. Else Kannisto.
 #' @param input data.frame. with cols: Date, Sex, Age, nMx (opt), nqx (opt), lx (opt)
diff --git a/R/interp_lc_lim_group.R b/R/interp_lc_lim_group.R
index 502ab21e9..efbdc6790 100644
--- a/R/interp_lc_lim_group.R
+++ b/R/interp_lc_lim_group.R
@@ -9,11 +9,11 @@
 # text/messages/warnings. Specially the case when no `id` is given
 #'
-#' @description Given a data frame with groups (country, region, sex), dates, sex and mortality data by age (rates, conditionated probabilities of death
+#' @description Given a data frame with groups (country, region, sex), dates, sex and mortality data by age (rates, conditioned probabilities of death
 #' or survival function), this function interpolate/extrapolate life tables
 #' using the method for limited data suggested by Li et. al (2004) (at least three observed years).
 #'
-#' @details Based on spreedsheet "Li_2018_Limited_Lee-Carter-v4.xlsm" from UN.
+#' @details Based on spreadsheet "Li_2018_Limited_Lee-Carter-v4.xlsm" from UN.
 #' Useful for abridged or single ages, and allows output in both formats also.
 #' One option is the use of non-divergent method for sex coherency (Li & Lee, 2005).
 #' The other is the possibility of fitting `"k"` to replicate `"e_0"` at some given dates.
@@ -23,9 +23,9 @@
 #' @note Draft Version
 #'
 #' @param input data.frame. Columns: id, Date, Sex, Age, nMx (opt), nqx (opt), lx (opt).
-#' The first column (id) cn be a numeric index or charcter vector identifying each group.
+#' The first column (id) can be a numeric index or character vector identifying each group.
 #' @param dates_out numeric. Vector of decimal years to interpolate or extrapolate.
-#' @param Single logical. Wheter or not the lifetable output is by single ages.
+#' @param Single logical. Whether or not the lifetable output is by single ages.
 #' @param input_e0 data.frame with cols: id, Date, Sex and `"e_0"`. This should be fitted when apply method.
 #' @param prev_divergence logical. Whether or not prevent divergence and sex crossover between groups. Default `FALSE.`
 #' @param weights list. For `prev_divergence` option. A double for each element of a list with names as `id` columns. Should sum up to 1. Default: same weight for each group.
diff --git a/R/log_quad_augm.R b/R/log_quad_augm.R
new file mode 100644
index 000000000..fcbb921e7
--- /dev/null
+++ b/R/log_quad_augm.R
@@ -0,0 +1,176 @@
+
+# author: IW --------------------------------------------------------------
+
+#' HMD pattern for adult ages.
+#' @description Adjust rates in the oldest ages using an HMD pattern, based on the log-quad method.
+#' @details One possible scenario, when mortality data at the oldest ages is not reliable, is to use a mortality pattern anchored to some known index at earlier ages.
+#' This function gives an HMD pattern based on 5q0 and 45q15, using the log-quad model. Additionally, a value for mortality between ages 60 and 75 can be included to better adjust the level.
+#' @param nMx numeric. Vector of mortality rates in single age classes.
+#' @param Age integer. Single ages (abridged not allowed).
+#' @param Sex character. Either male \code{"m"}, female \code{"f"}, or both \code{"b"}.
+#' @param q0_5 numeric. Probability of death from birth to age 5. By default, the values implicit in `nMx` should be entered.
+#' @param q15_45 numeric. Probability of death from age 15 to age 60. By default, the values implicit in `nMx` should be entered.
+#' @param fitted_logquad Optional, defaults to \code{NULL}. An object of class
+#' \code{wilmoth}. If the full HMD is not enough, one
+#' can fit a Log-Quadratic (\url{https://github.com/mpascariu/MortalityEstimate}) model
+#' based on any other collection of life tables.
+#' @param q60_15 numeric. Probability of death from age 60 to age 75. When external information on the level at those ages is available,
+#' it can be included to increase the parameter `ax` of the log-quad model at the last ages (Li, 2003).
+#' @param Age_transition integer. Age from which the transition to the HMD pattern starts.
+#' @param window_transition integer. Number of ages to the left and to the right of `Age_transition` over which to do a log-linear transition in rates.
+#' @param plot_comparison logical. Whether or not to show a plot with the result.
+#' @param ... Other arguments to be passed on to the \code{lt_single} function.
+#' @export
+#' @return life table as in the \code{lt_single} function.
+#' @examples
+#' # Mortality rates from the UN Chilean model with e0=70. What would the rates be based on the HMD pattern?
+#' # In this case we make a transition of 10 years at age 80, and return an OAG=100.
+#' \dontrun{
+#' lt <- DemoToolsData::modelLTx1
+#' lt <- lt[lt$family == "Chilean" & lt$sex == "female" & lt$e0 == 70,]
+#' chilean70_adjHMD <- HMD_old_logquad(nMx = lt$mx1,
+#'                                     Age = lt$age,
+#'                                     Sex = "f",
+#'                                     q0_5 = 1 - lt$lx1[lt$age==5]/lt$lx1[lt$age==0],
+#'                                     q15_45 = 1 - lt$lx1[lt$age==60]/lt$lx1[lt$age==15],
+#'                                     Age_transition = 80,
+#'                                     window_transition = 10,
+#'                                     plot_comparison = TRUE,
+#'                                     OAnew = 100)
+#' # We know (as an example) that q60_15 is 50% higher than what the HMD pattern would imply.
+#' chilean70_adjHMD_augm <- HMD_old_logquad(nMx = lt$mx1,
+#'                                          Age = lt$age,
+#'                                          Sex = "f",
+#'                                          q0_5 = 1 - lt$lx1[lt$age==5]/lt$lx1[lt$age==0],
+#'                                          q15_45 = 1 - lt$lx1[lt$age==60]/lt$lx1[lt$age==15],
+#'                                          q60_15 = (1 - lt$lx1[lt$age==75]/lt$lx1[lt$age==60]) * 1.5,
+#'                                          Age_transition = 80, window_transition = 10,
+#'                                          OAnew = 100, plot_comparison = TRUE)
+#' }
+
+HMD_old_logquad <- function(nMx, Age = NULL,
+                            Sex = "b",
+                            q0_5 = NULL, q15_45 = NULL,
+                            q60_15 = NULL,
+                            Age_transition = 80,
+                            window_transition = 3,
+                            plot_comparison = FALSE,
+                            fitted_logquad = NULL,
+                            ...){
+
+  # default Age must be set before validating it; rates must be by single age
+  if(is.null(Age)) Age <- 0:(length(nMx) - 1)
+  if(!is_single(Age)) stop("Need rates by single age.")
+
+  # check if an optional fitted_logquad is specified
+  if(is.null(fitted_logquad)){
+    if(Sex == "b"){
+      fitted_logquad <- DemoTools::fitted_logquad_b
+    }
+    if(Sex == "f"){
+      fitted_logquad <- DemoTools::fitted_logquad_f
+    }
+    if(Sex == "m"){
+      fitted_logquad <- DemoTools::fitted_logquad_m
+    }
+  }
+
+  # load the log-quad model already fitted in the package (different from MortalityEstimate) and estimate with input data
+  logquad_model <- lt_model_lq(Sex = Sex, q0_5 = q0_5, q15_45 = q15_45, fitted_logquad = fitted_logquad)
+  mx_logquad_5q0_45q15 <- logquad_model$lt
+
+  # augmented method, if requested
+  if(!is.null(q60_15)){
+    mx_logquad_5q0_45q15 <- tryCatch({
+      logquad_augmented(coeffs = fitted_logquad$coefficients,
+                        k = logquad_model$values$k,
+                        Age = logquad_model$lt$Age,
+                        q0_5 = q0_5, q60_15 = q60_15, Sex = Sex)},
+      error = function(e) {
+        warning("Augmented log-quad was not possible. Revise q60_15. Returned basic log-quad.")
+        mx_logquad_5q0_45q15})
+  }
+
+  # make it single
+  mx_logquad_5q0_45q15 <- lt_abridged2single(nMx = mx_logquad_5q0_45q15$nMx,
+                                             Age = mx_logquad_5q0_45q15$Age,
+                                             lx = mx_logquad_5q0_45q15$lx,
+                                             Sex = Sex,
+                                             ...)
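A hedged sketch of the transition step that follows: inside the window around `Age_transition`, rates are obtained by linear interpolation of `log(nMx)` between the input schedule (below the window) and the log-quad schedule (above it). The two toy schedules below are invented for illustration:

```r
# Sketch (not part of the diff): log-linear transition between two schedules.
Age_transition    <- 80
window_transition <- 3
ages  <- 70:90
mx_in <- 0.020 * exp(0.10 * (ages - 70))  # hypothetical input rates
mx_lq <- 0.025 * exp(0.09 * (ages - 70))  # hypothetical log-quad rates

win    <- (Age_transition - window_transition):(Age_transition + window_transition)
keep   <- ages[!ages %in% win]
mx_mix <- c(mx_in[ages < min(win)], mx_lq[ages > max(win)])
mx_win <- exp(stats::approx(x = keep, y = log(mx_mix), xout = win)$y)
```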
+
+  # smooth transition with a given length window
+  Ages <- 0:max(mx_logquad_5q0_45q15$Age)
+  Age_smooth <- (Age_transition-window_transition):(Age_transition+window_transition)
+  Age_not_smooth <- Ages[!Ages %in% Age_smooth]
+  nMx_to_smooth_transition <- data.frame(Age = Age_not_smooth,
+                                         nMx = c(nMx[Age < min(Age_smooth)],
+                                                 mx_logquad_5q0_45q15$nMx[mx_logquad_5q0_45q15$Age > max(Age_smooth)]))
+  nMx_interpolated <- exp(stats::approx(x = nMx_to_smooth_transition$Age,
+                                        y = log(nMx_to_smooth_transition$nMx),
+                                        xout = Age_smooth)$y)
+  smooth_transition_nMx <- dplyr::bind_rows(
+    nMx_to_smooth_transition,
+    data.frame(Age = Age_smooth, nMx = nMx_interpolated)) %>%
+    dplyr::arrange(Age)
+
+  # plot diagnostic
+  if(plot_comparison){
+    Type <- NULL
+    df1 <- data.frame(Age = Age, nMx = nMx)
+    df1$Type <- "Input"
+    df2 <- data.frame(Age = smooth_transition_nMx$Age, nMx = smooth_transition_nMx$nMx)
+    df2$Type <- "Adjusted"
+    df3 <- data.frame(Age = Age_smooth, nMx = nMx_interpolated)
+    df3$Type <- "Transition"
+    rbind(df1, df2, df3) %>%
+      ggplot2::ggplot(ggplot2::aes(x = Age, y = nMx, color = Type)) +
+      ggplot2::geom_line() +
+      ggplot2::geom_vline(xintercept = Age_transition, linetype = "dashed", color = "grey") +
+      ggplot2::scale_y_log10() +
+      ggplot2::theme_bw()
+  }
+
+  # rebuild lt and finish, with input OAnew if it was not defined as an additional input argument
+  lt_extra_arguments <- list(...)
+  if(!("OAnew" %in% names(lt_extra_arguments))){OAnew <- max(Age)} else {OAnew <- lt_extra_arguments$OAnew}
+  mx_logquad_5q0_45q15 <- lt_single_mx(nMx = smooth_transition_nMx$nMx,
+                                       Age = smooth_transition_nMx$Age,
+                                       Sex = Sex,
+                                       OAnew = OAnew)
+  return(mx_logquad_5q0_45q15)
+}
+
+#' Augmented logquad
+#' @description Adjust rates in the oldest ages that come from an HMD model, using an external estimate of 15q60 (Li, 2014). As an example see \code{\link[DemoTools]{HMD_old_logquad}}.
+#' @details Parameter \code{a(x)} is augmented based on an external estimate of 15q60.
+#' @param coeffs data.frame. Columns \code{a(x)}, \code{b(x)}, \code{c(x)} and \code{v(x)} from the fitted logquad model. See \code{fitted_logquad_b}.
+#' @param k numeric. Adult mortality related value from the log-quad estimation based on one or two input parameters. See \code{lt_model_lq}.
+#' @param Sex character. Either male \code{"m"}, female \code{"f"}, or both \code{"b"}.
+#' @param Age integer. Abridged lower bound ages. Same length as the rows of `coeffs`.
+#' @param q0_5 numeric. Probability of death from birth to age 5.
+#' @param q60_15 numeric. Probability of death from age 60 to age 75.
+#' @param ... Other arguments to be passed on to the \code{lt_abridged} function.
+#' @export
+#' @return life table as in the \code{lt_abridged} function.
+#' @references See [Li (2014)](https://www.un.org/development/desa/pd/content/estimating-life-tables-developing-countries).
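A hedged sketch of why the `ax` shift in `logquad_augmented()` below hits the external target: with `q60_15 = 1 - exp(-5 * sum(mx))` over ages 60, 65, and 70, scaling those three rates by a factor `r` multiplies `log(1 - q60_15)` by `r`, and adding `log(r)` to `a(x)` is exactly that scaling on the `mx` scale, since `mx = exp(a(x) + ...)`. Toy numbers, invented for illustration:

```r
# Sketch (not part of the diff): choosing r to reproduce a target 15q60.
mx      <- c(0.020, 0.035, 0.060)   # hypothetical 5mx at ages 60, 65, 70
implied <- 1 - exp(-5 * sum(mx))    # 15q60 implied by the model
target  <- 0.55                     # hypothetical external 15q60
r       <- log(1 - target) / log(1 - implied)
1 - exp(-5 * sum(r * mx))           # recovers `target` exactly
```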
+ +logquad_augmented <- function(coeffs, k, q0_5, q60_15, Sex = "b", Age, ...){ + + # arrange parameters and get rates from the model + params <- c(1, log(q0_5), log(q0_5)^2, k) + mx_logquad <- exp(rowSums(as.matrix(coeffs) %*% diag(params))) + + # adjust ax with the ratio between input value and implicit value from model + q60_15_hat <- q60_15 + age_q60_15 <- which(Age == 60) + q60_15 <- 1 - exp(-5*(sum(mx_logquad[c(age_q60_15, age_q60_15+1, age_q60_15+2)]))) + coeffs_hat <- coeffs + coeffs_hat$ax[age_q60_15:nrow(coeffs_hat)] <- coeffs_hat$ax[age_q60_15:nrow(coeffs_hat)] + log(log(1-q60_15_hat)/log(1-q60_15)) + + # new rates with changed ax + mx_logquad_augm <- exp(rowSums(as.matrix(coeffs_hat) %*% diag(params))) + lt_logquad_augm <- lt_abridged(nMx = mx_logquad_augm, Age = Age, Sex = Sex, ...) + + # output + return(lt_logquad_augm) +} \ No newline at end of file diff --git a/R/lt_abridged.R b/R/lt_abridged.R index 6630faf74..4ff6c3c81 100644 --- a/R/lt_abridged.R +++ b/R/lt_abridged.R @@ -20,7 +20,7 @@ #' is aligned with the other columns in all 5-year age groups, but note the #' first two values have a slightly different age-interval interpretation: #' In Age 0, the interpretation is survival from birth until interval 0-4. -#' In Age 1, it is survival from 0-4 into 5-9. Therafter the age groups align. +#' In Age 1, it is survival from 0-4 into 5-9. Thereafter the age groups align. #' This column is required for population projections. #' #' @param Deaths numeric. Vector of death counts in abridged age classes. @@ -36,12 +36,13 @@ #' @param region character. North, East, South, or West: code{"n"}, code{"e"}, code{"s"}, code{"w"}. Default code{"w"}. #' @param IMR numeric. Infant mortality rate \ifelse{html}{\out{q0}}{\eqn{q_0}}, in case available and \code{nqx} is not specified. Default \code{NA}. #' @param mod logical. If \code{"un"} specified for \code{axmethod}, whether or not to use Nan Li's modification for ages 5-14. Default \code{TRUE}. -#' @param SRB the sex ratio at birth (boys / girls), detault 1.05 +#' @param SRB the sex ratio at birth (boys / girls), default 1.05 #' @param OAnew integer. Desired open age group (5-year ages only). Default \code{max(Age)}. If higher then rates are extrapolated. #' @param OAG logical. Whether or not the last element of \code{nMx} (or \code{nqx} or \code{lx}) is an open age group. Default \code{TRUE}. #' @param extrapLaw character. If extrapolating, which parametric mortality law should be invoked? Options include #' \code{"Kannisto", "Kannisto_Makeham", "Makeham", "Gompertz", "GGompertz", "Beard", "Beard_Makeham", "Quadratic"}. Default \code{"Kannisto"} if the highest age is at least 90, otherwise `"makeham"`. See details. #' @inheritParams lt_a_closeout +#' @importFrom dplyr case_when #' @export #' @return Lifetable in data.frame with columns #' \itemize{ @@ -183,9 +184,30 @@ lt_abridged <- function(Deaths = NULL, extrapFrom = max(Age), extrapFit = NULL, ...) 
{ + + # some handy name coercion + a0rule <- case_when(a0rule == "Andreev-Kingkade" ~ "ak", + a0rule == "Coale-Demeny" ~ "cd", + TRUE ~ a0rule) + axmethod <- case_when(axmethod == "UN (Greville)" ~ "un", + axmethod == "PASEX" ~ "pas", + TRUE ~ axmethod) + Sex <- substr(Sex, 1, 1) |> + tolower() + Sex <- ifelse(Sex == "t", "b", Sex) + + region <- substr(region, 1, 1) |> + tolower() + if (!is.null(extrapLaw)){ + extrapLaw <- tolower(extrapLaw) + } + + + # now we check args axmethod <- match.arg(axmethod, choices = c("pas","un")) Sex <- match.arg(Sex, choices = c("m","f","b")) a0rule <- match.arg(a0rule, choices = c("ak","cd")) + if (!is.null(extrapLaw)){ extrapLaw <- tolower(extrapLaw) extrapLaw <- match.arg(extrapLaw, choices = c("kannisto", @@ -318,27 +340,30 @@ lt_abridged <- function(Deaths = NULL, momega <- nMx[length(nMx)] } # -------------------------------- - # begin extrapolation: - # TR: 13 Oct 2018. always extrapolate to 130 no matter what, - # then truncate to OAnew in all cases. This will ensure more robust closeouts - # and an e(x) that doesn't depend on OAnew. 130 is used similarly by HMD. - x_extr <- seq(extrapFrom, 130, by = 5) - - Mxnew <- lt_rule_m_extrapolate( - x = Age, - mx = nMx, - x_fit = extrapFit, - x_extr = x_extr, - law = extrapLaw, - ...) - - nMxext <- Mxnew$values - Age2 <- names2age(nMxext) - - keepi <- Age2 < extrapFrom - nMxext[keepi] <- nMx[Age < extrapFrom] - nMx <- nMxext - Age <- Age2 + + if (max(Age) < 130){ + # begin extrapolation: + # TR: 13 Oct 2018. always extrapolate to 130 no matter what, + # then truncate to OAnew in all cases. This will ensure more robust closeouts + # and an e(x) that doesn't depend on OAnew. 130 is used similarly by HMD. + x_extr <- seq(extrapFrom, 130, by = 5) + + Mxnew <- lt_rule_m_extrapolate( + x = Age, + mx = nMx, + x_fit = extrapFit, + x_extr = x_extr, + law = extrapLaw, + ...) + + nMxext <- Mxnew$values + Age2 <- names2age(nMxext) + + keepi <- Age2 < extrapFrom + nMxext[keepi] <- nMx[Age < extrapFrom] + nMx <- nMxext + Age <- Age2 + } AgeInt <- age2int( Age, OAG = TRUE, @@ -416,7 +441,9 @@ lt_abridged <- function(Deaths = NULL, nMx[N] <- lx[N] / Tx[N] } - Sx <- lt_id_Ll_S(nLx, lx, AgeInt, N = 5) + Sx <- c(lt_id_Ll_S(nLx, lx, Age, AgeInt, N = 5), 0.0) + names(Sx) <- NULL + # output is an unrounded, unsmoothed lifetable out <- data.frame( Age = Age, diff --git a/R/lt_id.R b/R/lt_id.R index 400645dc3..7c6ed00f6 100644 --- a/R/lt_id.R +++ b/R/lt_id.R @@ -118,15 +118,16 @@ lt_id_l_d <- function(lx) { #' @title Derive lifetable death probabilities from survivorship. #' @description This lifetable identity is the same no matter what kind of lifetable is required. #' You can find it in any demography textbook. -#' @details The vector returned is the same length as \code{lx} and it sums to the lifetable radix. -#' If the radix is one then this is the discrete deaths distribution. +#' @details The vector returned is the same length as \code{lx}. #' #' @param lx numeric. Vector of age-specific lifetable survivorship. #' @references #' \insertRef{preston2000demography}{DemoTools} -#' @return ndx vector of lifetable deaths. +#' @return `qx` values of age-specific mortality rates. 
The last value is always 1.0.
 #' @export
 lt_id_l_q <- function(lx) {
+  # TR note: if there are trailing 0s in lx then
+  # this can be NaN
   dx <- lt_id_l_d(lx)
   dx / lx
 }
@@ -271,27 +272,47 @@ lt_id_ma_q <- function(nMx, nax, AgeInt, closeout = TRUE, IMR) {
 }
 
 #' @title Calculate survivor ratios
-#' @description An extra lifetable column for use in projections, which require uniform time steps both both age and period. Intervals are either single age (\code{N=1}) or five-year ages (\code{N=5}). Input vectors are assumed to come from either single or standard abridged ages.
+#' @description An extra lifetable column for use in projections, which require uniform time steps in both age and period. Intervals are either single age or five-year ages. Input vectors are assumed to come from either single or standard abridged ages. Note that the ages of the output Sx are the ages the population would be after the N-year projection.
 #' @details This function does not account for \code{nLx} having been pre-binned into uniform 5-year age widths, which will throw an error. Just leave them in abridged ages instead. Note that in the case of abridged ages, the interpretation for the first and second value don't follow the original abridged age intervals: the first value in the probability of surviving from birth into ages 0-4 in the first five years, and the second value is the probability of surviving from 0-4 to 5-9. This represents a slight misalignment with the rest of the lifetable, user beware.
-#' @inheritParams lt_abridged
 #' @param nLx numeric vector of lifetable exposure.
+#' @param lx numeric vector of lifetable survivors from the same lifetable as \code{nLx}. The radix is inferred from \code{nLx} in case this is \code{NULL}.
+#' @param Age integer vector of starting ages.
+#' @param AgeInt integer vector of age intervals.
 #' @param N integer, the age width for survivor ratios, either 5 or 1. Default 5.
 #' @export
-lt_id_Ll_S <- function(nLx, lx, AgeInt, N = c(5, 1)) {
+
+lt_id_Ll_S <- function(nLx, lx = NULL, Age, AgeInt = NULL, N = 5) {
+  # number of ages
   n <- length(nLx)
-  stopifnot(length(lx) == n)
+  # vectors must be the same length
+  stopifnot(length(nLx) == length(Age))
   # either we're in 1 or 5 year age groups
   stopifnot(length(N) == 1 & N %in% c(5, 1))
-  ## compute Sx (missing from the LTbr computation
-  Sx <- rep(NA, n)
+  # infer radix in case lx is not given
+  radix <- lx[1]
+  if(is.null(lx)){
+    radix <- ifelse(nLx[1]>1, 10^nchar(trunc(nLx[1])), 1)
+  }
+  # validate nLx. Some zero in ages>100 could happen. YP should be non-zero in ages<80, even in historical pops (?)
+  # IW: relax this condition on YP of non-negative YP because of huge heterogeneity on input lt
+  stopifnot(all(nLx>=0, # nLx[Age<60]>0,
+                nLx[-n] < (radix*N)))
+
+  ## compute Sx (missing from the LTbr computation)
   # first age group is survival from births to the second age group
   if (N == 5) {
-    # double check because assuming abridged nLx is given...
+ Sx <- rep(NA, n-1) + # infer AgeInt in case is not given + if(is.null(AgeInt)){ + AgeInt <- inferAgeIntAbr(Age) + } stopifnot(length(AgeInt) == n) ageintcompare <- inferAgeIntAbr(vec = nLx) - stopifnot(all(ageintcompare[-n] == AgeInt[-n])) + if (Age[1] == 0){ + stopifnot(all(ageintcompare[-n] == AgeInt[-n])) + } # birth until 0-4 - Sx[1] <- (nLx[1] + nLx[2]) / ((AgeInt[1] + AgeInt[2]) * lx[1]) + Sx[1] <- (nLx[1] + nLx[2]) / ((AgeInt[1] + AgeInt[2]) * radix) # second age group is survival age 0-4 to age 5-9 Sx[2] <- nLx[3] / (nLx[1] + nLx[2]) # middle age groups @@ -299,18 +320,20 @@ lt_id_Ll_S <- function(nLx, lx, AgeInt, N = c(5, 1)) { Sx[mind] <- nLx[mind + 1] / nLx[mind] # penultimate age group Sx[n - 1] <- nLx[n] / (nLx[n - 1] + nLx[n]) - # closeout - Sx[n] <- 0.0 + # names of ages at arrive + names(Sx) <- seq(0,Age[length(Age)],5) } if (N == 1) { - LLXX <- c(lx[1], nLx) + Sx <- rep(NA, n) + LLXX <- c(radix, nLx) mind <- 1:(n - 1) Sx[mind] <- LLXX[mind + 1] / LLXX[mind] # closeout Sx[n] <- nLx[n] / (nLx[n - 1] + nLx[n]) + # names of ages at arrive + names(Sx) <- 0:(n-1) } - - - Sx } + + diff --git a/R/lt_model_lq.R b/R/lt_model_lq.R index ee601dea1..fa063b710 100644 --- a/R/lt_model_lq.R +++ b/R/lt_model_lq.R @@ -12,7 +12,7 @@ #' Estimate Wilmoth Model Life Table #' -#' Construct model life tables based on the Log-Quadratic (wilmoth) estimates +#' Construct model life tables based on the Log-Quadratic (Wilmoth) estimates #' with various choices of 2 input parameters: #' \code{q0_5, q0_1, q15_45, q15_35} and \code{e0}. There are 8 possible #' combinations (see examples below). @@ -20,7 +20,7 @@ #' @details Due to limitations of the R language the notation for probability #' of dying \code{nqx} is written \code{qx_n}, where \code{x} and \code{n} are #' integers. For example \code{45q15} is represented as \code{q45_15}. -#' @note This function is ported from \code{MortalityEstimate::wilmothLT} experimental package by Marius Pascariu. The package is no longe maintained. The latest version can be found here: \url{https://github.com/mpascariu/MortalityEstimate} +#' @note This function is ported from \code{MortalityEstimate::wilmothLT} experimental package by Marius Pascariu. The package is no longer maintained. The latest version can be found here: \url{https://github.com/mpascariu/MortalityEstimate} #' @param Sex Choose the sex of the population. This choice defines the use #' of a corresponding Log-Quadratic (\code{wilmoth}) #' model fitted for the whole Human Mortality Database (as of Dec 2019, diff --git a/R/lt_regroup_age.R b/R/lt_regroup_age.R index 516d0cde7..b2e557011 100644 --- a/R/lt_regroup_age.R +++ b/R/lt_regroup_age.R @@ -53,7 +53,8 @@ lt_single2abridged <- function(lx, nAx[N] <- ex[N] nMx <- ndx/nLx Tx <- lt_id_L_T(nLx) - Sx <- lt_id_Ll_S(nLx, lx, AgeInt, N = 5) + Sx <- c(lt_id_Ll_S(nLx, lx, Age5, AgeInt, N = 5),0.0) + names(Sx) <- NULL out <- data.frame( Age = Age5, @@ -80,7 +81,7 @@ lt_single2abridged <- function(lx, #' @description Computes single year of age life table by graduating the mortality schedule of an abridged life table, using the `ungroup::pclm()` to ungroup binned count data. Returns complete single-age lifetable. #' @details Similar to `lt_abridged()` details, forthcoming. #' @inheritParams lt_abridged -#' @param ... optional arguments passed to `pclm()`. For example, if you pass an expicit `lambda` parameter via the `control` argument, you can speed up estimation +#' @param ... optional arguments passed to `pclm()`. 
For example, if you pass an explicit `lambda` parameter via the `control` argument, you can speed up estimation #' @return Single-year lifetable in data.frame with columns #' \itemize{ #' \item{Age}{integer. Lower bound of single year age class}, @@ -98,6 +99,7 @@ lt_single2abridged <- function(lx, #' #' @export #' @importFrom ungroup pclm +#' @importFrom dplyr case_when #' @examples #' Mx <- c(.23669,.04672,.00982,.00511,.00697,.01036,.01169, #' .01332,.01528,.01757,.02092,.02517,.03225,.04241,.06056, @@ -150,6 +152,23 @@ lt_abridged2single <- function( NN <- length(Age) #stopifnot(length(nMx) == NN) + # some handy name coercion + a0rule <- case_when(a0rule == "Andreev-Kingkade" ~ "ak", + a0rule == "Coale-Demeny" ~ "cd", + TRUE ~ a0rule) + axmethod <- case_when(axmethod == "UN (Greville)" ~ "un", + axmethod == "PASEX" ~ "pas", + TRUE ~ axmethod) + Sex <- substr(Sex, 1, 1) |> + tolower() + Sex <- ifelse(Sex == "t", "b", Sex) + + region <- substr(region, 1, 1) |> + tolower() + if (!is.null(extrapLaw)){ + extrapLaw <- tolower(extrapLaw) + } + if (!is.null(extrapLaw)){ extrapLaw <- tolower(extrapLaw) extrapLaw <- match.arg(extrapLaw, choices = c("kannisto", @@ -218,9 +237,9 @@ lt_abridged2single <- function( # redefine Age and extrapFit for single year ages and new maxage a1 <- 1:length(M) - 1 - extrapFit <- a1[a1 >= min(extrapFit, (max(Age)-20)) & a1 <= max(Age)] + extrapFit <- a1[a1 >= min(extrapFit, (max(a1)-20)) & a1 <= max(Age)] # always refit from 110 even if extrapFrom > 110 - extrapFrom <- min(max(Age), 110) + extrapFrom <- min(max(a1), 110) # compute life table columns from single year mx LT <- lt_single_mx(nMx = M, @@ -242,7 +261,7 @@ lt_abridged2single <- function( } -#' calculate an abidged or single age lifetable from abridged or sinlge age data +#' calculate an abridged or single age lifetable from abridged or single age data #' @description This is a wrapper around the other lifetable utilities. We start with either `nMx`, `nqx`, or `lx` in single or abridged ages, and returns a full lifetable in either single or abridged ages. All optional arguments of `lt_abridged()` or `lt_single*()` can be passed in, for instance the `nax` assumptions or the extrapolation arguments. #' #' @param nMx_or_nqx_or_lx numeric vector of either `nMx`, `nqx`, or `lx` @@ -311,7 +330,7 @@ lt_ambiguous <- function(nMx_or_nqx_or_lx = NULL, out <- lt_single_qx(nqx = xx, Age = Age, Sex = Sex, ...) } if (type == "q" & !Single){ - out <- lt_single_qx(qx = xx, Age = Age, Sex = Sex, ...) + out <- lt_single_qx(nqx = xx, Age = Age, Sex = Sex, ...) 
out <- lt_single2abridged(lx = out$lx,nLx = out$nLx, ex = out$ex) } } diff --git a/R/lt_rule.R b/R/lt_rule.R index 828e81541..b16b56df2 100644 --- a/R/lt_rule.R +++ b/R/lt_rule.R @@ -88,8 +88,8 @@ #' segments(1, M1_4, 5, M1_4) #' text(1, c(M0, M1_4, M0_4), c("M0", "M1_4", "M0_4"), pos = 3) #' } -lt_rule_4m0_D0 <- function(D04, M04, P04, Sex = c("m", "f")) { - +lt_rule_4m0_D0 <- function(D04, M04, P04, Sex = "m") { + stopifnot(Sex %in% c("m","f")) if (missing(M04)) { M5 <- D04 / P04 } else { @@ -225,8 +225,8 @@ lt_rule_4m0_D0 <- function(D04, M04, P04, Sex = c("m", "f")) { #' text(1, c(M0, M1_4, M0_4), c("M0", "M1_4", "M0_4"), pos = 3) #' } -lt_rule_4m0_m0 <- function(M04, D04, P04, Sex = c("m", "f")) { - +lt_rule_4m0_m0 <- function(M04, D04, P04, Sex ="m") { + stopifnot(Sex %in% c("m","f")) if (missing(M04)) { M5 <- D04 / P04 } else { @@ -282,7 +282,7 @@ lt_rule_4m0_m0 <- function(M04, D04, P04, Sex = c("m", "f")) { #' @title estimates a0 using the Andreev-Kingkade rule of thumb starting with IMR #' -#' @description \code{AKq02a0} Andreev Kingkade a0 method. This version has a 3-part segemented linear model, based on cutpoints in q0. Code ported from HMDLifeTables. +#' @description \code{AKq02a0} Andreev Kingkade a0 method. This version has a 3-part segmented linear model, based on cut points in q0. Code ported from HMDLifeTables. #' #' @param q0 a value or vector of values of q0, the death probability in the first year of life. #' @param Sex either "m" or "f" @@ -328,9 +328,9 @@ lt_rule_ak_m0_a0 <- function(M0, Sex ){ #' #' @description This function wraps the two approximations for a0 based on either q0 (IMR) or m0. #' -#' @param M0 a value or vector of values of m0, the death probability in the first year of life. -#' @param q0 a value or vector of values of m0, the death risk in the first year of life. -#' @param Sex either "m" or "f" +#' @param M0 a value or vector of values of `1m0``, the death risk in the first year of life. +#' @param q0 a value or vector of values of `1q0``, the death probability in the first year of life, sometimes approximated with IMR. +#' @param Sex either `"m"` or `"f"` #' #' @return a0, the estimated average age at death of those dying in the first year of life, either a single value or a vector of values. #' @@ -343,7 +343,7 @@ lt_rule_1a0_ak <- function(M0 = NULL, q0 = NULL, Sex){ a0 <- lt_rule_ak_q0_a0(q0,Sex) } if (is.null(q0) & !is.null(M0)){ - a0 <- lt_rule_ak_q0_a0(M0,Sex) + a0 <- lt_rule_ak_m0_a0(M0,Sex) } a0 } @@ -351,14 +351,14 @@ lt_rule_1a0_ak <- function(M0 = NULL, q0 = NULL, Sex){ #' @title calculate a0 in different ways #' -#' @description This function wraps the Coale-Demeny and Andreev-Kingkade approximations for a0, which can come from M0, qo, or IMR. +#' @description This function wraps the Coale-Demeny and Andreev-Kingkade approximations for `a0`, which can come from `M0`, `q0`, or `IMR.` #' @details If sex is given as both, \code{"b"}, then we calculate the male and female results separately, then weight them together using SRB. This is bad in theory, but the leverage is trivial, and it's better than using male or female coefs for the total population. #' #' @inheritParams lt_rule_1a0_cd #' @param rule character. Either \code{"ak"} (Andreev-Kingkade) or \code{"cd"} (Coale-Demeny). #' @param Sex character, either \code{"m"}, \code{"f"}, or \code{"b"} #' @param q0 a value or vector of values of m0, the death risk in the first year of life. 
-#' @param SRB the sex ratio at birth (boys / girls), detault 1.05 +#' @param SRB the sex ratio at birth (boys / girls), default 1.05 #' @details Neither Coale-Demeny nor Andreev-Kingkade have explicit a0 rules for both-sexes combined. There's not a good way to arrive at a both-sex a0 estimate without increasing data requirements (you'd need data from each sex, which are not always available). It's more convenient to blend sex-specific a0 estimates based on something. Here we use SRB to do this, for no other reason than it has an easy well-known default value. This is bad because it assumes no sex differences in infant mortality, but this choice has a trivial impact on results. #' @return a0, the estimated average age at death of those dying in the first year of life, either a single value or a vector of values. #' diff --git a/R/lt_single.R b/R/lt_single.R index 7abec996f..60cbaac5a 100644 --- a/R/lt_single.R +++ b/R/lt_single.R @@ -19,6 +19,7 @@ #' \item{ex}{numeric. Age-specific remaining life expectancy.} #' } #' @export +#' @importFrom dplyr case_when lt_single_mx <- function(nMx, Age = 1:length(nMx) - 1, radix = 1e5, @@ -36,6 +37,21 @@ lt_single_mx <- function(nMx, ...) { stopifnot(extrapFrom <= max(Age)) + + # some handy name coercion + a0rule <- case_when(a0rule == "Andreev-Kingkade" ~ "ak", + a0rule == "Coale-Demeny" ~ "cd", + TRUE ~ a0rule) + Sex <- substr(Sex, 1, 1) |> + tolower() + Sex <- ifelse(Sex == "t", "b", Sex) + + region <- substr(region, 1, 1) |> + tolower() + if (!is.null(extrapLaw)){ + extrapLaw <- tolower(extrapLaw) + } + Sex <- match.arg(Sex, choices = c("m","f","b")) a0rule <- match.arg(a0rule, choices = c("ak","cd")) if (!is.null(extrapLaw)){ @@ -74,37 +90,40 @@ lt_single_mx <- function(nMx, } # -------------------------- # Now all vectors may end up being longer - x_extr <- seq(extrapFrom, 130, by = 1) - Mxnew <- lt_rule_m_extrapolate( - x = Age, - mx = nMx, - x_fit = extrapFit, - x_extr = x_extr, - law = extrapLaw, - ...) - - nMxext <- Mxnew$values - Age2 <- names2age(nMxext) + if (max(Age) < 130){ + x_extr <- seq(extrapFrom, 130, by = 1) + Mxnew <- lt_rule_m_extrapolate( + x = Age, + mx = nMx, + x_fit = extrapFit, + x_extr = x_extr, + law = extrapLaw, + ...) - keepi <- Age2 < extrapFrom - nMxext[keepi] <- nMx[Age < extrapFrom] - - # overwrite some variables: - nMx <- nMxext - Age <- Age2 + nMxext <- Mxnew$values + Age2 <- names2age(nMxext) + + keepi <- Age2 < extrapFrom + nMxext[keepi] <- nMx[Age < extrapFrom] + + # overwrite some variables: + nMx <- nMxext + Age <- Age2 + } N <- length(Age) AgeInt <- rep(1, N) # get ax: nAx <- rep(.5, N) - nAx[1] <- lt_rule_1a0( - rule = a0rule, - M0 = nMx[1], - IMR = IMR, - Sex = Sex, - region = region, - SRB = SRB) - + if (Age[1] == 0){ + nAx[1] <- lt_rule_1a0( + rule = a0rule, + M0 = nMx[1], + IMR = IMR, + Sex = Sex, + region = region, + SRB = SRB) + } # get qx (if pathological qx > 1, ax replaced, assumed constant hazard) qx <- lt_id_ma_q( nMx = nMx, @@ -146,7 +165,7 @@ lt_single_mx <- function(nMx, AgeInt[N] <- NA # Survival ratios computed only after nLx is closed out - Sx <- lt_id_Ll_S(nLx, lx, AgeInt = AgeInt, N = 1) + Sx <- lt_id_Ll_S(nLx, lx, Age, AgeInt = AgeInt, N = 1) if (OAG) { if (OAnew == OA) { diff --git a/R/lt_single_qx.R b/R/lt_single_qx.R index a164e8667..988fd4886 100644 --- a/R/lt_single_qx.R +++ b/R/lt_single_qx.R @@ -15,7 +15,7 @@ #' \item{lx}{numeric. Lifetable survivorship} #' \item{ndx}{numeric. Lifetable deaths distribution.} #' \item{nLx}{numeric. Lifetable exposure.} -#' \item{Sx}{numeric. 
Survivor ratios in uniform 5-year age groups.}
+#' \item{Sx}{numeric. Survivor ratios in uniform single-year age groups.}
 #' \item{Tx}{numeric. Lifetable total years left to live above age x.}
 #' \item{ex}{numeric. Age-specific remaining life expectancy.}
 #' }
@@ -81,13 +81,14 @@ lt_single_qx <- function(nqx,
 
   # compute ax:
   nAx <- rep(.5, N)
-  nAx[1] <- lt_rule_1a0(rule = a0rule,
-                        q0 = nqx[1],
-                        IMR = IMR,
-                        Sex = Sex,
-                        region = region,
-                        SRB = SRB)
-
+  if (Age[1] == 0){
+    nAx[1] <- lt_rule_1a0(rule = a0rule,
+                          q0 = nqx[1],
+                          IMR = IMR,
+                          Sex = Sex,
+                          region = region,
+                          SRB = SRB)
+  }
   # compute 1mx from 1qx and 1ax
   nMx <- lt_id_qa_m(nqx = nqx,
                     nax = nAx,
diff --git a/R/mig_beta.R b/R/mig_beta.R
index 51aaf2d7a..16fdd4e0c 100644
--- a/R/mig_beta.R
+++ b/R/mig_beta.R
@@ -1,3 +1,8 @@
+# [ ] Jan 1 thing not ideal.
+# [ ] C2 and last year can produce big negative in age 0.
+#     maybe cut it off and not return estimate for partial year
+# [ ] use Lx ratios to project
+
 #' Estimate intercensal migration by comparing census population, by age and
 #' sex, to the results of a RUP projection.
 #'
@@ -8,6 +13,7 @@
 #' the cohort parallelogram assuming uniform distribution assuming it is all
 #' migration. It finalizes by summing the estimate by age groups across the entire
 #' intercensal period to have a total migration during the entire period.
+#' Alternatively, a child adjustment and an old age adjustment can be applied.
 #'
 #' @param c1 numeric vector. The first (left) census in single age groups
 #' @param c2 numeric vector. The second (right) census in single age groups
@@ -25,6 +31,33 @@
 #' @param sex character string, either `"male"`, `"female"`, or `"both"`
 #' @param midyear logical. `FALSE` means all Jan 1 dates between `date1` and `date2` are returned. `TRUE` means all July 1 intercensal dates are returned.
 #' @param verbose logical. Shall we send informative messages to the console?
+#'
+#' @param child_adjust The method with which to adjust the youngest age groups.
+#' If \code{"none"}, no adjustment is applied (default). If the
+#' child-woman ratio (\code{"cwr"}) is chosen, the first cohorts reflecting the
+#' difference \code{date2 - date1} are adjusted (plus age 0). If the
+#' child constant ratio (\code{"constant"}) is chosen, the first 15 age groups
+#' are adjusted.
+#'
+#' @param childage_max The maximum age up to which \code{child_adjust} is applied.
+#' By default, set to \code{NULL}, which gets translated into all the cohorts
+#' born between \code{date2} and \code{date1}. If \code{date2} is 2010 and
+#' \code{date1} is 2002, the first 8 cohorts are adjusted. Otherwise, the user
+#' can supply an integer.
+#'
+#' @param cwr_factor A numeric between 0 and 1 by which to adjust the CWR method
+#' for the young ages from \code{child_adjust}. \strong{This is only used
+#' when \code{child_adjust} is \code{"cwr"}}.
+#'
+#' @param oldage_adjust The type of adjustment to apply to ages at and above
+#' \code{oldage_min}. \code{'beers'} applies a Beers graduation method,
+#' while \code{'mav'} applies a moving average with cascading on the tails.
+#' For more information see \code{?mav} and \code{?graduate_beers}.
+#'
+#' @param oldage_min The minimum age from which to apply \code{oldage_adjust}.
+#' By default, set to 65, so any adjustment from \code{oldage_adjust} will be
+#' applied for 65+.
+#'
 #' @param ...
optional arguments passed to \code{lt_single_qx}
 #' @export
 #'
@@ -36,7 +69,8 @@
 #' @examples
 #'
 #' \dontrun{
-#' mig_beta(
+#'
+#' mig_beta(
 #'   location = "Russian Federation",
 #'   sex = "male",
 #'   c1 = pop1m_rus2002,
@@ -64,13 +98,24 @@ mig_beta <- function(
     sex = "both",
     midyear = FALSE,
     verbose = TRUE,
-    ...
-    ) {
+    child_adjust = c("none", "cwr", "constant"),
+    childage_max = NULL,
+    cwr_factor = 0.3,
+    oldage_adjust = c("none", "beers", "mav"),
+    oldage_min = 65,
+    ...) {
+  child_adjust <- match.arg(child_adjust)
+  oldage_adjust <- match.arg(oldage_adjust)
 
   # convert the dates into decimal numbers
   date1 <- dec.date(date1)
   date2 <- dec.date(date2)
 
+  # If NULL, assume the cohorts born between the censuses at date1 and date2
+  if (is.null(childage_max)) {
+    childage_max <- as.integer(ceiling(date2) - floor(date1))
+  }
+
   res_list <- rup(
     c1 = c1,
     c2 = c2,
@@ -112,67 +157,79 @@
 
   # and the decum_resid on the values.
   mat_resid <- data.table::dcast(
-    pop_jan1[, list(year, age, decum_resid)],
-    age ~ year,
-    value.var = "decum_resid"
-  )
+      pop_jan1[, list(year, age, decum_resid)],
+      age ~ year,
+      value.var = "decum_resid"
+    )
+
+  mig_vec <- rowSums(mat_resid[,-1], na.rm = TRUE)
 
   # Sum over all ages to get a total decum_resid over all years for each age.
-  mig <- stats::setNames(rowSums(mat_resid, na.rm = TRUE), mat_resid$age)
-  mig
-}
+  mig <- stats::setNames(mig_vec, mat_resid$age)
 
+  # Child adjustment
+  mig <-
+    switch(
+      child_adjust,
+      "none" = mig,
+      "cwr" = mig_beta_cwr(mig, c1, c2, date1, date2, n_cohs = childage_max, cwr_factor = cwr_factor),
+      "constant" = mig_beta_constant_child(mig, c1, c2, ageMax = childage_max)
+    )
 
+  # Old age adjustment
+  mig_oldage <-
+    switch(
+      oldage_adjust,
+      "none" = mig,
+      "beers" = graduate_beers(mig, as.integer(names(mig)), AgeInt = 1),
+      "mav" = mav(mig, names(mig), tails = TRUE)
+    )
 
+  # Only apply the old age adjustment on ages above oldage_min
+  ages_oldages <- as.integer(names(mig_oldage))
+  mig[ages_oldages >= oldage_min] <- mig_oldage[ages_oldages >= oldage_min]
+
+  mig
+}
 
-mig_beta_cwr <- function(mig,
-                         c1_females,
-                         c2_females,
-                         date1,
-                         date2,
-                         maternal_window = 30,
-                         maternal_min = 15){
+mig_beta_cwr <- function(mig,
+                         c1_females,
+                         c2_females,
+                         date1,
+                         date2,
+                         maternal_window = 30,
+                         maternal_min = 15,
+                         n_cohs = NULL,
+                         cwr_factor = 0.3) {
+
   age <- names2age(mig)
-
+
   # conservative guess at how many child ages to cover:
-  n_cohs <- as.integer(ceiling(date2) - floor(date1))
-
+  if (is.null(n_cohs)) n_cohs <- as.integer(ceiling(date2) - floor(date1))
+
   mig_out <- mig
-  for (i in 1:n_cohs){
+  for (i in 1:n_cohs) {
     # index maternal ages
-    a_min <- i + maternal_min
-    a_max <- min(i + maternal_min + maternal_window, 49)
-    mat_ind <- a_min:a_max
-    cwr_i <- (c1_females[i] / sum(c1_females[mat_ind]) + c2_females[i] / sum(c2_females[mat_ind])) / 2
+    a_min <- i + maternal_min
+    a_max <- min(i + maternal_min + maternal_window, 49)
+    mat_ind <- a_min:a_max
+    cwr_i <- (c1_females[i] / sum(c1_females[mat_ind]) + c2_females[i] / sum(c2_females[mat_ind])) / 2
     # proportional to maternal neg mig.
-    mig_out[i] <- cwr_i * sum(mig[mat_ind])
+    mig_out[i] <- cwr_factor * cwr_i * sum(mig[mat_ind])
   }
-
+
   mig_out
 }
 
+# rough stab at constant child adjustment
+mig_beta_constant_child <- function(mig, c1, c2, ageMax = 14) {
+  age <- names2age(mig)
+
+  denom <- (c1 + c2) / 2
 
-# TR: prep for constant child. Need denom for rates though,
-# but ideally without re-calculating an intermediate object
-# that was already needed.
Maybe be we can get exposures -# from RUP. Hmm. -# mig_beta_constant_child <- function(mig, -# c1, -# c2, -# date1, -# date2, -# maternal_window = 30, -# maternal_min = 15){ -# age <- names2age(mig) -# -# # conservative guess at how many child ages to cover: -# n_cohs <- as.integer(ceiling(date2) - floor(date1)) -# -# mig_out <- mig -# -# -# -# mig_out -# } -# + ind <- age <= ageMax + mig_rate_const <- sum(mig[ind]) / sum(denom[ind]) + mig[ind] <- denom[ind] * mig_rate_const + + mig +} diff --git a/R/mig_rc.R b/R/mig_rc.R index 8ec34743c..28ee1c223 100644 --- a/R/mig_rc.R +++ b/R/mig_rc.R @@ -14,7 +14,7 @@ #' @export #' @details In the full 13 parameter model, the migration rate at age x, \eqn{m(x)} is defined as -#' \deqn{m(x) = a1*exp(-1*alpha1*x) + a2*exp(-1*alpha2*(x - mu2) - exp(-1*lambda2*(x - mu2))) + a3*exp(-1*alpha3*(x - 3) - exp(-1*lambda3*(x - mu3))) + a4*exp(lambda4*x) + c} +#' \deqn{m(x) = a1*exp(-1*alpha1*x) + a2*exp(-1*alpha2*(x - mu2) - exp(-1*lambda2*(x - mu2))) + a3*exp(-1*alpha3*(x - mu3) - exp(-1*lambda3*(x - mu3))) + a4*exp(lambda4*x) + c} #' #' The first, second, third and fourth pieces of the equation represent pre-working age, working age, retirement and post-retirement age patterns, respectively. #' Models with less parameters gradually remove terms at the older ages. Parameters in each family are: diff --git a/R/mig_resid.R b/R/mig_resid.R index 17984aed1..9f387864b 100644 --- a/R/mig_resid.R +++ b/R/mig_resid.R @@ -371,7 +371,7 @@ mig_resid_stock <- function(pop_m_mat, is.numeric(ages_asfr) ) -# <<<<<<< HEAD + # # Check in dimensions are ok - still working on this # if(ncol(asfr_mat) == ncol(pop_f_mat) -1 & nrow(sr_f_mat) == nrow(pop_f_mat) -1){ # print("matrix dimensions are correct") @@ -389,22 +389,23 @@ mig_resid_stock <- function(pop_m_mat, # asfr_mat # sr_f_mat # } -# ======= -# >>>>>>> 362ae9857574b05c519c8de40548461d6b9070dd + # Migration net of only survivors - net_mig_m <- migresid_net_surv(pop_m_mat, sr_m_mat) - net_mig_f <- migresid_net_surv(pop_f_mat, sr_f_mat) + net_mig_m <- migresid_net_surv(pop_mat = pop_m_mat, + sr_mat = sr_m_mat) + net_mig_f <- migresid_net_surv(pop_mat = pop_f_mat, + sr_mat = sr_f_mat) # fertility_index <- which(ages %in% ages_asfr) # Returns all births for all years age_interval <- unique(diff(ages)) all_births <- migresid_births( - pop_f_mat, - asfr_mat, + pop_f_mat = pop_f_mat, + asfr_mat = asfr_mat, # fertility_index, - age_interval + age_interval = age_interval ) # With all_births already calculated, separate between @@ -414,17 +415,17 @@ mig_resid_stock <- function(pop_m_mat, births_f <- all_births * (1 / (1 + srb_vec[byrs])) net_mig_m <- migresid_net_surv_first_ageg( - net_mig_m, - pop_m_mat, - births_m, - sr_m_mat + net_mig = net_mig_m, + pop_mat = pop_m_mat, + births = births_m, + sr_mat = sr_m_mat ) net_mig_f <- migresid_net_surv_first_ageg( - net_mig_f, - pop_f_mat, - births_f, - sr_f_mat + net_mig = net_mig_f, + pop_mat = pop_f_mat, + births = births_f, + sr_mat = sr_f_mat ) # First year is empty, so we exclude @@ -490,12 +491,12 @@ mig_resid_cohort <- function(pop_m_mat, # Adjust last age group in the bounds mig_bounds <- migresid_bounds_last_ageg( - net_mig_m, - net_mig_f, - mig_upper_m, - mig_lower_m, - mig_upper_f, - mig_lower_f + net_mig_m = net_mig_m, + net_mig_f = net_mig_f, + mig_upper_m = mig_upper_m, + mig_lower_m = mig_lower_m, + mig_upper_f = mig_upper_f, + mig_lower_f = mig_lower_f ) mig_upper_m <- mig_bounds$mig_upper_m @@ -540,13 +541,6 @@ mig_resid_time <- function(pop_m_mat, asfr_mat <- 
args_list$asfr_mat srb_vec <- args_list$srb_vec - # TR: add chunk (maybe a new function?) that - # checks dimensions; names dimensions if necessary (and warns if so) - # and trims dimensions if necessary (warning user if needed). - # warning does mean warning() it just means cat("\nwatch out!\n") - # Not important, but theese could be silenced using a new 'verbose' arg - - # Estimate stock method mig_res <- mig_resid_stock( @@ -604,7 +598,9 @@ migresid_net_surv <- function(pop_mat, sr_mat) { # is treated by migresid_net_surv_first_ageg. res[nrow(res), ] <- NA res <- rbind(matrix(NA, nrow = 1, ncol = ncol(res)), res) - res <- migresid_net_surv_last_ageg(res, pop_mat, sr_mat) + res <- migresid_net_surv_last_ageg(net_mig = res, + pop_mat = pop_mat, + sr_mat = sr_mat) rownames(res) <- rownames(pop_mat) colnames(res) <- colnames(pop_mat)[-p] res diff --git a/R/mig_un_fam.R b/R/mig_un_fam.R index a5f516c19..81a2c8e8b 100644 --- a/R/mig_un_fam.R +++ b/R/mig_un_fam.R @@ -1,9 +1,9 @@ #' Net migration by age for an UN family #' @description Given a total net migration, #' calculate the net migration age schedule based on the Rogers and Castro formula for UN families. -#' @param NM numeric. Total net migration to distribuite between ages and sex. +#' @param NM numeric. Total net migration to distribute between ages and sex. #' @param family character. Could be "Family", "Female Labor", "Male Labor". -#' @param Single logical. Results by simple age. Default `FALSE`. +#' @param Single logical. Results by simple age. Default `TRUE`. #' Typically from pre-working age and working age parts of in Roger-Castro formula. #' @param OAnew The age from which to group all ages into an open ended age group. #' By default it is set to 100, so it groups all ages up to 120, which is the diff --git a/R/nAx.R b/R/nAx.R index d7c0e5f7d..729fcd1d2 100644 --- a/R/nAx.R +++ b/R/nAx.R @@ -788,9 +788,14 @@ lt_a_un <- function(nMx, Sex = Sex, region = region, mod = mod, - # no need to redo extrap in here + # we need to redo extrap in here because otherwise extrapFit + # might be length < 2 and raise problems with MortalityLaws + extrapLaw = extrapLaw, + extrapFrom = extrapFrom, + extrapFit = extrapFit, ... ) + qxnew <- lt_id_ma_q( nMx = mxi, diff --git a/R/smooth_age_5.R b/R/smooth_age_5.R index 45c8a1392..bc73a2246 100644 --- a/R/smooth_age_5.R +++ b/R/smooth_age_5.R @@ -63,11 +63,11 @@ smooth_age_5_cf <- function(Value, out <- Value5 * NA # cut back down (depending) and name interleaf <- c(rbind(vodds, vevens)) - + if (start_on == 5){ interleaf <- interleaf[-1] } - + n <- min(c(length(interleaf), N)) out[1:n] <- interleaf[1:n] @@ -115,7 +115,7 @@ smooth_age_5_kkn <- function(Value, # would need to move this up to ensure? # or in case of 85+ would we want to keep 80-84, 85+ as-is? - Value10 <- groupAges(Value5, Age = Age, N = 10, shiftdown = start_on) + Value10 <- groupAges(Value5, Age = Age5, N = 10, shiftdown = start_on) # what OAG is a strange digit? Then take OAG after grouping. 
if (OAG) { @@ -133,21 +133,21 @@ smooth_age_5_kkn <- function(Value, vodds <- Value10 / 2 + (v10R - v10L) / 16 # constrained in 10-year age groups vevens <- Value10 - vodds - + # if (start_on == 5){ # # this is the KNN operation # vevens <- Value10 / 2 + (v10R - v10L) / 16 # # constrained in 10-year age groups # vodds <- Value10 - vevens # } - # + # # stagger odd even 5s interleaf <- c(rbind(vodds, vevens)) - + if (start_on == 5){ interleaf <- interleaf[-1] } - + # produce results vector out <- Value5 * NA n <- min(c(length(interleaf), N)) @@ -442,14 +442,24 @@ smooth_age_5_zigzag <- function(Value, } #' Smooth in 5-year age groups using a moving average +#' #' @description Smooth data in 5-year age groups. #' @details This function calls \code{smooth_age_5_zigzag_inner()}, but prepares data in a way consistent with other methods called by \code{smooth_age_5()}. It is probably preferable to call \code{zigzag()} from the top level, or else call this method from \code{agesmth()} for more control over tail imputations. +#' #' @param Value numeric vector of (presumably) counts in 5-year age groups. #' @param Age integer vector of age group lower bounds. #' @param OAG logical. Whether or not the top age group is open. Default \code{TRUE}. #' @param n integer. The width of the moving average. Default 3 intervals (x-5 to x+9). +#' @param tails logical. If tails is \code{FALSE}, both tails are left untouched. +#' Otherwise, the tails are filled out using a cascade method. +#' #' @return numeric vector of smoothed counts in 5-year age groups. -#' @details This function calls \code{mav()}, which itself relies on the more general \code{ma()}. We lose the lowest and highest ages with this method, unless \code{n=1}, in which case data is returned in the original 5-year age groups. The total population count is not constrained to sum to the orignal total. +#' +#' @details If tails is set to \code{FALSE}, this function calls \code{mav()}, which itself relies on the more general \code{ma()}. We lose the lowest and highest ages with this method, unless \code{n=1}, in which case data is returned in the original 5-year age groups. The total population count is not constrained to sum to the original total. +#' +#' If tails is \code{TRUE}, the same results are expected but the tails are +#' filled in using a cascading method. +#' #' @examples #' Age <- c(0,1,seq(5,90,by=5)) #' # defaults @@ -466,11 +476,11 @@ smooth_age_5_zigzag <- function(Value, #' legend("topright", col = cols, lty = 1, lwd = lwds, legend = paste("n =",1:5)) #' } #' @export - smooth_age_5_mav <- function(Value, Age, OAG = TRUE, - n = 3) { + n = 3, + tails = FALSE) { Value <- groupAges(Value, Age = Age, N = 5) Age <- as.integer(names(Value)) @@ -479,7 +489,8 @@ smooth_age_5_mav <- function(Value, Value = Value, Age = Age, OAG = OAG, - n = n + n = n, + tails = tails ) Smoothed @@ -615,10 +626,10 @@ smooth_age_5_feeney <- function(Value, #' Smooth populations in 5-year age groups using various methods #' #' @description Smooth population counts in 5-year age groups using the Carrier-Farrag, -#' Karup-King-Newton, Arriaga, United Nations, Stong, or Zigzag methods. Allows for imputation +#' Karup-King-Newton, Arriaga, United Nations, Strong, MAV or Zigzag methods. Allows for imputation #' of values in the youngest and oldest age groups for the Carrier-Farrag, Karup-King-Newton, #' and United Nations methods. 
-
+#'
 #' @details The Carrier-Farrag, Karup-King-Newton (KKN), and Arriaga methods do not modify the totals
 #' in each 10-year age group, whereas the United Nations, Strong, Zigzag, and moving average (MAV) methods do. The age intervals
 #' of input data could be any integer structure (single, abridged, 5-year, other), but
@@ -627,8 +638,8 @@ smooth_age_5_feeney
 #'
 #' The Carrier-Farrag, Karup-King-Newton, and United Nations methods do not produce estimates
 #' for the first and final 10-year age groups. By default, these are imputed with the original 5-year age group totals, but
-#' you can also specify to impute with \code{NA}, or the results of the Arriaga or
-#' Strong methods. If the terminal digit of the open age group is 5, then the terminal 10-year
+#' you can also specify to impute with \code{NA}, or the results of the Arriaga,
+#' Strong, or Cascade methods. If the terminal digit of the open age group is 5, then the terminal 10-year
 #' age group shifts down, so imputations may affect more ages in this case. Imputation can follow
 #' different methods for young and old ages.
 #'
@@ -640,7 +651,7 @@ smooth_age_5_feeney
 #'
 #' @param Value numeric vector of counts in single, abridged, or 5-year age groups.
 #' @param Age integer vector of ages corresponding to the lower integer bound of the counts.
-#' @param method character string. Options include \code{"Carrier-Farrag"},\code{"Arriaga"},\code{"KKN"},\code{"United Nations"}, \code{"Strong"}, and \code{"Zigzag"}. See details. Default \code{"Carrier-Farrag"}.
+#' @param method character string. Options include \code{"Carrier-Farrag"},\code{"Arriaga"},\code{"KKN"},\code{"United Nations"}, \code{"Strong"}, \code{"MAV"}, and \code{"Zigzag"}. See details. Default \code{"Carrier-Farrag"}.
 #' @param OAG logical. Whether or not the top age group is open. Default \code{TRUE}.
 #' @param ageMin integer. The lowest age included included in intermediate adjustment. Default 10. Only relevant for Strong method.
 #' @param ageMax integer. The highest age class included in intermediate adjustment. Default 65. Only relevant for Strong method.
@@ -756,7 +767,7 @@ smooth_age_5 <- function(Value,
                          ageMin = 10,
                          ageMax = 65,
                          n = 3,
-                         young.tail = c("Original", "Arriaga", "Strong", NA),
+                         young.tail = c("Original", "Arriaga", "Strong", "Cascade", NA),
                          old.tail = young.tail) {
   method <- match.arg(method, c("Carrier-Farrag",
                                 "KKN",
                                 "Arriaga",
                                 "United Nations",
                                 "Strong",
                                 "Zigzag",
                                 "MAV"))
-  young.tail <- match.arg(young.tail, c("Original", "Arriaga", "Strong", NA))
-  old.tail <- match.arg(old.tail, c("Original", "Arriaga", "Strong", NA))
+
+  tail_methods <- c("Original", "Arriaga", "Strong", "Cascade", NA)
+  young.tail <- match.arg(young.tail, tail_methods)
+  old.tail <- match.arg(old.tail, tail_methods)
 
   method <- simplify.text(method)
   young.tail <- simplify.text(young.tail)
@@ -858,6 +871,8 @@ smooth_age_5 <- function(Value,
     original <- groupAges(Value, Age = Age, N = 5)
     arriaga <- smooth_age_5_arriaga(Value, Age = Age, OAG = OAG)
     strong <- smooth_age_5_strong(Value, Age = Age, OAG = OAG)
+    mav_tails <- smooth_age_5_mav(Value, Age = Age, OAG = OAG, tails = TRUE)
+
     # are the final entries NAs?
if (nrle$values[length(nrle$values)] == 1 & !is.na(old.tail)) { nrle$values[1] <- 0 @@ -877,7 +892,10 @@ smooth_age_5 <- function(Value, stopifnot(length(strong) == length(out)) out[old.ind] <- strong[old.ind] } - + if (old.tail == "cascade") { + stopifnot(length(mav_tails) == length(out)) + out[old.ind] <- mav_tails[old.ind] + } } nrle <- rle(as.integer(nas)) # take care of young tail @@ -900,6 +918,11 @@ smooth_age_5 <- function(Value, stopifnot(length(strong) == length(out)) out[young.ind] <- strong[young.ind] } + if (young.tail == "cascade") { + stopifnot(length(mav_tails) == length(out)) + out[young.ind] <- mav_tails[young.ind] + } + } } # end tail operations diff --git a/R/utils.R b/R/utils.R index 655601b1b..a47457e2d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -196,7 +196,9 @@ getModelLifeTable <- function(ModelName, Sex) { avg_adj <- function(x) { n <- length(x) - (shift.vector(x,-1, NA) + shift.vector(x, 1, NA)) / 2 + out <- (shift.vector(x,-1, NA) + shift.vector(x, 1, NA)) / 2 + names(out) <- names(x) + out } #' convert strings to concatenation of lower case alphabet diff --git a/R/utilsAge.R b/R/utilsAge.R index 57da8c721..1c6082b01 100644 --- a/R/utilsAge.R +++ b/R/utilsAge.R @@ -58,7 +58,7 @@ AGEN <- #' @param shiftdown integer. Move the grouping down by one or more single ages. Optional argument. #' #' @details If you shift the groupings, then the first age groups may have a negative lower bound -#' (for example of -5). These counts would be discarded for the osculatory version of Sprague smoothing, +#' (for example of -5). These counts would be discarded for the oscillatory version of Sprague smoothing, #' for example, but they are preserved in this function. The important thing to know is that if you shift #' the groups, the first and last groups won't be N years wide. For example if \code{shiftdown} is 1, #' the first age group is 4-ages wide. @@ -272,7 +272,7 @@ age2int <- function(Age, OAG = TRUE, OAvalue = NA) { #' @return Vector of counts in N-year age groups. #' #' @details If you shift the groupings, then the first age groups may have a negative lower bound -#' (for example of -5). These counts would be discarded for the osculatory version of Sprague smoothing, +#' (for example of -5). These counts would be discarded for the oscillatory version of Sprague smoothing, #' for example, but they are preserved in this function. The important thing to know is that if you shift #' the groups, the first and last groups will not be N years wide. For example if \code{shiftdown} is 1, the first age group is 4-ages wide. The ages themselves are not returned, #' but they are the name attribute of the output count vector. Note this will also correctly group abridged ages @@ -324,6 +324,7 @@ groupOAG <- function(Value, Age, OAnew) { N <- length(Value[Age <= OAnew]) Value[N] <- sum(Value[Age >= OAnew]) Value <- Value[1:N] + names(Value) <- Age[1:N] Value } @@ -331,7 +332,7 @@ groupOAG <- function(Value, Age, OAnew) { #' check for coherence within Age and between Age and AgeInt #' #' @description A few checks are carried out to test if \code{Age} is internally consistent, that \code{OAG} is consistent with \code{AgeInt}, and that \code{Age} and \code{AgeInt} are consistent with one another. For \code{Age} to be internally consistent, we cannot have redundant values, and values must be sequential. -#' @details If \code{OAG} is \code{TRUE} then \code{AgeInt} must be coded as \code{NA}. 
If \code{Age} is not sorted then we sort both \code{Age} and \code{AgeInt}, assuming that they are in matched order. This isn't incoherence per se, but a message is returned to the console.
+#' @details If \code{OAG} is \code{TRUE} then \code{AgeInt} must be coded as \code{NA}. If \code{Age} is not sorted then we sort both \code{Age} and \code{AgeInt}, assuming that they are in matched order. This isn't incoherence in itself, but a message is returned to the console.
 #'
 #' @param Age integer vector of single ages (lower bound)
 #' @param AgeInt integer vector. Age interval widths
@@ -724,7 +725,7 @@ rescaleAgeGroups <- function(Value1,
 }
 
 #' force a (count) vector to abridged ages
-#' @description This is a robustness utility, in place to avoid annoying hang-ups in \code{LTAbr()}. If data are given in non-standard ages, they are forced to standard abrdiged ages on the fly. Really this should happen prior to calling \code{lt_abridged()}
+#' @description This is a robustness utility, in place to avoid annoying hang-ups in \code{LTAbr()}. If data are given in non-standard ages, they are forced to standard abridged ages on the fly. Really this should happen prior to calling \code{lt_abridged()}
 #' @details This should be able to group up and group down as needed. \code{graduate_mono()} is used below the hood. \code{pclm()} or \code{graduate_uniform()} out to be flexible enough to do the same.
 #' @inheritParams graduate_uniform
 #' @seealso graduate_mono_closeout, lt_abridged
@@ -734,14 +735,16 @@ rescaleAgeGroups <- function(Value1,
 #' Age <- c(0,1,3,seq(5,100,5))
 #' AgeInt <- c(1,2,2,rep(5,19),1)
 #' Value <- tapply(V1,rep(Age,times=AgeInt), sum)
-#'
+#'
 #' is_abridged(Age)
-#' age_abridge_force(Value, AgeInt, Age)
-age_abridge_force <- function(Value, AgeInt, Age) {
+#' age_abridge_force(Value, Age)
+age_abridge_force <- function(Value, Age) {
+
   v1 <- graduate_mono_closeout(
     Value,
     Age = Age)
-  a1 <- min(Age):(length(v1) - 1)
+  #a1 <- min(Age):(length(v1) - 1)
+  a1 <- (1:length(v1) - 1 + min(Age)) |> as.integer()
   AgeAbr <- calcAgeAbr(a1)
   vabr <- tapply(v1, AgeAbr, sum)
   vabr
diff --git a/R/utils_downloads.R b/R/utils_downloads.R
index af435bda2..2627408fc 100644
--- a/R/utils_downloads.R
+++ b/R/utils_downloads.R
@@ -3,7 +3,7 @@
 # and potentially others.
 
 #' Extract Lx estimates from WPP2019. Mainly an util function for other ones.
-#' @description We extract `Lx` from `wpp2019`, interpolated to exact dates. Different methods availables.
+#' @description We extract `Lx` from `wpp2019`, interpolated to exact dates. Different methods available.
 #' A vector of countries can handle, but with an unique sex. Row names are not indicative of countries.
 #' @param nLx numeric. either `NULL` or a numeric vector of lifetable exposure. If it's the second then we just pass it back.
 #' @param location vector. UN Pop Div `LocName` or `LocID`
@@ -11,7 +11,7 @@
 #' @param nLxDatesIn numeric. Vector of three decimal dates produced by (or passed through) `basepop_five()`
 #' @param method character. Could be `"linear"`, `"exponential"`, or `"power"`
 #'
-#' @return numeric matrix of `nLx` with `length(nLxDatesIn)` and abrdiged ages in rows.
+#' @return numeric matrix of `nLx` with `length(nLxDatesIn)` columns and abridged ages in rows.
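To make the documented return shape concrete, here is a minimal usage sketch (not part of the patch): the location, gender, and dates are illustrative, and the call assumes the `wpp2019`-backed lookups used by `downloadnLx()` are reachable.

```r
# Hypothetical call, for illustration only: per the @return above, we
# expect a matrix with abridged ages in rows and one column per date.
nLx_mat <- downloadnLx(nLx = NULL,
                       location = "Argentina",
                       gender = "female",
                       nLxDatesIn = c(2005.5, 2010.5, 2015.5))
dim(nLx_mat) # abridged ages in rows, length(nLxDatesIn) columns
```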
#' @export #' @importFrom stats setNames #' @importFrom stats reshape @@ -58,7 +58,7 @@ downloadnLx <- function(nLx, location, gender, nLxDatesIn, method="linear") { } if (!any(fertestr::is_LocID(location))) { location_code <- fertestr::get_location_code(location) - }else { + } else { location_code <- as.integer(location) } @@ -121,7 +121,7 @@ downloadnLx <- function(nLx, location, gender, nLxDatesIn, method="linear") { } #' Extract ASFR estimates from WPP2019. Mainly an util function for other ones. -#' @description We extract `ASFRx` from `wpp2019`, interpolated to exact dates. Different methods availables. +#' @description We extract `ASFRx` from `wpp2019`, interpolated to exact dates. Different methods available. #' A vector of countries can handle, but with an unique sex. Row names are not indicative of countries. #' @param Asfrmat numeric. #' @param location vector. UN Pop Div `LocName` or `LocID` @@ -262,7 +262,7 @@ downloadSRB <- function(SRB, location, DatesOut, verbose = TRUE){ fetch_wpp_births <- function(births, yrs_births, location, sex, verbose) { # fetch WPP births if not provided by user - if (is.null(births)) { + if (is.null(births) | length(births) == 0) { # load WPP births requireNamespace("DemoToolsData", quietly = TRUE) @@ -316,7 +316,8 @@ interp_coh_download_mortality <- function(location, sex, date1, date2, OAnew = 1 }) %>% lapply(lt_a2s_chunk, OAnew = OAnew) %>% lapply(function(X){ - 1 - X$nqx + #1 - X$nqx + lt_id_Ll_S(X$nLx, X$lx, X$Age, X$AgeInt, N = 1) }) %>% do.call("cbind",.) diff --git a/README.md b/README.md index 2e0ec7c56..2c972ce7d 100644 --- a/README.md +++ b/README.md @@ -5,19 +5,19 @@ [![R build status](https://github.com/timriffe/DemoTools/workflows/R-CMD-check/badge.svg)](https://github.com/timriffe/DemoTools/actions) [![codecov](https://codecov.io/gh/timriffe/DemoTools/branch/master/graph/badge.svg)](https://codecov.io/gh/timriffe/DemoTools) -[![](https://img.shields.io/badge/devel%20version-01.13.39-yellow.svg)](https://github.com/timriffe/DemoTools) +[![](https://img.shields.io/badge/devel%20version-01.13.79-yellow.svg)](https://github.com/timriffe/DemoTools) [![issues](https://img.shields.io/github/issues-raw/timriffe/DemoTools.svg)](https://github.com/timriffe/DemoTools/issues) [![lifecycle](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://www.tidyverse.org/lifecycle/#experimental) # Tools for aggregate demographic analysis -Date: 2021-03-23 +Date: 2023-12-20 `DemoTools` is an R package that contains simple functions often used in demographic analysis. It is in active development. This project is commissioned by the [UN Population Division](http://www.un.org/en/development/desa/population/) and financed by the [Bill and Melinda Gates Foundation](https://www.gatesfoundation.org/) as part of the [Making Family Planning Count](http://www.un.org/en/development/desa/population/projects/making-family-planning-count/index.shtml) project. Work is also done in collaboration with Sean Fennell, [José Manuel Aburto](https://github.com/jmaburto), [Ilya Kashnitsky](https://ikashnitsky.github.io/), [Marius Pascariu](https://github.com/mpascariu), [Jorge Cimentada](https://github.com/cimentadaj), [Monica Alexander](https://www.monicaalexander.com/), and with minor contributions from [several more](https://github.com/timriffe/DemoTools/graphs/contributors) (thank you!). This work is licensed under the Creative Commons Attribution-ShareAlike 3.0 IGO ([CC BY-SA 3.0 IGO](https://creativecommons.org/licenses/by-sa/3.0/igo/)). 
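An aside on the `interp_coh_download_mortality()` hunk in `R/utils_downloads.R` above: swapping `1 - X$nqx` for `lt_id_Ll_S()` means the projection inputs are now lifetable survivor ratios built from exposures rather than survival probabilities. A toy sketch of the quantity involved, assuming single-year ages; the packaged `lt_id_Ll_S()` additionally handles the first and open age groups, which this naive version ignores.

```r
# For single ages, the survivor ratio is approximately L(x+1) / L(x).
Lx <- c(99000, 98800, 98650, 98400, 98100) # toy nLx values, radix 1e5
Sx_naive <- Lx[-1] / Lx[-length(Lx)]
round(Sx_naive, 5)
```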
-The idea behind `DemoTools` is to provide a common set of functions that can be easily used by analysts and scientists working on demographic analysis and modelling. +The idea behind `DemoTools` is to provide a common set of functions that can be easily used by analysts and scientists working on demographic analysis and modeling. If you detect a bug or have a suggestion please notify us using the [Issues](https://github.com/timriffe/DemoTools/issues) tab on github. Even better if you fix it and make a pull request! See [CONTRIBUTING.md](https://github.com/timriffe/DemoTools/blob/master/CONTRIBUTING.md) for more tips on reporting bugs or offering patches. @@ -29,12 +29,11 @@ If you are getting started with `DemoTools` we recommend taking a look at the tu You can load the ```DemoTools``` package in R like so: ```r -# install.packages("devtools") +# install.packages("remotes") -library(devtools) # requires the development version of rstan, sorry! install.packages("rstan", repos = c("https://mc-stan.org/r-packages/", getOption("repos"))) -install_github("timriffe/DemoTools") +remotes::install_github("timriffe/DemoTools") ``` ## Citation diff --git a/_pkgdown.yml b/_pkgdown.yml index 0c43c45ef..b110f0e37 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -10,8 +10,8 @@ home: title: DemoTools description: Do you love demography and data? If so, you might enjoy using this package. links: - - text: UN Deparment of Economic and Social Affairs Population - href: https://www.un.org/en/development/desa/population/index.asp + - text: United Nations Population Division + href: https://www.un.org/development/desa/pd/ - text: Browse source code href: https://github.com/timriffe/DemoTools/ @@ -47,6 +47,12 @@ reference: desc: Functions to construct a lifetable contents: - '`lt_abridged`' + - '`lt_single_mx`' + - '`lt_single_qx`' + - '`lt_single2abridged`' + - '`lt_abridged2single`' + - '`lt_ambiguous`' + - '`lt_smooth_ambiguous`' - title: "Interpolation" desc: Functions to interpolate counts contents: @@ -54,6 +60,7 @@ reference: - '`interp_coh`' - title: "Extrapolation" desc: Functions to interpolate/extrapolate rates or counts + contents: - '`interp_lc_lim`' - '`lt_rule_m_extrapolate`' - '`OPAG`' diff --git a/data/asfr_mat_five.rda b/data/asfr_mat_five.rda index 78b67259b..3aa59a764 100644 Binary files a/data/asfr_mat_five.rda and b/data/asfr_mat_five.rda differ diff --git a/data/pop_f_mat_five.rda b/data/pop_f_mat_five.rda index aea76202a..115355c24 100644 Binary files a/data/pop_f_mat_five.rda and b/data/pop_f_mat_five.rda differ diff --git a/dev/.gitignore b/dev/.gitignore index 2d6e1e81a..a21fc36c9 100644 --- a/dev/.gitignore +++ b/dev/.gitignore @@ -4,3 +4,5 @@ transitivitytests.R scratch.R junk.R +testLT.R +lx2.RData diff --git a/dev/FunctionInventory.csv b/dev/FunctionInventory.csv new file mode 100644 index 000000000..72f6628a5 --- /dev/null +++ b/dev/FunctionInventory.csv @@ -0,0 +1,171 @@ +Script,Function +LIFIT.R,ADM +utilsAge.R,age_abridge_force +utilsAge.R,age2ageN +utilsAge.R,age2int +utilsAge.R,AGEN +AGESEX.R,ageRatioScore +AGESEX.R,ageSexAccuracy +AGESEX.R,ageSexAccuracyDasGupta +OGIVE.R,agesmth1 +basepop.R,ArgsCheck +utils.R,avg_adj +basepop.R,basepop_five +GRPOPYB.R,birthCohorts +utilsAge.R,calcAgeAbr +utilsAge.R,calcAgeN +interp_coh.R,check_args +check_heaping.R,check_heaping_bachi +check_heaping.R,check_heaping_coale_li +check_heaping.R,check_heaping_jdanov +check_heaping.R,check_heaping_kannisto +check_heaping.R,check_heaping_myers +check_heaping.R,check_heaping_noumbissi 
+check_heaping.R,check_heaping_roughness +check_heaping.R,check_heaping_sawtooth +check_heaping.R,check_heaping_spoorenberg +check_heaping.R,check_heaping_whipple +utils.R,dec.date +utils_downloads.R,downloadAsfr +utils_downloads.R,downloadnLx +utils_downloads.R,downloadSRB +utils_downloads.R,fetch_wpp_births +lt_model_lq.R,find_my_case +utils_downloads.R,get_LocID +utils_downloads.R,get_LocName +utils.R,getModelLifeTable +graduate.R,graduate +graduate.R,graduate_beers +graduate.R,graduate_beers_expand +graduate.R,graduate_beers_johnson +graduate.R,graduate_grabill +graduate.R,graduate_grabill_expand +graduate.R,graduate_mono +graduate.R,graduate_mono_closeout +graduate.R,graduate_pclm +graduate.R,graduate_sprague +graduate.R,graduate_sprague_expand +graduate.R,graduate_uniform +utilsAge.R,groupAges +utilsAge.R,groupOAG +check_heaping.R,heapify +IRDID.R,ID +utilsAge.R,inferAgeIntAbr +utilsAge.R,int2age +utilsAge.R,int2ageN +AGEINT.R,interp +interp_coh.R,interp_coh +utils_downloads.R,interp_coh_download_mortality +interp_coh.R,interp_coh_lxMat_pxt +interp_lc_lim.R,interp_lc_lim +interp_lc_lim.R,interp_lc_lim_abk_m +interp_lc_lim.R,interp_lc_lim_estimate +interp_lc_lim_group.R,interp_lc_lim_group +interp_lc_lim.R,interp_lc_lim_kt_min +AGEINT.R,interpolatePop +IRDID.R,IRD +utilsAge.R,is_abridged +utilsAge.R,is_age_coherent +utilsAge.R,is_age_redundant +utilsAge.R,is_age_sequential +utils_downloads.R,is_Loc_available +utilsAge.R,is_single +utils_downloads.R,loc_message +OGIVE.R,loess_smth1 +nAx.R,lt_a_closeout +nAx.R,lt_a_pas +nAx.R,lt_a_un +interp_coh.R,lt_a2s_chunk +lt_abridged.R,lt_abridged +lt_regroup_age.R,lt_abridged2single +lt_regroup_age.R,lt_ambiguous +lt_id.R,lt_id_d_l +lt_id.R,lt_id_d_q +lt_id.R,lt_id_l_d +lt_id.R,lt_id_l_q +lt_id.R,lt_id_L_T +lt_id.R,lt_id_lda_L +lt_id.R,lt_id_Ll_S +lt_id.R,lt_id_ma_q +nAx.R,lt_id_morq_a +nAx.R,lt_id_morq_a_greville +lt_id.R,lt_id_q_l +lt_id.R,lt_id_qa_m +lt_id.R,lt_id_qm_a +basepop.R,lt_infer_radix_from_1L0 +lt_model_lq.R,lt_model_lq +lt_rule.R,lt_rule_1a0 +lt_rule.R,lt_rule_1a0_ak +nAx.R,lt_rule_1a0_cd +nAx.R,lt_rule_4a1_cd +lt_rule.R,lt_rule_4m0_D0 +lt_rule.R,lt_rule_4m0_m0 +lt_rule.R,lt_rule_ak_m0_a0 +lt_rule.R,lt_rule_ak_q0_a0 +extra_mortality.R,lt_rule_m_extrapolate +lt_single.R,lt_single_mx +lt_single_qx.R,lt_single_qx +lt_regroup_age.R,lt_single2abridged +interp_lc_lim.R,lt_smooth_ambiguous +lt_model_lq.R,lthat.logquad +utils.R,ma +MAV.R,mav +MAV.R,mav_tails +utilsAge.R,maxA2abridged +mig_beta.R,mig_beta +mig_beta.R,mig_beta_constant_child +mig_beta.R,mig_beta_cwr +mig_rc.R,mig_calculate_rc +mig_rc.R,mig_estimate_rc +mig_resid.R,mig_resid +mig_resid.R,mig_resid_cohort +mig_resid.R,mig_resid_dim_checker +mig_resid.R,mig_resid_stock +mig_resid.R,mig_resid_time +mig_un_fam.R,mig_un_fam +mig_resid.R,migresid_births +mig_resid.R,migresid_bounds +mig_resid.R,migresid_bounds_last_ageg +mig_resid.R,migresid_net_surv +mig_resid.R,migresid_net_surv_first_ageg +mig_resid.R,migresid_net_surv_last_ageg +utilsAge.R,names2age +OPAG.R,OPAG +OPAG.R,OPAG_fit_stable_standard +OPAG.R,OPAG_nLx_warp_r +OPAG.R,OPAG_r_min +OPAG.R,OPAG_simple +OGIVE.R,poly_smth1 +utils.R,ratx +LIFIT.R,RDM +utils.R,rescale_vector +utilsAge.R,rescaleAgeGroups +interp_coh.R,reshape_pxt +utils.R,rlog +interp_coh.R,rup +AGESEX.R,sexRatioScore +interp_coh.R,shift_census_ages_to_cohorts +utils.R,shift.vector +utils.R,simplify.text +utils.R,single2abridged +smooth_age_5.R,smooth_age_5 +smooth_age_5.R,smooth_age_5_arriaga +smooth_age_5.R,smooth_age_5_cf +smooth_age_5.R,smooth_age_5_feeney 
+smooth_age_5.R,smooth_age_5_kkn +smooth_age_5.R,smooth_age_5_mav +smooth_age_5.R,smooth_age_5_strong +smooth_age_5.R,smooth_age_5_un +smooth_age_5.R,smooth_age_5_zigzag +ZIGZAG.R,smooth_age_5_zigzag_inner +ZIGZAG.R,smooth_age_5_zigzag_min +ZIGZAG.R,smooth_age_5_zigzag_p +SPENCER.R,spencer +splitOscillate.R,splitOscillate +CENSUR.R,surv10 +CENSUR.R,surv5 +CENSUR.R,survN +CENSUR.R,survRatioError +interp_coh.R,transform_datesout +interp_coh.R,transform_pxt +ZELNIK.R,zelnik diff --git a/dev/arriaga_tidy.R b/dev/arriaga_tidy.R new file mode 100644 index 000000000..0ae5e9263 --- /dev/null +++ b/dev/arriaga_tidy.R @@ -0,0 +1,164 @@ +Ages <- seq(0, 80, by = 5) +AMales <- smooth_age_5_arriaga(Value = pop5m_pasex, Age = Ages, OAG = TRUE) +# PAS spreadsheet result: +Atest <- c(662761, 495126, 345744, 287629, 285919, 261018, 237469, 203277, + 161733, 126960, 88586, 67496, 54587, 41257, 28790, 17189, 34729) +all(round(AMales) - Atest == 0, na.rm = TRUE) +plot(Ages, pop5m_pasex) +lines(as.integer(names(AMales)),AMales) +# Before: +smooth_age_5_arriaga <- function(Value, + Age, + OAG = TRUE) { + + # these values are not used, it's just for lengths, and to make sure we + # end on an even 10. Technically we could even provide data in 10-year + # age groups and it'd still not break. + Value1 <- graduate_uniform(Value = Value, Age = Age, OAG = OAG) + Value5 <- + groupAges(Value1, Age = as.integer(names(Value1)), N = 5) + N <- length(Value5) + Age5 <- as.integer(names(Value5)) + + # would need to move this up to ensure? + # or in case of 85+ would we want to keep 80-84, 85+ as-is? + Value10 <- groupAges(Value, Age = Age, N = 10) + + # what OAG is a strange digit? Then take OAG after grouping. + if (OAG) { + OAGvalue <- Value5[length(Value5)] + Value10[length(Value10)] <- NA + Value5[length(Value5)] <- NA + } + + # again get staggered vectors + Value10LL <- shift.vector(Value10,-2, fill = NA) + Value10L <- shift.vector(Value10,-1, fill = NA) + Value10R <- shift.vector(Value10, 1, fill = NA) + Value10RR <- shift.vector(Value10, 2, fill = NA) + + # alternating calc, with differences at tails + vevens <- (-Value10R + 11 * Value10 + 2 * Value10L) / 24 + # tails different + vevens[1] <- + (8 * Value10[1] + 5 * Value10L[1] - Value10LL[1]) / 24 + lastind <- which(is.na(vevens))[1] + vevens[lastind] <- + Value10[lastind] - (8 * Value10[lastind] + 5 * Value10R[lastind] - Value10RR[lastind]) / 24 + # odds are complement + vodds <- Value10 - vevens + + # prepare output + interleaf <- c(rbind(vodds, vevens)) + # produce results vector + out <- Value5 * NA + n <- min(c(length(interleaf), N)) + out[1:n] <- interleaf[1:n] + + # if OA ends in 5, then we can save penultimate value too + na.i <- is.na(out) + out[na.i] <- Value5[na.i] + if (OAG) { + out[N] <- OAGvalue + } + + out +} +library(tidyverse) +data <- tibble(Age = Ages, Pop = pop5m_pasex) + +age2single <- function(Age, OAG = TRUE, AgeInt = NULL){ + maxA <- ifelse(OAG, max(Age), max(Age) + AgeInt[which.max(Age)]) + min(Age):maxA +} + +library(collapse) +library(data.table) +library(tidyfast) +smooth_age_5_arriaga_tidy <- function(data, variable, OAG = TRUE){ + + if (OAG){ + OAvalue <- data |> + fsubset(Age == max(Age)) |> + pull(!!sym(variable)) + } + + data <- + data |> + rename(V = !!variable) + V <- data$V + A <- data$Age + V1 <- graduate_uniform(Value = V, + Age = Age, + OAG = OAG) + A1 <- age2single(A, OAG = OAG) + # force to single + # reframe( + # V = graduate_uniform(Value = V, + # Age = Age, + # OAG = OAG), + # Age = age2single(Age)) + data.frame(V = V1, Age = 
A1) |> + # data |> + # group to 5 (innocuous if 5-year data given) + fmutate(Age = Age - Age %% 5, + V = fifelse(Age == max(Age) & OAG, NA_real_, V)) |> + fgroup_by(Age) |> + fsummarize(V = sum(V)) |> + fungroup() |> + fmutate(Age10 = Age - Age %% 10) |> + fmutate(V10 = sum(V), .by = Age10) |> + fmutate(V10LL = flag(V10,-4), + V10L = flag(V10,-2), + V10R = flag(V10, 2), + V10RR = flag(V10,4), + vevens = dt_case_when(Age10 == min(Age10) ~ (8 * V10 + 5 * V10L - V10LL) / 24, + Age10 == (max(Age10) - 10) ~ V10 - (8 * V10 + 5 * V10R - V10RR) / 24, + TRUE ~ (-V10R + 11 * V10 + 2 * V10L) / 24), + vodds = V10 - vevens, + V5out = dt_case_when( + Age == max(Age)~ OAvalue, + Age %% 10 == 0 ~ vodds, + Age %% 5 == 0 ~ vevens, + TRUE ~ V), + V5out = fifelse(is.na(V5out), V, V5out)) |> + fselect(Age, Age10, V5out) |> + rename(!!variable := V5out) + + } +# ---------------- # +# test equivalency # +# ---------------- # +# for OAG divisible by 10 +smooth_age_5_arriaga_tidy(tibble(Age = Ages, + Pop = pop5m_pasex), + variable = "Pop", + OAG = TRUE) + mutate(V5orig = smooth_age_5_arriaga(pop5m_pasex, Ages, TRUE)) + + +# for OAG divisible by 5 +Age75 <- seq(0,75,by=5) +v75 <- pop5m_pasex +v75[16] <- sum(pop5m_pasex[16:17]) +v75 <- v75[-17] + +data <- tibble(Age = Age75, + Pop = v75) +smooth_age_5_arriaga_tidy(data, + variable = "Pop", + OAG = TRUE) |> + mutate(V5orig = smooth_age_5_arriaga(v75, Age75, TRUE)) + +# ----------------------- # +# Compare execution speed # +# ----------------------- # +library(rbenchmark) +benchmark(smooth_age_5_arriaga_tidy(data, + variable = "Pop", + OAG = TRUE)) # .522 | 0.402 if we remove reframe() +benchmark(smooth_age_5_arriaga(v75, Age75, TRUE)) # .054, 10 times faster... + +# Conclusion: +# Possibly data.table could do the same a tic faster than collapse/tidyfast, +# but only marginally do, whereas base-powered DemoTools arithmetic is 10x faster diff --git a/dev/build.R b/dev/build.R index 9545ed4bb..d6e841d2e 100644 --- a/dev/build.R +++ b/dev/build.R @@ -18,10 +18,12 @@ shhh <- function(expr){ library(devtools) library(TimUtils) +library(magrittr) +library(git2r) #install.packages("backports") #install.packages("roxygen2") #install_github("hadley/devtools") -#install_github("timriffe/TimUtils/TimUtils") +#install_github("timriffe/TimUtils") # do this whenever new functions are added to /R, or whenever roxygen is updated devtools::document() @@ -30,15 +32,28 @@ devtools::document() devtools::build_vignettes() # devtools::install_github("r-lib/pkgdown") +# usethis::proj_activate(here::here()) pkgdown::build_site() -versionIncrement( +TimUtils::versionIncrement( major = FALSE, # only for releases mid = FALSE, # major functionality added minor = TRUE, # whenever documentation renewed, any patch, tweak, or fix maxdigits = c(2,2,3),# maybe 4 required? README = TRUE) # update README dev version badge +# add line to immediately commit and tag. + +# D <- readLines("DESCRIPTION") +# vs <- D[grepl(D,pattern = "Version: ")] %>% gsub(pattern = "Version: ", replacement = "") %>% +# paste0("v",.) 
+# commit(message = vs,all=TRUE) +# tag(name =vs,message = vs) +# push(refspec = vs) + +# https://raw.githubusercontent.com/timriffe/DemoTools/59a0f4e50b7696c185a3c9d4e582426f88aac84f/DESCRIPTION + + # run this to get access to already-written functions shhh(load_all()) @@ -47,6 +62,9 @@ shhh(load_all()) Sys.setenv('_R_CHECK_SYSTEM_CLOCK_' = 0) devtools::check(force_suggests = TRUE) + +source("version_lookup.R") +update_lookup() #build(pkg = "/home/tim/git/DemoTools", path = "/home/tim/Desktop") #?devtools::build #devtools::use_testthat("/home/tim/git/DemoTools") diff --git a/dev/build_tests.R b/dev/build_tests.R index b7d9d1ed4..63d848227 100644 --- a/dev/build_tests.R +++ b/dev/build_tests.R @@ -55,4 +55,41 @@ str(d2) names(eliminable) <- names(d2) N - eliminable -unlist(d2) %>% unique() \ No newline at end of file +unlist(d2) %>% unique() + +# what file is each function in? + +A <- lsf.str("package:DemoTools") +devtools::load_all() +attr(attr(inferAgeIntAbr,"srcref"),"srcfile") + + +get_file <- function(filename_no_quotes){ + attr(attr(filename_no_quotes,"srcref"),"srcfile") +} +get_file(inferAgeIntAbr) +lapply(ls("package:DemoTools"),function(x) eval() %>% is.function) +is.function(DemoTools::`:=`) + +# list of functions by script + +scripts <- dir("R") +files <- list() + +for (i in scripts){ +test.env <- new.env() +sys.source(paste0("R/",i), envir = test.env) +A <- lsf.str(envir=test.env) +funs <- lapply(A,"[[",1) %>% unlist() +files[[i]] <- funs +rm(A) +rm(test.env) +} +library(tidyverse) + +lengths <- lapply(files, length) %>% unlist() +DF <- tibble(Script = rep(names(lengths), times = lengths), + Function = unlist(files)) +DF %>% + arrange(Function) %>% + write_csv("dev/FunctionInventory.csv") diff --git a/dev/ediev.R b/dev/ediev.R new file mode 100644 index 000000000..0c5d693fc --- /dev/null +++ b/dev/ediev.R @@ -0,0 +1,217 @@ +smooth_age_zigzag_dd <- function( + Value, #population counts by single year of age + Age, #age index for Value: should be a continuous range of integer values (we will not check that) + OAG = TRUE, #is the last age group an open interval? + ret_ggPlot = TRUE, #if TRUE, we return a list(data=ValueOut, ggplot=myPlot) with a plot for checking results + #if FALSE, we return the smoothed series ValueOut + legendPlot = NULL #legend for the ggPlot +) +{ + #Extension of Feeney (2013) "Removing Zig-Zag from Age Data" for single age data + #in the spirit of Dalkhat M. Ediev (2021) "A model of age heaping with applications to population graduation that retains informative demographic variation" + #Main differences with Feeney's algorithm is that, apart for working with single age data + #to suppose that age heaping can start 2 years before (and symmetrically 2 years after) + #and not only 1 year before and after. + #For example for age 40, we suppose that heaping occurs symmetrically, in the same proportion, at + #ages 39 and 41, as Feeney supposed for five-years age groups with adjacent age groups, + #but in the same spirit we suppose that + #heaping can occur also at ages 38 and 42 towards attracting age 40. 
+ #We also suppose that heaping can only occurs for ages multiple of 5 and 10 + if (OAG) { + #if the last age is an open interval, we drop it + AgeRangeOut <- Age[1:(length(Age)-1)] + } + #determine the ages with the variable parameters + #For example we start with ages 3 and 4 for heaping at age 5 and ages 6 and 7 use symmetrical values + #generally speaking heaping can start at ages x-2, x-1 and then symmetrically at ages x+1, x+2 towards attracting age x + #we suppose that the first and the last age we can treat have at least values for 2 ages before and after + #that means that if the first age in Age is 9 or 10, we will skip the first + getAgeRange <- function(AgeRangeOut) { + ageRangeMin <- max(5, trunc((AgeRangeOut[1] + 2) / 5) * 5) + if (ageRangeMin < AgeRangeOut[1] + 2) ageRangeMin <- ageRangeMin + 5 + ageRangeMax <- trunc((AgeRangeOut[length(AgeRangeOut)] - 2) / 5) * 5 + if (ageRangeMax > AgeRangeOut[length(AgeRangeOut)] - 2) ageRangeMax <- ageRangeMax - 5 + return (c(ageRangeMin, ageRangeMax)) + } + ageRange <- getAgeRange (AgeRangeOut) + ageRangeMin <- ageRange[1] + ageRangeMax <- ageRange[2] + #number of ages multiples of 5 + nAges_mult <- trunc ((ageRangeMax - ageRangeMin) / 5) + 1 + if (nAges_mult < 2) stop ("Age range should include at least two consecutive ages multiple of 5") + #initial values for the parameters + #if first attracting age is 5, age 3 has initial proportion of 0.1, age 4 of 0.2 + #these two parameters will get 'optimized' by R optim + #the same parameters are applied, symmetrically, for age 6 (0.2) and age 7 (0.1) + param <- rep (c(0.1, 0.2), nAges_mult) + param_min <- rep(0, nAges_mult * 2) + param_max <- rep(9, nAges_mult * 2) + + computeNewPop <- function(data, par, Age, ageRangeMin, nAges_mult, ret_Props=FALSE) { + if (ret_Props) props <- rep(NA, length(data)) + for (ageIter in (1:nAges_mult)) { + currFirstAge <- ageRangeMin + (ageIter - 1) * 5 - 2 + indexInData <- currFirstAge - Age[1] + 1 + if (ret_Props) { + props[indexInData] <- par[(ageIter - 1) * 2 + 1] + props[indexInData+1] <- par[(ageIter - 1) * 2 + 2] + props[indexInData+3] <- par[(ageIter - 1) * 2 + 2] + props[indexInData+4] <- par[(ageIter - 1) * 2 + 1] + } else { + data[indexInData + 2] <- data[indexInData + 2] - + (data[indexInData] + data[indexInData + 4]) * par[(ageIter - 1) * 2 + 1] - + (data[indexInData + 1] + data[indexInData + 3]) * par[(ageIter - 1) * 2 + 2] + data[indexInData] <- data[indexInData] * (1 + par[(ageIter - 1) * 2 + 1]) + data[indexInData+1] <- data[indexInData+1] * (1 + par[(ageIter - 1) * 2 + 2]) + data[indexInData+3] <- data[indexInData+3] * (1 + par[(ageIter - 1) * 2 + 2]) + data[indexInData+4] <- data[indexInData+4] * (1 + par[(ageIter - 1) * 2 + 1]) + } + } + + if (ret_Props) { + return (props) + } else { + return (data) + } + } + + optim_fun <- function(data, par) { + popIter <- computeNewPop(data, par, Age, ageRangeMin, nAges_mult) + totPop <- sum (popIter) + objective_to_min <- 0 + for (ageIter in (1:nAges_mult)) { + currFirstAge <- ageRangeMin + (ageIter - 1) * 5 - 2 + indexInData <- currFirstAge - Age[1] + 1 + term1 <- ((popIter[indexInData + 2] - (popIter[indexInData + 1] + popIter[indexInData + 3]) / 2)) / totPop + term2 <- ((popIter[indexInData + 2] - (popIter[indexInData] + popIter[indexInData + 4]) / 2)) / totPop + objective_to_min <- objective_to_min + term1^2 + term2^2 + } + return (objective_to_min) + } + + oldOptimValue <- 10^1000 + + for (rep in (1:10)) { + #apply optimization up to 10 times, using parameters obtained in the previous step, just in 
case... + parResult <- optim(data=Value, par=param, lower=param_min, upper=param_max, fn=optim_fun, method = "L-BFGS-B") + if (parResult$value >= oldOptimValue) break + param <- parResult$par + oldOptimValue <- parResult$value + } + valueOut <- computeNewPop(Value, parResult$par, Age, ageRangeMin, nAges_mult) + + if (ret_ggPlot) { + library2 <- function(pack) { + #pack<-"tcltk2" + if( !(pack %in% installed.packages())) + {install.packages(pack)} + library(pack,character.only = TRUE) + } + library2("ggplot2") + library2("scales") + + getScales <- function(Value, ageRangeMin, ageRangeMax) { + maxCount <- max(Value) * 1.0 + power <- 0 + while (maxCount > 1) { + maxCount <- maxCount / 10.0 + power <- power + 1 + } + maxCount <- ceiling(maxCount * 10) * 10^(power-1) + leftScale <- seq(0, maxCount, maxCount / 4) + rightScale <- seq(0, 1, 0.25) + xScale <- seq(ageRangeMin-5, ageRangeMax+5, 5) + return (list(left=leftScale, right=rightScale, x=xScale)) + } + + df <- data.frame(Age=Age, val=Value) + df$type <- 'Original' + temp <- data.frame(Age=Age, val=valueOut) + temp$type <- 'Corrected' + df <- rbind(df, temp) + props <- computeNewPop(Value, parResult$par, Age, ageRangeMin, nAges_mult, ret_Prop = TRUE) + temp <- data.frame(Age=Age, val=props) + temp$type <- 'Attraction' + #transform into proportions + temp$val <- temp$val / (1 + temp$val) + scale_axes <- getScales (Value, ageRangeMin, ageRangeMax) + maxY_left <- scale_axes$left[length(scale_axes$left)] + #adjust values in preparation for second axis (we will correct the labels later on in scale_y_continuous) + temp$val <- temp$val * maxY_left + df <- rbind(df, temp) + df$type <- factor (df$type, levels=c("Original", "Corrected", "Attraction")) + myLabelNames <- c("Original", "Corrected", "Attraction factors") + + myPlot <- ggplot(df, aes(x=Age, y=val, color=type, alpha=type, size=type)) + geom_line() + myPlot <- myPlot + scale_y_continuous(breaks = scale_axes$left, name="population counts", + sec.axis = sec_axis(as.formula(paste("~./", maxY_left, sep="")), name="attraction proportions", + breaks=scale_axes$right), + labels = scales::label_number()) + myPlot <- myPlot + scale_x_continuous(breaks = scale_axes$x) + myPlot <- myPlot + scale_colour_manual(values=c("red", "blue", "grey40"), labels=myLabelNames) + myPlot <- myPlot + scale_size_manual(values=c(1, 2, 2), labels=myLabelNames) + myPlot <- myPlot + scale_alpha_manual(values=c(1, 1, 0.2), labels=myLabelNames) + myPlot <- myPlot + theme_bw() + theme(legend.key = element_blank(), legend.title=element_blank(), legend.position="bottom") + myPlot <- myPlot + theme_text(1) + myPlot <- myPlot + labs(color="", alpha="", size="") + if (!is.null(legendPlot)) myPlot <- myPlot + ggtitle(legendPlot) + + return (list(data=valueOut, ggplot=myPlot)) + } else { + return (valueOut) + } +} + +#data for Colombia 1973 +col1973 <- structure(list( + Age = 0:99, + Value = c(493538L, 497954L, 580000L, + 610204L, 609266L, 609670L, 599184L, 628352L, 624302L, 549158L, + 624112L, 527810L, 611266L, 541084L, 490342L, 466650L, 435310L, + 423912L, 444468L, 334732L, 345390L, 301686L, 319468L, 312100L, + 267828L, 293902L, 230690L, 230770L, 244100L, 178590L, 278284L, + 136552L, 201120L, 218514L, 159054L, 217800L, 156786L, 162270L, + 203560L, 141258L, 247758L, 94344L, 166104L, 164336L, 116024L, + 182858L, 104268L, 102916L, 136222L, 90550L, 180598L, 68132L, + 109788L, 108058L, 81438L, 116446L, 77230L, 62844L, 73906L, 49812L, + 136486L, 37506L, 61658L, 69702L, 43570L, 72760L, 36930L, 34102L, + 40052L, 24126L, 68924L, 16004L, 
32988L, 40868L, 18406L, 31836L, + 14222L, 11824L, 16206L, 6972L, 24294L, 5016L, 7392L, 6906L, 6244L, + 9424L, 3806L, 3286L, 3478L, 1912L, 5260L, 916L, 1540L, 1142L, + 1266L, 1674L, 602L, 468L, 752L, 2360L)), + class = "data.frame", + row.names = c(NA,-100L)) + +#Alexandropol 1897 +Alex1897 <- structure(list( + Age = 0:99, + Value = c(376.7579251, 342.1757925, + 398.3717579, 337.8530259, 314.0778098, 290.3025937, 355.1440922, + 334, 345, 360, 392, 260.0432277, 370, 307.5936599, 290.3025937, + 247.074928, 281.6570605, 268.6887608, 298.9481268, 164.9423631, + 333.5302594, 1401.253602, 1729.783862, 1643.32853, 1669.26513, + 700.9654179, 324.8847262, 273.0115274, 234.1066282, 104.4236311, + 471.8587896, 108.7463977, 186.556196, 151.9740634, 113.0691643, + 389.7262248, 151.9740634, 126.037464, 156.29683, 48.22766571, + 458.8904899, 48.22766571, 108.7463977, 56.87319885, 65.51873199, + 277.3342939, 82.80979827, 52.55043228, 91.45533141, 48.22766571, + 363.7896254, 26.61383285, 52.55043228, 39.58213256, 39.58213256, + 199.5244957, 56.87319885, 39.58213256, 48.22766571, 5, 285.9798271, + 17.96829971, 26.61383285, 17.96829971, 22.29106628, 78.4870317, + 22.29106628, 22.29106628, 26.61383285, 9.322766571, 121.7146974, + 0.677233429, 9.322766571, 5, 5, 30.93659942, 13.64553314, 9.322766571, + 9.322766571, 13.64553314, 35.25936599, 9.322766571, 9.322766571, + 5, 0.677233429, 17.96829971, 0.677233429, 5, 0.677233429, 9.322766571, + 17.96829971, 5, 5, 5, 5, 0.677233429, 0.677233429, 0.677233429, + 0.677233429, 0.677233429)), + class = "data.frame", + row.names = c(NA, -100L)) + +#call the function for Colombia +resCol <- smooth_age_zigzag_dd(col1973$Value, col1973$Age, OAG = TRUE, ret_ggPlot = TRUE, legendPlot="Colombia 1973") +#show the plot for Colombia +resCol$ggplot +#call the function for Alexandropol +resAlex <- smooth_age_zigzag_dd(Alex1897$Value, Alex1897$Age, OAG = TRUE, ret_ggPlot = TRUE, legendPlot="Alexandropol 1897") +#show the plot +resAlex$ggplot diff --git a/docs/404.html b/docs/404.html index b6e9684e8..3df36364e 100644 --- a/docs/404.html +++ b/docs/404.html @@ -79,7 +79,7 @@ DemoTools - 01.13.20 + 01.13.76 @@ -87,7 +87,7 @@
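Finally, a small sketch of how the new smoothing entry points in this diff are meant to be called, reusing the `pop5m_pasex` example data from `dev/arriaga_tidy.R` above; exact numbers depend on the merged code.

```r
library(DemoTools)
Age <- seq(0, 80, by = 5)

# moving average with cascaded tails, called directly
sm_direct <- smooth_age_5_mav(pop5m_pasex, Age = Age, OAG = TRUE,
                              n = 3, tails = TRUE)

# the same cascade used only to impute the tails of another method
sm_cf <- smooth_age_5(pop5m_pasex, Age = Age,
                      method = "Carrier-Farrag",
                      young.tail = "Cascade",
                      old.tail = "Cascade")
```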