diff --git a/.RData b/.RData
deleted file mode 100644
index c18bcffd3..000000000
Binary files a/.RData and /dev/null differ
diff --git a/.Rbuildignore b/.Rbuildignore
index 38a2c484d..d82f4d649 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -27,4 +27,8 @@ Spreadsheets
^tic\.R$
rcppExports.cpp
stanExports_*
-^data-raw$
\ No newline at end of file
+^data-raw$
+version_lookup.R
+version_lookup.csv
+.gitsum
+Presentations
diff --git a/.gitignore b/.gitignore
index 6580fc6a8..75320bd7e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,5 +19,8 @@ Spreadsheets
*.h
*.o
*.so
-docs/
+.gitsum
.RData
+/doc/
+/Meta/
+Presentations
diff --git a/DESCRIPTION b/DESCRIPTION
index 13d51e14e..e8bb066c2 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
Package: DemoTools
Type: Package
Title: Standardize, Evaluate, and Adjust Demographic Data
-Version: 01.13.39
-Date: 2021-03-23
+Version: 01.13.80
+Date: 2023-12-29
Authors@R: c(
person("Tim", "Riffe", role = c("aut", "cre"),
email = "tim.riffe@gmail.com", comment = c(ORCID = "0000-0002-2673-4622")),
@@ -22,7 +22,7 @@ License: file LICENSE
LazyLoad: yes
LazyData: true
Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.1.1
+RoxygenNote: 7.2.3
Depends:
R (>= 3.6),
Rcpp (>= 0.12.0),
@@ -31,15 +31,16 @@ Suggests:
testthat (>= 2.1.0),
knitr,
rmarkdown,
+ markdown,
DT,
ggplot2
RdMacros: Rdpack
Imports:
data.table (>= 1.13.6),
demogR,
- DemoToolsData (>= 0.1.1),
+ DemoToolsData (>= 0.1.5),
dplyr,
- fertestr (>= 0.0.5),
+ fertestr (>= 0.10.00),
lubridate,
magrittr,
MortalityLaws (>= 1.7.0),
@@ -48,10 +49,11 @@ Imports:
rstan (>= 2.18.1),
tibble,
tidybayes,
- ungroup
+ ungroup (>= 1.4.2)
BugReports: https://github.com/timriffe/DemoTools/issues
Remotes:
- github::josehcms/fertestr,
- github::timriffe/DemoToolsData
+ github::timriffe/fertestr,
+ github::timriffe/DemoToolsData,
+ github::timriffe/MortalityLaws
Encoding: UTF-8
VignetteBuilder: knitr
diff --git a/NAMESPACE b/NAMESPACE
index cbc1bafd5..0f09fb057 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -3,6 +3,7 @@
export("%>%")
export(ADM)
export(AGEN)
+export(HMD_old_logquad)
export(ID)
export(IRD)
export(OPAG)
@@ -71,6 +72,7 @@ export(is_age_redundant)
export(is_age_sequential)
export(is_single)
export(loess_smth1)
+export(logquad_augmented)
export(lt_a_closeout)
export(lt_a_pas)
export(lt_a_un)
@@ -161,6 +163,7 @@ importFrom(data.table,rbindlist)
importFrom(data.table,setDT)
importFrom(data.table,uniqueN)
importFrom(demogR,cdmltw)
+importFrom(dplyr,case_when)
importFrom(dplyr,group_by)
importFrom(dplyr,mutate)
importFrom(dplyr,rename)
@@ -192,3 +195,4 @@ importFrom(tibble,tibble)
importFrom(tidybayes,gather_draws)
importFrom(ungroup,pclm)
importFrom(utils,head)
+importFrom(utils,tail)
diff --git a/NEWS.md b/NEWS.md
index fd983f7e1..e1b87183d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,14 +1,17 @@
# Changes in this update
-2018-08-15
+
+## 2018-08-15
- NEWS.md file added. When a first release comes out, major functionality
and features will be listed here at that time.
-2019-11-28
+## 2019-11-28
- new function mig_calculate_rc() added
- several function names harmonized, see ?DemoTools-renamed
- - graduate_sprage() and other graduation functions no longer support matrix inputs
+ - graduate_sprague() and other graduation functions no longer support matrix inputs
+## 2023-12-29
+ - minor fixes to pass checks
\ No newline at end of file
diff --git a/R/AGEINT.R b/R/AGEINT.R
index e875e4b16..790cc5c5d 100644
--- a/R/AGEINT.R
+++ b/R/AGEINT.R
@@ -89,7 +89,8 @@ interpolatePop <-
#' @param datesOut vector of dates. The desired dates to interpolate to. See details for ways to express it.
#' @param method string. The method to use for the interpolation, either \code{"linear"}, \code{"exponential"}, or \code{"power"}. Default \code{"linear"}.
#' @param power numeric power to interpolate by, if \code{method = "power"}. Default 2.
-#' @param extrap logical. In case \code{datesOut} is out of range of datesIn, do extrapolation using slope in extreme pairwise. Deafult \code{FALSE}.
+#' @param extrap logical. In case \code{datesOut} is out of range of `datesIn`, do extrapolation using slope in extreme pairwise. Default \code{FALSE}.
+#' @param negatives logical. In case negative outputs are accepted, set to \code{TRUE}. Default \code{FALSE}.
#' @param ... arguments passed to \code{stats::approx}. For example, \code{rule}, which controls extrapolation behavior.
#' @details The age group structure of the output is the same as that of the input. Ideally, \code{datesOut} should be within the range of \code{datesIn}. If not, the left-side and right-side output are held constant outside the range if \code{rule = 2} is passed in, otherwise \code{NA} is returned (see examples). Dates can be given in three ways 1) a \code{Date} class object, 2) an unambiguous character string in the format \code{"YYYY-MM-DD"}, or 3) as a decimal date consisting in the year plus the fraction of the year passed as of the given date.
#'
@@ -198,6 +199,7 @@ interp <- function(popmat,
method = c("linear", "exponential", "power"),
power = 2,
extrap = FALSE,
+ negatives = FALSE,
...) {
# ... args passed to stats::approx . Can give control over extrap assumptions
# IW: extrap=T for extrapolate following each slope in extreme pairwise.
@@ -282,10 +284,10 @@ interp <- function(popmat,
int <- int ^ power
}
- # IW: no negatives when extrapolate. Thinking in pop and lt expressions
- if(all(!is.na(int)) & any(int<0)){
- cat("Negative values were turned 0. No accepted in population counts, fertility rates or life table functions.\n")
- int[int<0] <- 0
+  # IW: no negatives when extrapolating. Thinking in pop and lt expressions. Inactive when negatives are explicitly accepted
+ if(!negatives & all(!is.na(int)) & any(int < 0)){
+ cat("Negative values have been replaced with 0s.\nNegatives not accepted in population counts,\n fertility rates or life table functions.\nYou can allow negatives (e.g. interpolating net migration)\n by specifying negatives = TRUE")
+ int[int < 0] <- 0
}
int
diff --git a/R/AGESEX.R b/R/AGESEX.R
index 836b4a067..3b5f5634e 100644
--- a/R/AGESEX.R
+++ b/R/AGESEX.R
@@ -19,7 +19,7 @@
#' It is also assumed that the final age group is open, unless \code{ageMax < max(Age)}.
#' Setting \code{OAG = FALSE} will override this and potentially include \code{max(Age)} in calculations.
#' The method argument determines the weighting of numerators and denominators, where the UN method puts
-#' twice the numerator over the sum of the adjacent ages classes, Zelnich does thrice the
+#' twice the numerator over the sum of the adjacent ages classes, Zelnik does thrice the
#' numerator over the sum of the whole range from the next lowest to the next highest age, and
#' Ramachandran does four times the numerator over the same sum, but with the central age
#' double-counted in the numerator.
diff --git a/R/MAV.R b/R/MAV.R
index f57291a54..659dfca6f 100644
--- a/R/MAV.R
+++ b/R/MAV.R
@@ -1,22 +1,22 @@
-# Author: JG
-# Edited 9-Dec-2017 by TR
-# Edited Aug 2018 by TR
-
-###############################################################################
#' Calculate the moving average (mav) over 3 or 5 years.
#' @description This arithmetic smoothing technique aims to eliminate irregularities of the population pyramid by averaging values in a moving window of user-defined width.
-#' @details The moving window is applied symmetrically. Data endpoints are imputed with \code{NA}s in output: the is nothing under 0 or over the highest closed age group to average with. The open age group is imputed with \code{NA} prior to calculations, since it cannot be averaged into the next lowest group. For example, for \code{n=3}, age 0 will be \code{NA}, as will the open age group and next lowest age. Age intervals are assumed uniform. This function could be used with either single or 5-year age groups.
+#' @details
+#' The moving window is applied symmetrically. By default (`tails = FALSE`) data endpoints are imputed with `NA`s in output: there is nothing under 0 or over the highest closed age group to average with. The open age group is not used in averaging, and it is returned as-is. Age intervals are assumed uniform. This function could be used with either single or 5-year age groups.
+#'
+#' If `tails` is set to `TRUE`, then tails are imputed using moving averages with successively smaller values of `n` (the cascade method).
#' @param Value numeric. A vector of demographic counts in single age groups.
#' @param n integer. A single number, (often 3 or 5), indicating the number of years taken to smooth the population distribution by single ages.
#' @param Age integer. A vector of ages corresponding to the lower integer bound of the counts.
-#' @param OAG logical. Whether or not the top age group is open. Default \code{TRUE}.
-#' @details Ages may be single or grouped, but all age intervals are assumed equal.
+#' @param OAG logical. Whether or not the top age group is open. Default `TRUE`.
+#' @param tails logical. If set to `TRUE`, smaller-n moving averages are applied on both tails
+#' such that all values are non-NA. If `FALSE` (default), tails are set to NA
+#' due to the lag of moving averages.
#' @return Vector with the smoothed demographic counts.
+#'
#' @export
-#' @author Juan Galeano
#' @references
#' \insertRef{GDA1981IREDA}{DemoTools}
@@ -31,9 +31,16 @@
#' 323263,9535,13906,9063,8294,90459,9817,6376,8884,3773,160609)
#'Age <- 0:70
#'# final age group assumed open
-#'mav(Pop, n = 3, Age = Age)
+#' mav(Pop, n = 3, Age = Age)
+#'
#'\dontrun{
-#' nwindows <- sapply(seq(3, 11, by = 2),mav, Value = Pop, Age = Age)
+#' odds <- seq(3, 11, by = 2)
+#' nwindows <- sapply(odds,
+#' mav,
+#' Value = Pop,
+#' Age = Age,
+#' OAG = TRUE,
+#' tails = FALSE)
#' cols <- gray(seq(.8, 0, length = 5))
#' lwds <- seq(3, 1, length = 5)
#' plot(Age,Pop, col = "red", xlab = "Age", ylab = "The counts", pch=16,
@@ -45,21 +52,92 @@
#' lwd = lwds,
#' legend = paste0("n=",seq(3,11,by=2)))
#'}
+#'
+#' # For cascading smoothing on the tails:
+#' mav(Pop, Age, tails = TRUE)
+#'
+#'\dontrun{
+#'# Compare
+#' nwindows_tails <- sapply(odds,
+#' mav,
+#' Value = Pop,
+#' Age = Age,
+#' OAG = TRUE,
+#' tails = TRUE)
+#'
+#' colnames(nwindows) <- odds
+#' colnames(nwindows_tails) <- odds
+#'
+#' # NA triangles are completed with
+#' # successively smaller ns.
+#' head(nwindows)
+#' head(nwindows_tails)
+#'
+#' tail(nwindows)
+#' tail(nwindows_tails)
+#' }
-mav <- function(Value, Age, n = 3, OAG = TRUE) {
+mav <- function(Value, Age, n = 3, OAG = TRUE, tails = FALSE) {
In <- Value
if (missing(Age)) {
Age <- as.integer(names(Value))
}
+
+ # save OAG
if (OAG) {
+ OrigOAGpop <- Value[length(Value)]
Value[length(Value)] <- NA
}
- # TR: not sure why n needs to be hard coded
+
Out <- ma(Value, n)
+ # apply cascading tails if needed
+ if (tails){
+ Out <- mav_tails(Value = Value,
+ Age = Age,
+ MavOut = Out,
+ n = n,
+ OAG = OAG)
+ }
+ # plug OAG back in
+ if (OAG){
+ Out[length(Value)] <- OrigOAGpop
+ }
+
structure(Out, names = Age)
}
+# Not exported since it can be called with tails = FALSE on mav.
+mav_tails <- function(Value, Age, MavOut, n = 3, OAG = TRUE) {
+ NewMavOut <- MavOut
+
+ #Last should point to last age group to use
+ Last <- length(Age)
+ if (OAG) {
+ Last <- Last - 1
+ NewMavOut[Last+1] <- Value[Last+1]
+ }
+
+ NewMavOut[1] <- Value[1]
+ NewMavOut[Last] <- Value[Last]
+
+ MavLev <- c(1,2,4,6,8)
+
+ if (n >= 2) {
+ for(i in 2:(as.integer(n/2))) {
+
+ # TR: why not just calculate the whole thing and once and pick out
+ # the two values as needed?
+ NewMavOut[i] <- ma(Value[1:(MavLev[i]+1)], n = MavLev[i])[i]
+ # subscripts right and select just the correct age
+ NewMavOut[Last - i + 1] <- ma(Value[(Last - MavLev[i] ):Last],
+ n = MavLev[i])[ i ]
+
+ }
+ }
+
+ NewMavOut
+}
#Pop <-c(303583,390782,523903,458546,517996,400630,485606,325423,471481,189710,
diff --git a/R/OGIVE.R b/R/OGIVE.R
index 4b9be88e8..48665b544 100644
--- a/R/OGIVE.R
+++ b/R/OGIVE.R
@@ -182,7 +182,7 @@ poly_smth1 <-
#' @details LOESS (locally weighted smoothing) helps to smooth data over age, preserving the open age group if necessary.
#' It is a popular tool to create a smooth line through a timeplot or scatter plot.
#' Loess smoothness may be tweaked by specifying an optional \code{"span"} argument.
-#' Polynomial fitting is used to mooth data over age or time fitting linear models.
+#' Polynomial fitting is used to smooth data over age or time fitting linear models.
#' It can be tweaked by changing the degree and by either log or power transforming.
#' The open age group can be kept as-is if desired by specifying \code{OAG = TRUE}.
#' May be used on any age groups, including irregularly spaced, single age, or 5-year age groups.
diff --git a/R/OPAG.R b/R/OPAG.R
index 047b35dd7..0516cd6ce 100644
--- a/R/OPAG.R
+++ b/R/OPAG.R
@@ -2,22 +2,29 @@
# [ ] All choice of Age_fit / AgeInt_fit based on criteria (see PJ Drive folder)
# [ ] ensure age groups are flexible as required.
# [ ] what happens when one input is in a different age group than another?
-# [ ] OPAG_simple() should allow for non-single ages
+# [x] OPAG_simple() should allow for non-single ages
# [ ] add unit tests
# [ ] check for age group commensurability and warn if necessary
# [ ] add more examples to OPAG?
-# [ ] remove rownames message from DownloadLx(), haha
+# [ ] remove rownames message from DownloadLx(), haha I DON'T SEE THIS
# [ ] test OPAG_simple() with non-single ages groups, update documentation if necessary.
# [ ] harmonize args betwenn OPAG_simple and OPAG family.
+# [x] make AgeInt not required
+# [ ] document Age_fit better
+# [x] change open age formula in warp function
+# [x] fix continuous = FALSE formula/ then removed
+# [x] ensure Lx is single ages once for use throughout
+# [x] change default method to "mono"
+
# Author: tim
###############################################################################
# distribute population in open age group over higher ages.
# The PAS implementation uses stable populations, and it will be added
-# here in the future, as well as other optiond. The main missing piece
+# here in the future, as well as other options. The main missing piece
# is a good collection of model lifetables.
-#' redistripute an open age group count over higher ages proportional to an arbitrary standard
+#' redistribute an open age group count over higher ages proportional to an arbitrary standard
#' @description This method could be useful whenever a reasonable standard is available. At present the standard must be supplied by the user.
#' @details In this implementation both the original population counts and the standard must be in single ages.
#' @param Pop numeric vector of population counts
@@ -72,7 +79,7 @@ OPAG_simple <-
StPop,
StAge,
OAnew = max(StAge)) {
- # # assume single
+ # # assume single NOT NEEDED See age concordance
# stopifnot(is_single(Age))
# stopifnot(is_single(StAge))
# OAG can be less than or equal to max age
@@ -80,13 +87,15 @@ OPAG_simple <-
stopifnot(OAnew %in% StAge)
# age and pop vectors must match lengths, assume ordered
stopifnot(length(Pop) == length(Age))
+ stopifnot(length(StPop) == length(StAge))
# age concordance
- #stopifnot(all(Age %in% StAge))
+ minStAge = min(StAge)
+ stopifnot(all(Age[Age >= minStAge] %in% StAge))
# group pop down to OAG
Pop <- groupOAG(Pop, Age, OAnow)
StPop <- groupOAG(StPop, StAge, OAnew)
-
+
# even up lengths
N <- length(Pop)
Age <- Age[1:N]
@@ -94,19 +103,19 @@ OPAG_simple <-
# same for standard
StN <- length(StPop)
StAge <- StAge[1:StN]
-
- # make stadnard distribution.
+
+ # make standard distribution.
standard <- rescale_vector(StPop[StAge >= OAnow], scale = 1)
# redistribute OAG
PopUpper <- OAtot * standard
# keep lower ages of Pop
PopLower <- Pop[1:(N - 1)]
-
+
# graft, name, and return
out <- c(PopLower, PopUpper)
Ageout <- sort(unique(c(Age, StAge)))
names(out) <- Ageout
-
+
out
}
@@ -114,16 +123,13 @@ OPAG_simple <-
#' Warps a given stationary population into a stable population
#' @description We take `nLx` as indicative of a stationary population age structure,
#' then subject the population structure to long-term growth by a constant rate, `r`.
-#' @details `nLx` could be any population structure of any scale, as long as you're comfortable
+#' @details `Lx1` could be any population structure of any scale, as long as you're comfortable
#' assuming it's stationary and can be warped into stable. For the oldest ages, this is probably
#' quite often an acceptable and useful approximation. The transformation is applied at the single-age scale, even if the input `nLx` is in wider (e.g. abridged) age groups. When needed, we reduce to single ages using (default) `graduate_uniform()`, then apply the transformation, then group back. This is innocuous if `nLx` is given in single ages. You may want to change `method` to `"mono"` or `"pclm"`.
#'
-#' @param nLx numeric vector of stationary population age structure in arbitrary integer age groups
-#' @param Age interger vector of lower bounds of age groups of `nLx`
+#' @param Lx1 numeric vector of stationary population age structure in arbitrary integer age groups
+#' @param Age_Lx1 integer vector of lower bounds of age groups of `Lx1`
#' @param r stable growth rate
-#' @param AgeInt optional integer vector of widths of age groups, inferred if not given.
-#' @param continuous logical. If `TRUE` we use the growth adjustment. `e^(-age*r)`. If `FALSE` we assume `r` is geometric growth, and we use `(1+r)^age` for the growth adjustment.
-#' @param method character, graduation method used for intermediate graduation. Default `"uniform"`. Other reasonable choices include `"mono"` or `"pclm"`.
#' @return numeric vector of the transformed `nLx`. Note, this vector sums to `1`.
#' @export
#' @examples
@@ -138,47 +144,40 @@ OPAG_simple <-
#' }
#'
-OPAG_nLx_warp_r <- function(nLx,
- Age,
- r,
- AgeInt = NULL,
- continuous = TRUE,
- method = "uniform"){
- # Let's do this in single ages :-)
- # for now, just uniform, but could pass in args to graduate of course
- # if that is preferred.
- Lx1 <- graduate(nLx, Age, method = method, constrain = TRUE)
- a1 <- names2age(Lx1)
- if (continuous){
- wLx <- exp(-r * (a1 + .5)) * Lx1
+OPAG_nLx_warp_r <- function(Lx1,
+ Age_Lx1,
+ r
+){
+ a1 <- Age_Lx1
+ w1Lx <- exp(-r * (a1 + .5)) * Lx1
+ nAges <- length(Age_Lx1)
+
+ if (r == 0 ) {
+ w1Lx[nAges] <- Lx1[nAges]
} else {
- # then geometric
- w <- (1 + r) ^ (a1 + .5)
- wLx <- w * nLx
- }
- wLx <- wLx / sum(wLx)
- if (is.null(AgeInt)){
- AgeInt <- age2int(Age, OAvalue = 1)
+ Tlast <- Lx1[nAges]
+ Tprev <- Tlast + Lx1[nAges-1]
+ # PJ's closeout
+ abar <- -((log(Lx1[nAges-1])-r*a1[nAges-1])-log(Tprev*exp(r)-Tlast)) / r
+ w1Lx[nAges] <- exp(-r * abar) * Tlast
}
- a12A <- rep(Age, AgeInt)
- nwLx <- groupAges(wLx, Age = a1, AgeN = a12A)
- nwLx
+
+ w1Lx <- w1Lx / sum(w1Lx)
+
+ w1Lx
}
#' calculates residual for optimizing growth rate r for OPAG family
#' @description For a given set of age groups to fit against, and a given stable growth rate, $r$,
#' what is the error implied given the current $r$ and stationary standard?
-#' @details This is a utiltiy function for `OPAG()`, which needs to optimize $r$ for a
+#' @details This is a utility function for `OPAG()`, which needs to optimize $r$ for a
#' given population vector and stationary standard.
#' @param r given stable growth rate
#' @param Pop_fit numeric vector of at least two population counts to use for fitting
#' @param Age_fit integer vector of lower bounds for age groups of `Pop_fit`
#' @param AgeInt_fit integer vector of widths of age groups of `Pop_fit`
-#' @param nLx numeric vector of stable population standard
-#' @param Age_nLx integer vector of lower bounds for age groups of `nLx`
-#' @param AgeInt_nLx optional integer vector of widths of age groups of `nLx`, inferred if not given.
-#' @param continuous logical. If `TRUE` we use the growth adjustment. `e^(-age*r)`. If `FALSE` we assume `r` is geometric growth, and we use `(1+r)^age` for the growth adjustment.
-#' @param method character. Graduation method, default `"uniform"`. `"mono"` or `"pclm"` would also be good choices.
+#' @param Lx1 numeric vector of stable population standard by single ages
+#' @param Age_Lx1 integer vector of lower bounds for age groups of `Lx1`
#' @return numeric. A residual that you're presumably trying to minimize.
#' @export
@@ -188,95 +187,70 @@ OPAG_nLx_warp_r <- function(nLx,
#' Age_fit <- c(70,80)
#' AgeInt_fit <- c(10,10)
#' nLx <- downloadnLx(NULL, "Spain","female",1971)
-#' Age_nLx <- names2age(nLx)
+#' # graduate(nLx, Age_nLx, method = method, constrain = TRUE)
+#' Ageab <- names2age(nLx)
+#' Lx1 <- graduate(c(nLx), Ageab, method = "mono", constrain = TRUE)
+#' Age_Lx1 <- 0:100
#' r <- .01
#'
#' OPAG_r_min(r,
-#' Pop_fit,
-#' Age_fit,
-#' AgeInt_fit,
-#' nLx,
-#' Age_nLx)
+#' Pop_fit = Pop_fit,
+#' Age_fit = Age_fit,
+#' AgeInt_fit = AgeInt_fit,
+#' Lx1 = Lx1,
+#' Age_Lx1 = Age_Lx1)
#'
#' (r_opt <- optimize(OPAG_r_min,
#' Pop_fit = Pop_fit,
#' Age_fit = Age_fit,
#' AgeInt_fit = AgeInt_fit,
-#' nLx = nLx,
-#' Age_nLx = Age_nLx,
+#' Lx1 = Lx1,
+#' Age_Lx1 = Age_Lx1,
#' interval = c(-0.05,.05))$min)
#'
-#' ai <- age2int(Age_nLx)
-#'
-#' # Note the whole age range is being scaled to 1 here, but in practice
-#' # you'd only be doing this in the highest ages. If only two fitting
-#' # ages are given, then we can get an r that matches them perfectly,
-#' # at least within reasonable bounds.
-#' \dontrun{
-#' plot(Age, nLx / sum(nLx) / ai, type = 's')
-#' lines(Age,OPAG_nLx_warp_r(Lx,Age,r=r_opt)/ai,type='s',col = "red")
-#' }
OPAG_r_min <- function(r,
- Pop_fit,
Age_fit,
+ Pop_fit,
AgeInt_fit, # necessary
- nLx,
- Age_nLx,
- AgeInt_nLx = NULL,
- continuous = TRUE,
- method = "uniform"){
- if (is.null(AgeInt_nLx)){
- AgeInt_nLx <- age2int(Age_nLx, OAvalue = 1)
- }
- # This is the standard we want to match to Pop,
+ Lx1,
+ Age_Lx1
+){
+ AgeInt_nLx <- age2int(Age_Lx1, OAvalue = 1)
+
+ # 1) This is the standard we want to match to Pop,
# which has presumably been cut down / grouped to the
# ages we want to calibrate to.
- wnLx <- OPAG_nLx_warp_r(
- nLx = nLx,
- Age = Age_nLx,
- r = r,
- AgeInt = AgeInt_nLx,
- continuous = continuous)
-
- # now need to get it to the same age groups as Pop
+ w1Lx <- OPAG_nLx_warp_r(
+ Lx1 = Lx1,
+ Age_Lx1 = Age_Lx1,
+ r = r
+ )
+
+ # 2) now need to get it to the same age groups as Pop
# so that we can get a residual
-
- # 1) Move stable pop to single ages
- w1Lx <- graduate(
- wnLx,
- Age = Age_nLx,
- AgeInt = AgeInt_nLx,
- method = method)
- a1t <- names2age(w1Lx)
- a1t <- as.integer(a1t)
-
- # 2) which single ages implied by Pop?
- N <- length(AgeInt_fit)
- a1match <- Age_fit[1]:(max(Age_fit) + AgeInt_fit[N] - 1)
- a1match <- as.integer(a1match)
-
- # 3) select down to just those ages:
- ind <- a1t %in% a1match
- w1Lx <- w1Lx[ind]
-
- # 4) group w1Lx to same as Pop_fit
- ageN <- rep(Age_fit, times = AgeInt_fit)
- stand <- groupAges(w1Lx, Age = a1match, AgeN = ageN)
-
- # 5) rescale standard and Pop_fit to sum to 1
- stand <- rescale_vector(stand, scale = 1)
+
+ w1Lx_fit <- rep(NA, length(Age_fit))
+
+ for (i in 1:length(Age_fit)){
+ ind <- Age_Lx1 >= Age_fit[i] & Age_Lx1 < (Age_fit[i] + AgeInt_fit[i])
+ w1Lx_fit[i] <- sum(w1Lx[ind])
+ }
+
+ # 3) rescale standard and Pop_fit to sum to 1
+ stand <- rescale_vector(w1Lx_fit, scale = 1)
Pop_fit <- rescale_vector(Pop_fit, scale = 1)
-
- # 6) return the residual
+
+ # 4) return the residual
sum(abs(stand - Pop_fit))
}
#' creates stable standard based on optimizing the growth rate
#' @description The stationary standard, `nLx` is transformed into a stable standard by optimizing a growth rate, `r` such that the stable standard matches observed population counts in selected age groups. Usually the ages used for fitting are wide age groups in older ages preceding the open age group. The standard output by this function is used by `OPAG` to create the standard used to redistribute counts over older age groups up to a specified open age group, such as 100.
-#' @details The arguments `method` and `continous` don't have much leverage on the result. In short, the stable population transformation is done by ungrouping `nLx` to single ages (if it isn't already), and `method` controls which graduation method is used for this, where `"uniform"`, `"mono"`, `"pclm"` are the reasonable choices at this writing. In single ages, the difference between using a geometric `r` versus continuous `r` are quite small for this task.
-#'
+#' @details The argument `method` doesn't have much leverage on the result. In short, the stable population transformation is done by ungrouping `nLx` to single ages (if it isn't already), and `method` controls which graduation method is used for this, where `"uniform"`, `"mono"`, `"pclm"` are the reasonable choices at this writing.
+#'
+#'
#' @inheritParams OPAG_r_min
#' @return
#' list constaining
@@ -289,71 +263,64 @@ OPAG_r_min <- function(r,
#' @examples
#' Pop_fit <- c(85000,37000)
#' Age_fit <- c(70,80)
-#' AgeInt_fit <- c(10,10)
#' nLx <- downloadnLx(NULL, "Spain","female",1971)
#' Age_nLx <- names2age(nLx)
-#'
-#' # India Males, 1991
+#' Lx1 <- graduate(nLx,Age=Age_nLx,method = "mono")
+#' Age_Lx1 <- 0:100
+#' # India Males, 1971
#' Pop <- smooth_age_5(pop1m_ind,
#' Age = 0:100,
#' method = "Arriaga")
#' Pop80 <- groupOAG(Pop, names2age(Pop), 80)
#' Age <- names2age(Pop80)
-#' AgeInt <- age2int(Age, OAvalue = 1)
-#'
-#' nLx <- downloadnLx(NULL, "India","male",1991)
+#'
+#' nLx <- downloadnLx(NULL, "India","male",1971)
#' Age_nLx <- names2age(nLx)
-#' AgeInt_nLx <- age2int(Age_nLx,OAvalue = 1)
-#'
+#'
+#'
#' Pop_fit <- groupAges(Pop80, Age, N = 10)[c("60","70")]
#' Age_fit <- c(60,70)
#' AgeInt_fit <- c(10,10)
-#'
+#'
#' Standard <- OPAG_fit_stable_standard(
-#' Pop_fit,
-#' Age_fit,
-#' AgeInt_fit,
-#' nLx = nLx,
-#' Age_nLx = Age_nLx,
-#' AgeInt_nLx = AgeInt_nLx,
-#' method = "uniform",
-#' continuous = TRUE)
-#'
+#' Pop_fit = Pop_fit,
+#' Age_fit = Age_fit,
+#' AgeInt_fit = AgeInt_fit,
+#' Lx1=Lx1,
+#' Age_Lx1 = Age_Lx1
+#' )
+#'
#' # A visual comparison:
#' nL60 <- rescale_vector(nLx[Age_nLx >= 60])
-#' St60p <- rescale_vector( Standard$Standard[Age_nLx >= 60] )
+#' St60p <- rescale_vector( Standard$Standard[c(0:100) >= 60] )
#' ages_plot <- seq(60,100,by=5)
#' \dontrun{
-#' plot(ages_plot,nL60, type = 'l')
-#' lines(ages_plot, St60p, col = "blue")
+#' plot(ages_plot,nL60, type = 'l')
+#' lines(60:100, St60p, col = "blue")
#' }
OPAG_fit_stable_standard <- function(Pop_fit,
Age_fit,
AgeInt_fit,
- nLx,
- Age_nLx,
- AgeInt_nLx,
- method = "uniform",
- continuous = TRUE){
-
-
+ Lx1,
+ Age_Lx1
+){
+
+
# optimize the parameter r
r_opt <- optimize(OPAG_r_min,
Pop_fit = Pop_fit,
Age_fit = Age_fit,
AgeInt_fit = AgeInt_fit,
- nLx = nLx,
- Age_nLx = Age_nLx,
- interval = c(-0.05, .05))
-
-
- standard <- OPAG_nLx_warp_r(nLx = nLx,
- Age = Age_nLx,
- r = r_opt$min,
- AgeInt = AgeInt_nLx,
- continuous = continuous,
- method = method)
+ Lx1 = Lx1,
+ Age_Lx1 = Age_Lx1,
+ interval = c(-0.02, .05)) # changed interval
+
+
+ standard <- OPAG_nLx_warp_r(Lx1 = Lx1,
+ Age_Lx1 = Age_Lx1,
+ r = r_opt$min
+ )
# return both stable standard and the optimization output,
# which will let us know if r is simply unreasonable or similar.
out <- list(Standard = standard,
@@ -373,44 +340,39 @@ OPAG_fit_stable_standard <- function(Pop_fit,
#' @details It may be helpful to try more than one fitting possibility,
#' and more than one `Redistribute_from` cut point, as results may vary.
#'
-#' The argument `"method"` refers to which graduation method (see `?graduate`)
-#' is only relevant if input data are in grouped ages. This is innocuous if
-#' ages are single to begin with. The choice of whether to assume
-#' `continuous = TRUE` constant growth versus geometric (`FALSE`) growth
-#' has little leverage.
-#'
#' `Redistribute_from` can be lower than your current open age group,
#' and `OAnew` can be higher, as long as it is within the range of `Age_nLx`.
#' If `Age_nLx` doesn't go high enough for your needs, you can extrapolate
-#' it ahead of time. For this, you'd want the `nMx` the underly it, and you
+#' it ahead of time. For this, you'd want the `nMx` that underlie it, and you
#' can use `lt_abridged()`, specifying a higher open age, and then
#' extracting `nLx` again from it.
#'
#' @inheritParams OPAG_r_min
#' @param Pop numeric vector of population counts
#' @param Age_Pop integer vector of the lower bounds of the population age groups
-#' @param AgeInt_Pop integer vector of the population age group interval widths, using `Inf` for the open age group.
+#' @param nLx numeric vector of stationary population age structure in arbitrary integer age groups
+#' @param Age_nLx integer vector of lower bounds of age groups of `nLx`
#' @param Redistribute_from integer lower age bound that forms the cutoff, above which we redistribute counts using the stable standard.
#' @param OAnew integer. Desired open age group in the output (must being element of `Age_nLx`)
+#' @param method character, graduation method used for intermediate graduation. Default `"mono"`. Other reasonable choices include `"pclm"` or `"uniform"`.
#' @export
+#' @importFrom utils tail
#' @examples
-#' # India Males, 1991
+#' # India Males, 1971
#' Pop <- smooth_age_5(pop1m_ind,
#' Age = 0:100,
#' method = "Arriaga")
#' Age_Pop <- names2age(Pop)
#' AgeInt_Pop <- age2int(Age_Pop, OAvalue = 1)
#'
-#' nLx <- downloadnLx(NULL, "India","male",1991)
+#' nLx <- downloadnLx(NULL, "India","male",1971)
#' Age_nLx <- names2age(nLx)
#' AgeInt_nLx <- age2int(Age_nLx, OAvalue = 1)
#'
#' Pop_fit <- OPAG(Pop,
#' Age_Pop = Age_Pop,
-#' AgeInt_Pop = AgeInt_Pop,
#' nLx = nLx,
#' Age_nLx = Age_nLx,
-#' AgeInt_nLx,
#' Age_fit = c(60,70),
#' AgeInt_fit = c(10,10),
#' Redistribute_from = 80)
@@ -428,17 +390,15 @@ OPAG_fit_stable_standard <- function(Pop_fit,
OPAG <- function(Pop,
Age_Pop,
- AgeInt_Pop,
- nLx,
- Age_nLx,
- AgeInt_nLx = NULL,
+ nLx,
+ Age_nLx,
Age_fit = NULL,
AgeInt_fit = NULL,
Redistribute_from = max(Age_Pop),
OAnew = max(Age_nLx),
- method = "uniform",
- continuous = TRUE){
-
+ method = "mono"
+){
+
# ensure OAnew is possible
stopifnot(OAnew <= max(Age_nLx))
@@ -450,21 +410,38 @@ OPAG <- function(Pop,
if(!identical(as.integer(unique(diff(Age_Pop))), as.integer(unique(diff(Age_nLx))))){ # put a different
cat("\nAge_Pop and Age_nLx age intervals are different!\n")
}
- # what if one age vec is single and the other isn't? we should warn, but continue.
- # is_single(Age_Pop)
- # is_abridged(Age_Pop)
+
+ # PJ adds this. Note final age group not assigned a width
+ AgeInt_Pop <- diff(Age_Pop)
+ # AgeInt_nLx <- diff(Age_Pop)
# setup, prelims:
# 0) if Age_fit isn't given assume last two 10-year age groups.
+
if (is.null(Age_fit)){
OA <- max(Age_Pop)
Age_fit <- OA - c(20,10)
AgeInt_fit <- c(10,10)
stopifnot(Age_fit %in% Age_Pop)
}
-
+ if (is.null(AgeInt_fit)){
+ # assume age intervals are age differences, and repeat last one
+ AgeInt_fit <- diff(Age_fit)
+ AgeInt_fit <- c(AgeInt_fit, tail(AgeInt_fit, n=1))
+ # if Age_fit includes pop OA then set last fit age int to Inf
+ if (tail(Age_fit,1) == tail(Age_Pop,1)) {
+ AgeInt_fit[length(AgeInt_fit)] <- Inf
+ }
+ }
+ if (any(!Age_fit %in% Age_Pop)){
+ ind <- Age_fit %in% Age_Pop
+ Age_fit <- Age_fit[ind]
+ AgeInt_fit <- AgeInt_fit[ind]
+ stopifnot(length(Age_fit) > 1)
+ }
+
# 1) get Pop_fit
-
+
# TR: note: this calls for a special age utility function I think
# earmarking this code chunk to swap it out in the future.
Pop_fit <- rep(NA, length(Age_fit))
@@ -472,47 +449,55 @@ OPAG <- function(Pop,
ind <- Age_Pop >= Age_fit[i] & Age_Pop < (Age_fit[i] + AgeInt_fit[i])
Pop_fit[i] <- sum(Pop[ind])
}
-
- # 2) get the standard
+
+ # 2) make sure Lx is single ages
+ Lx1 <- graduate(nLx, Age_nLx, method = method, constrain = TRUE)
+ Age_Lx1 <- as.integer(names(Lx1))
+
Stab_stand <- OPAG_fit_stable_standard(Pop_fit,
Age_fit,
AgeInt_fit,
- nLx,
- Age_nLx,
- AgeInt_nLx,
- method = method,
- continuous = continuous)
+ Lx1,
+ Age_Lx1
+ )
StPop <- Stab_stand$Standard
-
+
# 3) get total to redistribute:
OAG_total <- sum(Pop[Age_Pop >= Redistribute_from])
-
+
# 4) select standard in those age groups.
- StPop_sel <- StPop[Age_nLx >= Redistribute_from]
+ StPop_sel <- StPop[Age_Lx1 >= Redistribute_from]
StPop_sel <- rescale_vector(StPop_sel, scale = 1)
-
+
# 5) redistribute
Pop_redistributed <- StPop_sel * OAG_total
-
+
+ # 5a) regroup into original pop age grouping
+  # TR: possibly not the best use of AgeInt_Pop..
+ if (tail(AgeInt_Pop, n=2)[-1] == 5) {
+ Pop_redistributed <- groupAges(Pop_redistributed, N = 5)
+ }
+
# 6) graft together
Pop_grafted <- c(Pop[Age_Pop < Redistribute_from],
Pop_redistributed)
Age_grafted <- c(Age_Pop[Age_Pop < Redistribute_from],
Age_nLx[Age_nLx >= Redistribute_from])
-
+
+ names(Pop_grafted) <- Age_grafted
# 7) potentially group down OAG
Pop_out <- groupOAG(Value = Pop_grafted,
Age = Age_grafted,
OAnew = OAnew)
Age_out <- names2age(Pop_out)
-
+
# 8) compose list for output
out <- list(
- Pop_out = Pop_out,
- Age_out = Age_out,
- Pop_in = Pop,
- Standard = StPop,
- r_opt = Stab_stand$r_opt)
-
+ Pop_out = Pop_out,
+ Age_out = Age_out,
+ Pop_in = Pop,
+ Standard = StPop,
+ r_opt = Stab_stand$r_opt)
+
out
}
diff --git a/R/basepop.R b/R/basepop.R
index 46d1e27fd..aa5078522 100644
--- a/R/basepop.R
+++ b/R/basepop.R
@@ -141,8 +141,8 @@
#' * `nLxm` numeric matrix of male `nLx`, abridged ages in rows and (potentially interpolated) time in columns. Potentially downloaded.
#' * `Asfr` numeric matrix of age specific fertility in 5-year age groups ages 15-19 until 45-49 in rows, and (potentially interpolated) time in columns. Potentially downloaded.
#' * `Exposure_female` numeric matrix of approximated age-specific exposure in 5-year age groups ages 15-19 until 45-49 in rows, and (potentially interpolated) time in columns.
-#' * `Bt` births at three time points prior to census corrsponding to the midpoints of the cohorts entering ages 0, 1-4, and 5-9.
-#' * `SRB` sex ratio at birth at three time points prior to census corrsponding to the midpoints of the cohorts entering ages 0, 1-4, and 5-9. Potentially downloaded.
+#' * `Bt` births at three time points prior to census corresponding to the midpoints of the cohorts entering ages 0, 1-4, and 5-9.
+#' * `SRB` sex ratio at birth at three time points prior to census corresponding to the midpoints of the cohorts entering ages 0, 1-4, and 5-9. Potentially downloaded.
#' * `Age` age groups of the input population counts.
#'
# #' `basepop_single` is used, the return value is a numeric vector with
diff --git a/R/check_heaping.R b/R/check_heaping.R
index 5fa8949d1..9f80e5eae 100644
--- a/R/check_heaping.R
+++ b/R/check_heaping.R
@@ -504,7 +504,7 @@ check_heaping_spoorenberg <- function(Value,
#' @param Agei integer. The age on which the index is centered.
#' @param pow either \code{"exp"} (default) or a power such as 2. See details
#'
-#' @details The index looks down two ages and up two ages, so the data must accommodate that range. The denominator is a mean of the counts in the sourrounding 5 single ages. The kind of mean can be controlled with the \code{pow} argument. By default, this takes the antilog of the arithmetic mean of the natural log of the five denominator counts. That will fail if one of the counts is equal to 0. In such cases, another power, such as 2 or 10 or 100 may be used, which is more robust to 0s. The higher the power, the closer the result will resemble the default output. If \code{pow=="exp"} but a 0 is detected among the denominator ages, then \code{pow} is assigned a value of 1000. \code{pow=1} would imply an arithmetic mean in the denominator.
+#' @details The index looks down two ages and up two ages, so the data must accommodate that range. The denominator is a mean of the counts in the surrounding 5 single ages. The kind of mean can be controlled with the \code{pow} argument. By default, this takes the antilog of the arithmetic mean of the natural log of the five denominator counts. That will fail if one of the counts is equal to 0. In such cases, another power, such as 2 or 10 or 100 may be used, which is more robust to 0s. The higher the power, the closer the result will resemble the default output. If \code{pow=="exp"} but a 0 is detected among the denominator ages, then \code{pow} is assigned a value of 1000. \code{pow=1} would imply an arithmetic mean in the denominator.
#' @return The value of the index.
#'
#' @references
@@ -679,13 +679,13 @@ heapify <- function(Value,
#' Detect if heaping is worse on terminal digits 0s than on 5s
#'
-#' @description Ages ending in 0 often have higher apparent heaping than ages ending in 5. In this case, data in 5-year age bins might show a sawtooth pattern. If heaping ocurrs in roughly the same amount on 0s and 5s, then it may be sufficient to group data into 5-year age groups and then graduate back to single ages. However, if heaping is worse on 0s, then this procedure tends to produce a wavy pattern in count data, with 10-year periodicity. In this case it is recommended to use one of the methods of \code{smooth_age_5()} as an intermediate step before graduation.
+#' @description Ages ending in 0 often have higher apparent heaping than ages ending in 5. In this case, data in 5-year age bins might show a sawtooth pattern. If heaping occurs in roughly the same amount on 0s and 5s, then it may be sufficient to group data into 5-year age groups and then graduate back to single ages. However, if heaping is worse on 0s, then this procedure tends to produce a wavy pattern in count data, with 10-year periodicity. In this case it is recommended to use one of the methods of \code{smooth_age_5()} as an intermediate step before graduation.
#'
#' @details Data is grouped to 5-year age bins. The ratio of each value to the average of its neighboring values is calculated. If 0s have stronger attraction than 5s then we expect these ratios to be >1 for 0s and <1 for 5s. Ratios are compared within each 10-year age group in the evaluated age range. If in the evaluated range there are at most two exceptions to this rule (0s>5s), then the ratio of the mean of these ratios is returned, and it is recommended to use a smoother method. Higher values suggest use of a more aggressive method. This approach is only slightly different from that of Feeney, as implemented in the \code{smooth_age_5_zigzag_inner()} functions. This is not a general measure of roughness, but rather an indicator of this particular pattern of age attraction.
#' @export
#' @inheritParams heapify
#' @param ageMin integer evenly divisible by 10. Lower bound of evaluated age range, default 40.
-#' @param ageMax integer evently divisibly by 5. Upper bound of evaluated age range, defaults to highest age evenly divisible by 10.
+#' @param ageMax integer evenly divisible by 5. Upper bound of evaluated age range, defaults to highest age evenly divisible by 10.
#' @return numeric, ratio of 0s to 5s. If > 1 then the pattern is present.
#' @references
#' \insertRef{feeney1979}{DemoTools}
@@ -762,7 +762,7 @@ check_heaping_sawtooth <-
}
#' Evaluate roughness of data in 5-year age groups
-#' @description For a given age-structured vector of counts, how rough is data after grouping to 5-year age bins? Data may require smoothing even if there is no detectable sawtooth pattern. It is best to use the value in this method together with visual evidence to gauage whether use of \code{smooth_age_5()} is recommended.
+#' @description For a given age-structured vector of counts, how rough is data after grouping to 5-year age bins? Data may require smoothing even if there is no detectable sawtooth pattern. It is best to use the value in this method together with visual evidence to gauge whether use of \code{smooth_age_5()} is recommended.
#' @details First we group data to 5-year age bins. Then we take first differences (d1) of these within the evaluated age range. Then we smooth first differences (d1s) using a generic smoother (\code{ogive()}). Roughness is defined as the mean of the absolute differences between \code{mean(abs(d1 - d1s) / abs(d1s))}. Higher values indicate rougher data, and may suggest more aggressive smoothing. Just eyeballing, one could consider smoothing if the returned value is greater than ca 0.2, and values greater than 0.5 already highly recommend it (pending visual verification).
#' @export
#' @inheritParams check_heaping_sawtooth
diff --git a/R/data.R b/R/data.R
index 6717eb73e..5bfbf3126 100644
--- a/R/data.R
+++ b/R/data.R
@@ -21,9 +21,9 @@
-#' Indian male population 1991
+#' Indian male population 1971
#'
-#' Indian male population 1991
+#' Indian male population 1971
#' @docType data
#' @format
#' A numeric vector of length 101
@@ -200,7 +200,7 @@
#' A data frame with:
#' \describe{
#' \item{Date}{Reference time for the rates estimate.}
-#' \item{Age}{Inferior age for abridged groups. Carefull: last age 100 is not an OAG}
+#' \item{Age}{Inferior age for abridged groups. Careful: last age 100 is not an OAG}
#' \item{Sex}{Male \code{m} and female \code{m}.}
#' \item{nMx}{Mortality rates.}
#' }
@@ -230,7 +230,7 @@
#' A matrix of dimensions 21 x 21
#'
#' @source
-#' Migration residual PAS spreadhseet
+#' Migration residual PAS spreadsheet
"pop_m_mat_five"
#' Population matrix for females five year age groups between 1950 and 2050
@@ -242,7 +242,7 @@
#' A matrix of dimensions 21 x 21
#'
#' @source
-#' Migration residual PAS spreadhseet
+#' Migration residual PAS spreadsheet
"pop_f_mat_five"
#' Survival rates matrix for males five year age groups between 1950 and 2045
@@ -254,7 +254,7 @@
#' A matrix of dimensions 21 x 20
#'
#' @source
-#' Migration residual PAS spreadhseet
+#' Migration residual PAS spreadsheet
"sr_m_mat_five"
#' Survival rates matrix for females five year age groups between 1950 and 2045
@@ -266,7 +266,7 @@
#' A matrix of dimensions 21 x 20
#'
#' @source
-#' Migration residual PAS spreadhseet
+#' Migration residual PAS spreadsheet
"sr_f_mat_five"
#' Age-specific fertility rates for age groups 15 to 45 between 1950 and 2045
@@ -278,7 +278,7 @@
#' A matrix of dimensions 7 x 20
#'
#' @source
-#' Migration residual PAS spreadhseet
+#' Migration residual PAS spreadsheet
"asfr_mat_five"
#' Sex ratio at birth between 1950 and 2045
@@ -289,7 +289,7 @@
#' A vector of length 20
#'
#' @source
-#' Migration residual PAS spreadhseet
+#' Migration residual PAS spreadsheet
"srb_vec_five"
#' Ages between 0 and 100 abridged in five year age groups
@@ -301,7 +301,7 @@
#' A vector of length 21
#'
#' @source
-#' Migration residual PAS spreadhseet
+#' Migration residual PAS spreadsheet
"ages_five"
#' Ages between 15 and 45 in five year age groups
@@ -313,7 +313,7 @@
#' A vector of length 7
#'
#' @source
-#' Migration residual PAS spreadhseet
+#' Migration residual PAS spreadsheet
"ages_asfr_five"
#' Population matrix for males single ages between 1999 and 2019
@@ -325,7 +325,7 @@
#' A matrix of dimensions 101 x 21
#'
#' @source
-#' Migration residual PAS spreadhseet
+#' Migration residual PAS spreadsheet
"pop_m_mat_single"
#' Population matrix for females single ages between 1999 and 2019
@@ -337,7 +337,7 @@
#' A matrix of dimensions 101 x 21
#'
#' @source
-#' Migration residual PAS spreadhseet
+#' Migration residual PAS spreadsheet
"pop_f_mat_single"
#' Survival rates matrix for males single ages between 1999 and 2019
@@ -350,7 +350,7 @@
#' A matrix of dimensions 101 x 20
#'
#' @source
-#' Migration residual PAS spreadhseet
+#' Migration residual PAS spreadsheet
"sr_m_mat_single"
#' Survival rates matrix for females single ages between 1999 and 2019
@@ -363,7 +363,7 @@
#' A matrix of dimensions 101 x 20
#'
#' @source
-#' Migration residual PAS spreadhseet
+#' Migration residual PAS spreadsheet
"sr_f_mat_single"
#' Age-specific fertility rates for single ages 15 to 49 between 1999 and 2018
@@ -376,7 +376,7 @@
#' A matrix of dimensions 35 x 20
#'
#' @source
-#' Migration residual PAS spreadhseet
+#' Migration residual PAS spreadsheet
"asfr_mat_single"
#' Sex ratio at birth between 1999 and 2019
@@ -388,7 +388,7 @@
#' A vector of length 20
#'
#' @source
-#' Migration residual PAS spreadhseet
+#' Migration residual PAS spreadsheet
"srb_vec_single"
#' Single ages between 0 and 100
@@ -399,7 +399,7 @@
#' A vector of length 101
#'
#' @source
-#' Migration residual PAS spreadhseet
+#' Migration residual PAS spreadsheet
"ages_single"
#' Single ages between 15 and 49
@@ -410,7 +410,7 @@
#' A vector of length 36
#'
#' @source
-#' Migration residual PAS spreadhseet
+#' Migration residual PAS spreadsheet
"ages_asfr_single"
#' Parameters for considered migration profiles
diff --git a/R/extra_mortality.R b/R/extra_mortality.R
index 6e8ed4d4d..9f374178e 100644
--- a/R/extra_mortality.R
+++ b/R/extra_mortality.R
@@ -26,12 +26,13 @@
#' @param opt.method character. Default `"LF2"`, see `MortalityLaws::MortalityLaw` for a description of choices.
#' @param ... Other arguments to be passed on to the
#' \code{\link[MortalityLaws]{MortalityLaw}} function.
+#' @details If fitting fails to converge, then we refit assuming Gompertz mortality with explicit starting parameters of `parS = c(A = 0.005, B = 0.13)` and a warning is issued.
#' @seealso
#' \code{\link[MortalityLaws]{MortalityLaw}}
#' \code{\link[MortalityLaws]{predict.MortalityLaw}}
#' @return An object of class \code{lt_rule_m_extrapolate} with the following components:
#' \item{input}{List with arguments provided in input. Saved for convenience.}
-#' \item{call}{An unevaluated function call, that is, an unevaluated expressionwhich consists of the named function applied to the given arguments.}
+#' \item{call}{An unevaluated function call, that is, an unevaluated expression that consists of the named function applied to the given arguments.}
#' \item{fitted.model}{An object of class \code{\link[MortalityLaws]{MortalityLaw}}. Here one can find fitted values, residuals, goodness of fit measures etc.}
#' \item{values}{A vector or matrix containing the complete mortality data, that is the modified input data following the extrapolation procedure.}
#'
@@ -53,7 +54,8 @@
#' f2 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "kannisto_makeham")
#' f3 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "gompertz")
#' f4 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "ggompertz")
-#' f5 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "makeham")
+#' # makeham falls back to gompertz for this data
+#' suppressWarnings(f5 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "makeham"))
#' f6 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "beard")
#' f7 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "beard_makeham")
#' f8 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "quadratic")
@@ -125,17 +127,7 @@
#' lty = c(NA, NA, 1:8), pch = c(16, 16, rep(NA, 8)),
#' col = c(1, 4, 2:9), lwd = 2, pt.cex = 2)
#'}
-#' # ----------------------------------------------
-#' # Example 3 - Extrapolate mortality for multiple years at once
-#'
-#' # Create some data
-#' mx_matrix <- matrix(rep(mx1, 3), ncol = 3) %*% diag(c(1, 1.05, 1.1))
-#' dimnames(mx_matrix) <- list(age = x1, year = c("year1", "year2", "year3"))
-#'
-#' F1 <- lt_rule_m_extrapolate(mx_matrix, x = x1, x_fit, x_extr, law = "kannisto")
-#' F1
-#' ls(F1)
-#' coef(F1)
+
#' @author Marius D. Pascariu
#' @export
lt_rule_m_extrapolate <- function(mx,
@@ -145,7 +137,10 @@ lt_rule_m_extrapolate <- function(mx,
law = "kannisto",
opt.method = "LF2",
...) {
-
+ dm <- dim(mx)
+ if (length(dm) == 1 | (length(mx) == 2 & any(dm == 1))){
+ mx <- c(mx)
+ }
all_the_laws_we_care_about <- c("kannisto",
"kannisto_makeham",
"makeham",
@@ -171,10 +166,36 @@ lt_rule_m_extrapolate <- function(mx,
opt.method = opt.method,
...
)
-
+
+ if (!is.null(M$opt.diagnosis)){
+ if( M$opt.diagnosis$convergence != 0){
+ warning("Extrapolation failed to converge\nFalling back to Gompertz with starting parameters:\n parS = c(A = 0.005, B = 0.13))",
+ immediate. = TRUE)
+
+ parS <- c(A = 0.005, B = 0.13)
+ law <- "gompertz"
+ M <- MortalityLaw(
+ x = x,
+ mx = mx,
+ fit.this.x = x_fit,
+ law = law,
+ parS = parS,
+ ...)
+ }
+ }
+
+  # TR: this will fail if a matrix is given where x_extr covers only 1 age.
+ chop <- FALSE
+ if (length(x_extr) == 1){
+ chop <- TRUE
+ x_extr <- c(x_extr, x_extr + 1)
+ }
pv <- predict(object = M,
x = x_extr)
-
+ if (chop){
+ pv <- pv[1]
+ x_extr <- x_extr[1]
+ }
# which ages are not to be replaced with fitted values?
L <- !(x %in% x_extr)
diff --git a/R/graduate.R b/R/graduate.R
index b5da87b99..cc95772c0 100644
--- a/R/graduate.R
+++ b/R/graduate.R
@@ -11,7 +11,7 @@
#'
#' @return Numeric vector of counts for single year age groups.
#'
-#' @details Assumes that the population is uniformly distributed across each age interval, and that initial age intervals are integers greater than or equal to 1. If \code{AgeInt} is given, its final value is used as the interval for the final age group. If \code{AgeInt} is missing, then \code{Age} must be given, and the open age group is by default preserved \code{OAvalue} rather than split. To instead split the final age group into, e.g., a 5-year age class, either give \code{AgeInt}, *or* give \code{Age}, \code{OAG = TRUE}, and \code{OAvalue = 5}.
+#' @details Assumes that the population is uniformly distributed across each age interval, and that initial age intervals are integers greater than or equal to 1. If \code{AgeInt} is given, its final value is used as the interval for the final age group. If \code{AgeInt} is missing, then \code{Age} must be given, and the open age group is by default preserved \code{OAvalue} rather than split. To instead split the final age group into, e.g., a 5-year age class, either give \code{AgeInt}, *or* give \code{Age}, \code{OAG = TRUE}, and \code{OAvalue = 5}. `Age` can be any age range; it does not need to start at 0.
#'
#' @export
#' @examples
@@ -52,7 +52,7 @@ graduate_uniform <-
#' @description This method is used to interpolate counts based on the Sprague formula. It is based on the first stage of the Sprague R script prepared by Thomas Buettner and Patrick Gerland, itself based on the description in Siegel and Swanson, 2004, p. 727.
#'
#' @inheritParams graduate
-#' @details Ages should refer to lower age bounds, ending in the open age group in the last row (not a closed terminal age). Dimension labelling is necessary. There must be at least six age groups (including the open group). One year of data will work as well, as long as it's given as or coercible to a single-column matrix. This method may produce negative values, most likely in the youngest or oldest ages. This case is dealt with in the \code{graduate()} wrapper function but not in this function.
+#' @details Ages should refer to lower age bounds, ending in the open age group in the last row (not a closed terminal age). Dimension labeling is necessary. There must be at least six age groups (including the open group). One year of data will work as well, as long as it's given as or coercible to a single-column matrix. This method may produce negative values, most likely in the youngest or oldest ages. This case is dealt with in the \code{graduate()} wrapper function but not in this function.
#'
#' If the highest age does not end in a 0 or 5, and \code{OAG == TRUE}, then the open age will be grouped down to the next highest age ending in 0 or 5. If the highest age does not end in a 0 or 5, and \code{OAG == FALSE}, then results extend to single ages covering the entire 5-year age group.
#'
@@ -203,6 +203,8 @@ graduate_sprague_expand <- function(
# get the split coefficients
# block for ages 0-9
+
+ # TR: 5-5-2021, this assumes ages start at 0...
g1g2 <- matrix(
c( 0.3616, -0.2768, 0.1488, -0.0336,
0.0000, 0.2640, -0.0960, 0.0400,
@@ -387,7 +389,7 @@ graduate_grabill_expand <- function(Value, Age, OAG = TRUE) {
#' Sprague estimated single-age population counts for the first and final ten ages. Open age groups are preserved, as are annual totals.
#'
#' @inheritParams graduate
-#' @details Dimension labelling is necessary. There must be at least six age groups (including the open group). One year of data will work as well, as long as it's given as a single-column matrix. Data may be given in either single or grouped ages. If the highest age does not end in a 0 or 5, and \code{OAG == TRUE}, then the open age will be grouped down to the next highest age ending in 0 or 5. If the highest age does not end in a 0 or 5, and \code{OAG == FALSE}, then results extend to single ages covering the entire 5-year age group.
+#' @details Dimension labeling is necessary. There must be at least six age groups (including the open group). One year of data will work as well, as long as it's given as a single-column matrix. Data may be given in either single or grouped ages. If the highest age does not end in a 0 or 5, and \code{OAG == TRUE}, then the open age will be grouped down to the next highest age ending in 0 or 5. If the highest age does not end in a 0 or 5, and \code{OAG == FALSE}, then results extend to single ages covering the entire 5-year age group.
#'
#' @return numeric vector in single ages.
#'
@@ -716,7 +718,7 @@ graduate_beers_expand <- function(Value,
#' @inheritParams graduate
#' @param method character. Valid values are `"ord"` or `"mod"`. Default `"ord"`.
#' @param johnson logical. Whether or not to adjust young ages according to the \href{https://www.census.gov/data/software/dapps.html}{Demographic Analysis & Population Projection System Software} method. Default `FALSE.`
-#' @details Ages should refer to lower age bounds. `Value` must be labelled with ages unless `Age` is given separately. There must be at least six 5-year age groups (including the open group, 5 otherwise). If you want the `johnson` adjustment then `Value` must contain a single-year estimate of the population count in age 0. That means `Value` must come either as standard abridged or single age data.
+#' @details Ages should refer to lower age bounds. `Value` must be labeled with ages unless `Age` is given separately. There must be at least six 5-year age groups (including the open group, 5 otherwise). If you want the `johnson` adjustment then `Value` must contain a single-year estimate of the population count in age 0. That means `Value` must come either as standard abridged or single age data.
#'
#' `method` option `"ord"` conserves sums in 5-year age groups, whereas `"mod"` does some smoothing between 5-year age groups too, and is not constrained.
#'
@@ -893,6 +895,8 @@ graduate_beers_johnson <- function(Age0, pop5, pop1) {
#' @details The PCLM method can also be used to graduate rates using an offset if both numerators and denominators are available. In this case \code{Value} is the event count and \code{offset} is person years of exposure. The denominator must match the length of \code{Value} or else the length of the final single age result \code{length(min(Age):OAnew)}. This method can be used to redistribute counts in the open age group if \code{OAnew} gives sufficient space. Likewise, it can give a rate extrapolation beyond the open age.
#'
#' If there are 0s in `Value`, these are replaced with a small value prior to fitting. If negatives result from the pclm fit, we retry after multiplying `Value` by 10, 100, or 1000, as sometimes a temporary rescale for fitting can help performance.
+#'
+#' `Age` can be any age range; it does not need to start at 0.
#'
#' @inheritParams graduate
#' @param ... further arguments passed to \code{ungroup::pclm()}
@@ -1004,7 +1008,7 @@ graduate_pclm <- function(Value, Age, AgeInt, OAnew = max(Age), OAG = TRUE, ...)
#' Graduate age groups using a monotonic spline.
#' @description Take the cumulative sum of \code{Value} and then run a monotonic spline through it. The first differences split back single-age estimates of \code{Value}. Optionally keep the open age group untouched.
#'
-#' @details The \code{"hyman"} method of \code{stats::splinefun()} is used to fit the spline because 1) it passes exactly through the points, 2) it is monotonic and therefore guarantees positive counts, and 3) it seems to be a bit less wiggly (lower average first differences of split counts). Single-age data is returned as-is. If you want to use this function as a smoother you first need to group to non-single ages.
+#' @details The \code{"hyman"} method of \code{stats::splinefun()} is used to fit the spline because 1) it passes exactly through the points, 2) it is monotonic and therefore guarantees positive counts, and 3) it seems to be a bit less wiggly (lower average first differences of split counts). Single-age data is returned as-is. If you want to use this function as a smoother you first need to group to non-single ages. `Age` can be any age range; it does not need to start at 0.
#' @inheritParams graduate
#' @return Numeric. vector of single smoothed age counts.
#' @importFrom stats splinefun
@@ -1069,9 +1073,9 @@ graduate_mono <- function(
# if the final age is Open, then we should remove it and then
# stick it back on
- AgePred <- c(min(Age), cumsum(AgeInt))
+ AgePred <- c(min(Age), cumsum(AgeInt) + min(Age))
y <- c(0, cumsum(Value))
- AgeS <- min(Age):sum(AgeInt)
+ AgeS <- min(Age):(sum(AgeInt)+ min(Age))
# TR: changed from monoH.FC to hyman 3.3.2021
y1 <- splinefun(y ~ AgePred, method = "hyman")(AgeS)
out <- diff(y1)
@@ -1119,7 +1123,7 @@ graduate_mono <- function(
#' # one may wish to instead rescale results colSums() of
#' # popg at age pivotAge and higher.
#' sum(grabill.closed.out) - sum(popvec)
-#' # also works on an age-labelled vector of data
+#' # also works on an age-labeled vector of data
#' closed.vec <- graduate_mono_closeout(popvec, Age = a5, OAG = TRUE)
#' # let's compare this one with sprague()
@@ -1218,7 +1222,7 @@ graduate_mono_closeout <-
#'
#' \code{OAnew} cannot be higher than \code{max(Age)+4} for \code{"sprague"} or \code{"beers"} methods. For \code{"uniform","mono","pclm"} it can be higher than this, and in each case the open age group is completely redistributed within this range, meaning it's not really open anymore.
#'
-#' For all methods, negative values are detected in output. If present, we deal with these in the following way: we take the geometric mean between the given output (with negative imputed with 0s) and the output of \code{graduate_mono()}, which is guaranteed non-negative. This only affects age groups where negatives were produced in the first pass. In our experience this only arises when using sprague, beers, or grabill methods, whereas all others are guarateed non-negative.
+#' For all methods, negative values are detected in output. If present, we deal with these in the following way: we take the geometric mean between the given output (with negative imputed with 0s) and the output of \code{graduate_mono()}, which is guaranteed non-negative. This only affects age groups where negatives were produced in the first pass. In our experience this only arises when using Sprague, Beers, or Grabill methods, whereas all others are guaranteed non-negative.
#'
#' For any case where input data are in single ages, constraining results to sum to values in the original age groups will simply return the original input data, which is clearly not your intent. This might arise when using graduation as an implicit two-step smoother (group + graduate). In this case, separate the steps, first group using \code{groupAges()} then use \code{graduate(..., constrain = TRUE)}.
#'
@@ -1420,7 +1424,7 @@ graduate <- function(Value,
}
n <- length(out)
- a1 <- min(Age):(n - 1)
+ a1 <- min(Age):(min(Age) + n - 1)
# detect negatives. Have default option to replace.
# Favor quick over perfect, since this only can arise
diff --git a/R/interp_coh.R b/R/interp_coh.R
index b23df2a76..78fd02c4f 100644
--- a/R/interp_coh.R
+++ b/R/interp_coh.R
@@ -27,34 +27,34 @@ shift_census_ages_to_cohorts <- function(pop,
date,
censusYearOpt = "frac",
OAG = TRUE){
-
-
+
+
stopifnot(is_single(age))
-
+
date <- dec.date(date)
yr <- floor(date)
f1 <- date - yr
-
+
if (OAG){
N <- length(pop)
pop <- pop[-N]
age <- age[-N]
}
-
+
if (is.na(censusYearOpt)){
censusYearOpt <- "NA"
}
-
+
upper_part_of_cohort <- pop * f1
lower_part_of_cohort <- pop * (1 - f1)
-
+
shift <- ceiling(f1)
pop_out <- shift.vector(lower_part_of_cohort,shift) + upper_part_of_cohort
-
+
cohorts <- yr - age - 1 + shift
-
+
age_out <- round(f1) + age
-
+
if (censusYearOpt == "drop"){
pop_out <- pop_out[-1]
age_out <- age_out[-1]
@@ -68,7 +68,7 @@ shift_census_ages_to_cohorts <- function(pop,
if (censusYearOpt == "NA"){
pop_out[1] <- NA_real_
}
-
+
list(cohort_size = pop_out,
birth_year = cohorts,
age = age_out,
@@ -76,8 +76,8 @@ shift_census_ages_to_cohorts <- function(pop,
f1 = f1)
}
-#' component-free intercensal cohort interpolation
-#' @description Cohorts between two censuses are interpolated flexibly using linear, exponential, or power rules. The lower and upper intercensal triangles are filled using within-age interpolation. This function is experimental and still in development.
+#' Cohort component intercensal interpolation
+#' @description Cohorts between two censuses are interpolated using a cohort component approach.
#' @seealso interp
#' @param c1 numeric vector. The first (left) census in single age groups
#' @param c2 numeric vector. The second (right) census in single age groups
@@ -96,6 +96,7 @@ shift_census_ages_to_cohorts <- function(pop,
#' @param midyear logical. `FALSE` means all Jan 1 dates between `date1` and `date2` are returned. `TRUE` means all July 1 intercensal dates are returned.
#' @param verbose logical. Shall we send informative messages to the console?
#' @param ... optional arguments passed to
+#' @details The basic approach is to i) align the censuses to single-year cohorts by blending adjacent ages, assuming that the birthdays in each age group are uniformly distributed through the year, ii) decrement the first census forward within cohorts using period-cohort survival probabilities calculated from (supplied or downloaded) `l(x)` values, iii) redistribute the residual at the time of the second census uniformly over time within cohorts. These steps are always done on Jan 1 reference dates. If `midyear = TRUE`, then we do within-age band arithmetic interpolation to July 1 reference dates.
#' @export
#' @importFrom data.table := as.data.table melt data.table dcast between
#' @examples
@@ -113,29 +114,29 @@ shift_census_ages_to_cohorts <- function(pop,
#' )
#' }
interp_coh <- function(
- c1,
- c2,
- date1,
- date2,
- age1 = 1:length(c1) - 1,
- age2 = 1:length(c2) - 1,
- dates_out = NULL,
- lxMat = NULL,
- age_lx = NULL,
- dates_lx = NULL,
- births = NULL,
- years_births = NULL,
- location = NULL,
- sex = "both",
- midyear = FALSE,
- verbose = TRUE,
- ...
- ) {
-
+ c1,
+ c2,
+ date1,
+ date2,
+ age1 = 1:length(c1) - 1,
+ age2 = 1:length(c2) - 1,
+ dates_out = NULL,
+ lxMat = NULL,
+ age_lx = NULL,
+ dates_lx = NULL,
+ births = NULL,
+ years_births = NULL,
+ location = NULL,
+ sex = "both",
+ midyear = FALSE,
+ verbose = TRUE,
+ ...
+) {
+
# convert the dates into decimal numbers
date1 <- dec.date(date1)
date2 <- dec.date(date2)
-
+
res_list <- rup(
c1 = c1,
c2 = c2,
@@ -155,78 +156,78 @@ interp_coh <- function(
verbose = verbose,
... = ...
)
-
+
pop_jan1 <- res_list$pop_jan1
dates_out <- res_list$dates_out
-
+
. <- NULL
age <- NULL
discount <- NULL
pop_jan1_pre <- NULL
resid <- NULL
year <- NULL
-
+
# add "cumulative" residual to the RUP (pop_jan1_pre)
- pop_jan1[, `:=`(pop_jan1 = pop_jan1_pre + resid * discount)]
+ pop_jan1[, `:=`(pop_jan1 = pop_jan1 + resid * discount)]
pop_jan1 <- pop_jan1[!is.na(cohort)]
-
+
# TR: to get residualmigbeta prelim result, one takes the cumulative
# resid (resid * discount), then decumulates it (within cohorts!),
# then sum over age. boo ya Lexis
-
+
PopAP <-
pop_jan1 %>%
.[, list(age, year, pop_jan1)] %>%
data.table::dcast(age ~ year, value.var = "pop_jan1") %>%
.[order(age)]
-
-
+
+
matinterp <- PopAP[age <= max(age1), -1] %>% as.matrix()
rownames(matinterp) <- age1
-
+
# Handle NAs perhaps c1 needs OPAG beforehand?)
ind <- is.na(matinterp)
if (any(ind) & verbose){
cat("\n",sum(ind),"NA detected in output.\nThese have been imputed with 0s.\nThis could happen in the highest ages,\nand you may consider extending the open ages of the census inputs?\n")
matinterp[ind] <- 0
}
-
+
# Handle negatives (small pops, or large negative residuals relative to pop size)
ind <- matinterp < 0
if (any(ind) & verbose){
cat("\n",sum(ind),"negatives detected in output.\nThese have been imputed with 0s.\n")
matinterp[ind] <- 0
}
-
+
yrsIn <- as.numeric(colnames(matinterp))
if (all(yrsIn > date1)){
matinterp <- cbind(c1, matinterp)
yrsIn <- c(date1, yrsIn)
}
-
+
if (all(yrsIn < date2)){
matinterp <- cbind(matinterp, c2[1:length(c2)])
yrsIn <- c(yrsIn, date2)
}
-
+
colnames(matinterp) <- yrsIn
# now we either return Jan1 dates or July 1 dates.
-
+
out <- interp(
matinterp,
datesIn = yrsIn,
datesOut = as.numeric(dates_out),
rule = 1
)
-
+
if (any(out < 0)) {
if (verbose) {
cat("\nSome of the interpolated values resulted to be negative, replacing with zeroes\n") #nolintr
}
-
+
out[out < 0] <- 0
}
-
+
out
}
@@ -629,6 +630,8 @@ lt_a2s_chunk <- function(chunk, OAnew, ...){
# dplyr::bind_cols() %>%
# as.matrix())
+# ZZZ FLAG for possible deprecation
+
interp_coh_lxMat_pxt <- function(lxMat,
dates_lx,
age_lx,
@@ -641,23 +644,23 @@ interp_coh_lxMat_pxt <- function(lxMat,
# fixed.
date1 <- dec.date(date1)
date2 <- dec.date(date2)
-
+
year1 <- floor(date1) + 1
year2 <- floor(date2)
-
+
year_seq <- year1:year2
-
+
dates_out <- c(dec.date(date1), year_seq)
-
+
# get ndx andnLx from lt_abridged()
-
+
a1 <- 0:OAnew
qx1 <- matrix(ncol = ncol(lxMat),
nrow = length(a1),
dimnames = list(a1,
dates_lx))
for (i in 1:ncol(lxMat)){
-
+
if (is_abridged(age_lx)){
# LTA <- lt_abridged(Age = age_lx,
# lx = lxMat[, i],
@@ -671,46 +674,52 @@ interp_coh_lxMat_pxt <- function(lxMat,
qx1[, i] <- LT1$nqx
} else {
qx <- lt_id_l_q(lxMat[, i])
-
+
LT1 <- lt_single_qx(nqx = qx,
Age=1:length(qx)-1,
OAnew = OAnew,
...)
-
-
+
+
qx1[, i] <- LT1$nqx
}
-
+
}
-
+
# We do linear interpolation of the logit-transformed qx.
logit_qx <- log(qx1 / (1 - qx1))
-
+
logit_qx_interp <-
interp(
popmat = logit_qx,
datesIn = dates_lx,
datesOut = dates_out,
- rule = 2)
+ rule = 2,
+ negatives = TRUE)
# transform back
QX <- exp(logit_qx_interp) / (1 + exp(logit_qx_interp))
-
QX[nrow(QX), ] <- 1
-
-
+
f1 <- diff(dates_out)[1]
f2 <- date2 - floor(date2)
-
+
+ # get Sx (keep PX name)
+ PX <- apply(QX,2,function(q){lt_single_qx(nqx = q,
+ Age=a1,
+ OAnew = OAnew,
+ ...)$Sx})
+ rownames(PX) <- rownames(QX)
# assume linear px change within age class
- PX <- 1 - QX
+ # PX <- 1 - QX
+ # IW: PX^f1 is not bad. Other option is PX=(1-QX*f). One assumes linear on d_x, the other constant mu_x.
PX[,1] <- PX[, 1] ^f1
PX[,ncol(PX)] <- PX[, ncol(PX)] ^f2
-
-
+
+
PX
}
-
+### ZZZ FLAG for redux and/or deprecation / name change
transform_pxt <- function(lxMat,
location,
sex,
@@ -721,20 +730,22 @@ transform_pxt <- function(lxMat,
age_lx,
age1,
...) {
-
+
# get the lexis surface of survival probabilities
if (is.null(lxMat)){
- pxt <- suppressMessages(
+ # ZZZ Note this already returns Sx. that was an easy fix.
+ pxt <- suppressMessages(
interp_coh_download_mortality(location = location,
sex = sex,
date1 = date1,
date2 = date2,
- OAnew = max(age1) + 1,
+ OAnew = max(age1),
verbose = verbose)
)
} else {
-
+
+ ### FLAG This all needs to change.
if (is.null(dates_lx)){
# if lx dates not given we assume dates evenly distributed from date1 to date2?
dates_lx <- seq(date1,date2,length.out = ncol(lxMat))
@@ -742,10 +753,10 @@ transform_pxt <- function(lxMat,
cat("lxMat specified, but not dates_lx\nAssuming:",paste(dates_lx,collapse=", "),"\n")
}
}
-
+
available_dates <- data.table::between(dates_lx, date1, date2)
if (!all(available_dates)) stop("All `dates_lx` must be within the range of `date1` and `date2`")
-
+
# if the shortest distance from dates_lx to date1 or date2 is greater than 7
# warn
dates_df <- expand.grid(dates_lx = dates_lx, dates = c(date1, date2))
@@ -761,20 +772,20 @@ transform_pxt <- function(lxMat,
") is greater than 7 years. Be wary."
)
}
-
- ic_period <- date2 - date1
- lx_mm <- range(dates_lx)
- overlap <- min(c(lx_mm[2], date2)) - c(max(lx_mm[1], date1))
- extrap_low <- lx_mm[1] - min(lx_mm[1],date1)
+
+ ic_period <- date2 - date1
+ lx_mm <- range(dates_lx)
+ overlap <- min(c(lx_mm[2], date2)) - c(max(lx_mm[1], date1))
+ extrap_low <- lx_mm[1] - min(lx_mm[1],date1)
extrap_high <- max(lx_mm[2],date2) - lx_mm[2]
t1 <- overlap / ic_period < .25
t2 <- extrap_low > 6
t3 <- extrap_high > 6
if (any(c(t1, t2, t3))) cat("\nRange between `date1` and `date2` must overlap with `lx_dates` for at least 25% of the range or 6 years.\n")
-
+
if (is.null(age_lx)){
if (nrow(lxMat) < 26){
-
+
N <- nrow(lxMat)
age_lx <- c(0,1,seq(5,5*(N-2),by=5))
} else {
@@ -784,7 +795,7 @@ transform_pxt <- function(lxMat,
cat("lxMat specified, but Age_lx missing\nAssuming:",paste(age_lx,collapse=", "),"\n")
}
}
-
+
# ensure lx fills timepoints.
# would like to pass ... here for the lifetable part
pxt <- interp_coh_lxMat_pxt(
@@ -793,11 +804,11 @@ transform_pxt <- function(lxMat,
age_lx = age_lx,
date1 = date1,
date2 = date2,
- OAnew = max(age1) + 1,
+ OAnew = max(age1),
control = list(deg = 3, lambda = 100),
...)
}
-
+
pxt
}
@@ -807,12 +818,12 @@ check_args <- function(lxMat, births, location, age1, age2, c1, c2, verbose) {
stopifnot(length(age2) == length(c2))
stopifnot(is_single(age1))
stopifnot(is_single(age2))
-
+
if (length(age1) != length(age2) & verbose){
cat("\nFYI: age ranges are different for c1 and c2\nWe'll still get intercensal estimates,\nbut returned data will be chopped off after age", max(age1), "\n")
}
-
-
+
+
# If lxMat or births are missing -- message requiring location and sex
if (is.null(lxMat) & is.null(location)) {
stop("lxMat not specified, please specify location and sex\n")
@@ -820,22 +831,22 @@ check_args <- function(lxMat, births, location, age1, age2, c1, c2, verbose) {
if (is.null(births) & is.null(location)) {
stop("births not specified, please specify location and sex\n")
}
-
+
if (!is.null(lxMat) && ncol(lxMat) == 1) {
stop("lxMat should have at least two or more dates as columns. lxMat contains only one column") #nolintr
}
-
+
if (any(c1 < 0)) stop("No negative values allowed in `c1`")
if (any(c2 < 0)) stop("No negative values allowed in `c2`")
if (any(lxMat < 0)) stop("No negative values allowed in `lxMat`")
-
+
}
# If dates_out not given, then we resolve using the midyear argument.
# If FALSE (default) we return intermediate Jan 1, not including c1 and c2
# If TRUE we return intermediate July 1 (.5) dates, not including c1 and c2
transform_datesout <- function(dates_out, date1, date2, midyear) {
-
+
if (is.null(dates_out)){
if (! midyear){
# jan 1 dates
@@ -854,28 +865,29 @@ transform_datesout <- function(dates_out, date1, date2, midyear) {
dates_out <- dates_out[dates_out_lgl]
}
}
-
+
dates_out
}
+### ZZZ FLAG for redux or deprecation
reshape_pxt <- function(
- pxt,
- births,
- c1,
- c2,
- age1,
- age2,
- date1,
- date2,
- f1,
- f2,
- yrs_births
- ) {
-
+ pxt,
+ births,
+ c1,
+ c2,
+ age1,
+ age2,
+ date1,
+ date2,
+ f1,
+ f2,
+ yrs_births
+) {
+
# Since we're using data.table, we need to create these empty
# variables to avoid having R CMD checks with no visible binding
# for global variable.
-
+
age <- NULL
year <- NULL
px <- NULL
@@ -893,146 +905,150 @@ reshape_pxt <- function(
discount <- NULL
.N <- NULL
. <- NULL
-
+
px_triangles <-
pxt %>%
data.table::as.data.table(keep.rownames = "age") %>%
data.table::melt(
- id.vars = "age",
- variable.name = "year",
- value.name = "px",
- variable.factor = FALSE
- )
-
+ id.vars = "age",
+ variable.name = "year",
+ value.name = "px",
+ variable.factor = FALSE
+ )
+
# No need for assignment: data.table assigns without creating a copy
px_triangles[, `:=`(age = as.numeric(age),
year = as.numeric(year),
lower = magrittr::raise_to_power(px, 0.5),
upper = magrittr::raise_to_power(px, 1 - 0.5))]
-
+
px_triangles <-
px_triangles[, list(age, year, lower, upper)] %>%
data.table::melt(
- id.vars = c("age", "year"),
- measure.vars = c("lower", "upper"),
- variable.name = "triangle",
- value.name = "value",
- variable.factor = FALSE
- )
-
+ id.vars = c("age", "year"),
+ measure.vars = c("lower", "upper"),
+ variable.name = "triangle",
+ value.name = "value",
+ variable.factor = FALSE
+ )
+
px_triangles[, `:=`(adj = ifelse(triangle == "upper", 1, 0))]
px_triangles[, `:=`(cohort = magrittr::subtract(year, age) %>% magrittr::subtract(adj) %>% floor())]
-
+
# cohort changes over the whole period
# px_cum1 <- px_triangles[, list(n_triangles = .N, coh_p = prod(value)),
# keyby = list(cohort)]
-
+
# adjust the census population vectors
c1c <- shift_census_ages_to_cohorts(c1, age1, date1, censusYearOpt = "frac")
c2c <- shift_census_ages_to_cohorts(c2, age2, date2, censusYearOpt = "frac")
-
+
# correction for the first year age 0 -- only take first for the remaining of
# the year
births[1] <- births[1] * (1 - f1)
-
+
# correction for the last year age 0
n_yrs <- length(births)
births[n_yrs] <- births[n_yrs] * f2
-
+
cohort_dt <-
data.table::data.table(
- cohort = yrs_births,
- pop = births
- )
-
+ cohort = yrs_births,
+ pop = births
+ )
+
input <-
data.table::data.table(cohort = c1c$birth_year, pop = c1c$cohort_size) %>%
.[order(cohort)] %>%
rbind(cohort_dt) %>%
.[, list(pop = sum(pop)), keyby = list(cohort)]
-
+
# population c2 observed
pop_c2 <- data.frame(
cohort = c2c$birth_year,
pop_c2_obs = c2c$cohort_size
)
-
+
pop_jan1_pre <-
px_triangles %>%
.[, list(n_triangles = .N, coh_p = prod(value)), keyby = list(year, cohort)] %>%
.[order(cohort, year)]
-
+
pop_jan1_pre[, `:=`(coh_lx = cumprod(coh_p)), keyby = list(cohort)]
-
+
pop_jan1_pre <- pop_jan1_pre[input, on = "cohort"]
-
+
pop_jan1_pre[, `:=`(
- pop_jan1_pre = pop * coh_lx,
+ pop_jan1 = pop * coh_lx,
age = floor(year) - cohort,
year = floor(year) + 1
)]
-
+
pop_jan1_pre[, `:=`(year = ifelse(year == max(year), year + f2 - 1, year))]
-
# calculate the discrepancy (migration) -- to be disrtibuted uniformly in
# cohorts
resid <-
pop_jan1_pre %>%
.[year == max(year)] %>%
.[pop_c2, on = "cohort"]
-
- resid[, `:=`(resid = pop_c2_obs - pop_jan1_pre)]
+
+ resid[, `:=`(resid = pop_c2_obs - pop_jan1)]
# Only used in the process for diagnostics
# resid[, `:=`(rel_resid = resid / pop_c2_obs)]
resid <- resid[, list(cohort, resid)]
-
+
# This should just be one value per cohort.
-
+
# determine uniform error discounts:
resid_discounts <-
stats::approx(
- x = c(date1, date2),
- y = c(0, 1),
- xout = yrs_births
- ) %>%
+ x = c(date1, date2),
+ y = c(0, 1),
+ xout = yrs_births
+ ) %>%
data.table::as.data.table() %>%
.[, list(year = x, discount = y)]
-
+
# output
pop_jan1 <-
pop_jan1_pre %>%
merge(resid, by = "cohort", all = TRUE) %>%
merge(resid_discounts, by = "year", all = TRUE)
-
+
# for the residual discount, account for boundaries
pop_jan1[, `:=`(
resid = ifelse(is.na(resid), 0, resid),
discount = ifelse(year == max(year), 1, discount)
)]
-
-
+
+
pop_jan1
}
+### ZZZ FLAG for redux
+### after the under-the hood changes are redone to back out Sx,
+### the top level args could expand it optionally let the user
+### also provide nLxMat
rup <- function(
- c1,
- c2,
- date1,
- date2,
- age1,
- age2,
- dates_out,
- lxMat,
- age_lx,
- dates_lx,
- births,
- years_births,
- location,
- sex,
- midyear,
- verbose,
- ...
- ) {
+ c1,
+ c2,
+ date1,
+ date2,
+ age1,
+ age2,
+ dates_out,
+ lxMat,
+ age_lx,
+ dates_lx,
+ births,
+ years_births,
+ location,
+ sex,
+ midyear,
+ verbose,
+ ...
+) {
+
check_args(
lxMat = lxMat,
births = births,
@@ -1043,11 +1059,11 @@ rup <- function(
c2 = c2,
verbose = verbose
)
-
+
if (is.na(date1) | is.na(date2)){
stop("\nCensus dates didn't parse\n")
}
-
+
# TR: resolve dates_out
# if some dates were given, let's coerce to numeric and ensure valid
if (!is.null(dates_out)){
@@ -1059,15 +1075,15 @@ rup <- function(
if (length(dates_out) == 0){
stop("\nno valid dates to interpolate to\n")
}
-
+
# if we still have valid dates, then check we're not extrapolating
dates_out_keep <- data.table::between(dates_out,
date1,
date2,
incbounds = FALSE)
-
+
dates_out_for_real <- dates_out[dates_out_keep]
-
+
# warn about any dates lost due to extrap request:
if (length(dates_out_for_real) != length(dates_out) & verbose){
cat("\nFollowing dates requested, but not returned\nbecause they'd require extrapolation:\n",paste(dates_out[!dates_out_keep],collapse = ", "),"\n")
@@ -1076,22 +1092,23 @@ rup <- function(
stop("\nuh oh! This method is strictly for cohort component interpolation\nYour requested dates_out didn't have anything between date1 and date2\n")
}
}
-
+
# If dates_out not given, then we resolve using the midyear argument.
# If FALSE (default) we return intermediate Jan 1, not including c1 and c2
# If TRUE we return intermediate July 1 (.5) dates, not including c1 and c2
dates_out <- transform_datesout(dates_out, date1, date2, midyear)
-
+
DD <- date2 - date1
if (DD >= 15 & verbose){
cat("\nFYI, there are",DD,"years between c1 and c2\nBe wary.\n")
}
-
+
# let's store the proportions separately
f1 <- date1 %>% magrittr::subtract(date1 %>% floor)
f2 <- date2 %>% magrittr::subtract(date2 %>% floor)
-
+
# And download if needed
+ # ZZZ FLAG: this step may
pxt <- transform_pxt(
lxMat = lxMat,
location = location,
@@ -1104,7 +1121,7 @@ rup <- function(
age1 = age1,
... = ...
)
-
+
yrs_births <- seq(floor(date1), floor(date2), 1)
# TR: if right-side is jan 1 then we can cut it off of pxt.
if (f2 == 0){
@@ -1112,7 +1129,7 @@ rup <- function(
yrs_births <- yrs_births[-length(yrs_births)]
f2 <- 1
}
-
+
# Download wpp births if needed
births <-
fetch_wpp_births(
@@ -1122,23 +1139,26 @@ rup <- function(
sex = sex,
verbose = verbose
)
-
+
# check length of births, also filter using provided dates if necessary
if (!is.null(years_births)){
stopifnot(length(births) == length(years_births))
-
+
years_births <- floor(years_births)
yrs_keep <- data.table::between(years_births,
min(yrs_births),
max(yrs_births),
incbounds = TRUE)
-
+
births <- births[yrs_keep]
}
-
+
# now that births should be available we can do this check.
stopifnot(length(births) == length(yrs_births))
-
+
+
+ # ZZZ FLAG this may or may not stay same. pxt could just be different values,
+  # and perhaps this would work. However, edge conditions need examination.
pop_jan1 <- reshape_pxt(
pxt = pxt,
births = births,
@@ -1152,7 +1172,7 @@ rup <- function(
f2 = f2,
yrs_births = yrs_births
)
-
+
list(
pop_jan1 = pop_jan1,
dates_out = dates_out
diff --git a/R/interp_lc_lim.R b/R/interp_lc_lim.R
index 281cbf434..da6899462 100644
--- a/R/interp_lc_lim.R
+++ b/R/interp_lc_lim.R
@@ -1,10 +1,10 @@
#' Lee-Carter method with limited data.
#'
-#' @description Given a data frame with dates, sex and mortality data by age (rates, conditionated probabilities of death
+#' @description Given a data frame with dates, sex and mortality data by age (rates, conditioned probabilities of death
#' or survival function), this function interpolate/extrapolate life tables
#' using the method for limited data suggested by Li et. al (2004) (at least three observed years).
#'
-#' @details Based on spreedsheet "Li_2018_Limited_Lee-Carter-v4.xlsm" from UN.
+#' @details Based on spreadsheet "Li_2018_Limited_Lee-Carter-v4.xlsm" from UN.
#' Useful for abridged or single ages, and allows output in both formats also.
#' One option is the use of non-divergent method for sex coherency (Li & Lee, 2005).
#' The other is the possibility of fitting `"k"` to replicate `"e_0"` at some given dates.
@@ -13,7 +13,7 @@
#'
#' @param input data.frame with cols: Date, Sex, Age, nMx (opt), nqx (opt), lx (opt)
#' @param dates_out numeric. Vector of decimal years to interpolate or extrapolate.
-#' @param Single logical. Wheter or not the lifetable output is by single ages.
+#' @param Single logical. Whether or not the lifetable output is by single ages.
#' @param dates_e0 numeric. Vector of decimal years where `"e_0"` should be fitted when apply method.
#' @param e0_Males numeric. Vector of life expectancy by year to be fitted. Same length than `"dates_e0"`.
#' @param e0_Females numeric. Vector of life expectancy by year to be fitted. Same length than `"dates_e0"`.
@@ -287,13 +287,22 @@ interp_lc_lim <- function(input = NULL,
e0_Males),
dates_e0,
dates_out,
- extrap = TRUE)[1, ]
+ extrap = TRUE)
e0f <- interp(rbind(e0_Females,
e0_Females),
dates_e0,
dates_out,
- extrap = TRUE)[1, ]
+ extrap = TRUE)
+
+ # IW: issue with dimension in case the interpolation is for 1 date only
+ if(ndates_out==1){
+ e0m = e0m[1]
+ e0f = e0f[1]
+ }else{
+ e0m = e0m[1,]
+ e0f = e0f[1,]
+ }
# avoid divergence: same bx but not kt.
if (prev_divergence){
@@ -304,7 +313,7 @@ interp_lc_lim <- function(input = NULL,
ktm_star = ktf_star = c()
for (j in 1:ndates_out){
ktm_star[j] <- optimize(f = interp_lc_lim_kt_min,
- interval = c(-20, 20),
+ interval = c(-50, 50),
ax = axm,
bx = bxm,
age = Age,
@@ -312,7 +321,7 @@ interp_lc_lim <- function(input = NULL,
e0_target = e0m[j],
...)$minimum
ktf_star[j] <- optimize(f = interp_lc_lim_kt_min, # TR: add ...
- interval = c(-20, 20),
+ interval = c(-50, 50),
ax = axf,
bx = bxf,
age = Age,
@@ -414,13 +423,13 @@ interp_lc_lim_abk_m <- function(k,ax,bx){
}
# estimate LC for limited data
-#' Estimate LC with limited data params
+#' Estimate LC with limited data parameters
#' @description Estimate LC with limited data from a matrix of rates (age by dates).
-#' @details SVD for ax and bx. Fit a simmple linear model for k and interp/extrapolate for objective dates.
-#' @param M numeric. Matrix with many rows as ages and columns as dates_in.
+#' @details SVD for `ax` and `bx.` Fit a simple linear model for `k` and interpolate/extrapolate for objective dates.
+#' @param M numeric. Matrix with many rows as ages and columns as `dates_in`.
#' @param dates_in numeric. Vector of dates with input rates.
#' @param dates_out numeric. Vector of dates for estimate a set of rates.
-#' @param SVD logical. Use Singular Value Decomposition for estimate b and k or Maximum Likelihood Estimation. Default `FALSE` for Maximum Likelihood Estimation.
+#' @param SVD logical. Use Singular Value Decomposition for estimate `b` and `k` or Maximum Likelihood Estimation. Default `FALSE` for Maximum Likelihood Estimation.
#' @references
#' \insertRef{Li2004}{DemoTools}
#' @export
@@ -452,7 +461,7 @@ interp_lc_lim_estimate <- function(M, dates_in, dates_out, SVD = F){
# smooth rule previous to solve ambiguous
#' Smooth and apply lt_ambiguous
#' @description Considering different mortality input for each sex/year data,
-#' smooth olders with makeham or kannisto in case no law was specified,
+#' smooth older ages with makeham or kannisto in case no law was specified,
#' and return a data.frame with standard LT.
#' @details Makeham is chosen if last age is less than 90. Else Kannisto.
#' @param input data.frame. with cols: Date, Sex, Age, nMx (opt), nqx (opt), lx (opt)
diff --git a/R/interp_lc_lim_group.R b/R/interp_lc_lim_group.R
index 502ab21e9..efbdc6790 100644
--- a/R/interp_lc_lim_group.R
+++ b/R/interp_lc_lim_group.R
@@ -9,11 +9,11 @@
# text/messages/warnings. Specially the case when no `id` is given
#'
-#' @description Given a data frame with groups (country, region, sex), dates, sex and mortality data by age (rates, conditionated probabilities of death
+#' @description Given a data frame with groups (country, region, sex), dates, sex and mortality data by age (rates, conditioned probabilities of death
#' or survival function), this function interpolate/extrapolate life tables
#' using the method for limited data suggested by Li et. al (2004) (at least three observed years).
#'
-#' @details Based on spreedsheet "Li_2018_Limited_Lee-Carter-v4.xlsm" from UN.
+#' @details Based on spreadsheet "Li_2018_Limited_Lee-Carter-v4.xlsm" from UN.
#' Useful for abridged or single ages, and allows output in both formats also.
#' One option is the use of non-divergent method for sex coherency (Li & Lee, 2005).
#' The other is the possibility of fitting `"k"` to replicate `"e_0"` at some given dates.
@@ -23,9 +23,9 @@
#' @note Draft Version
#'
#' @param input data.frame. Columns: id, Date, Sex, Age, nMx (opt), nqx (opt), lx (opt).
-#' The first column (id) cn be a numeric index or charcter vector identifying each group.
+#' The first column (id) can be a numeric index or character vector identifying each group.
#' @param dates_out numeric. Vector of decimal years to interpolate or extrapolate.
-#' @param Single logical. Wheter or not the lifetable output is by single ages.
+#' @param Single logical. Whether or not the lifetable output is by single ages.
#' @param input_e0 data.frame with cols: id, Date, Sex and `"e_0"`. This should be fitted when apply method.
#' @param prev_divergence logical. Whether or not prevent divergence and sex crossover between groups. Default `FALSE.`
#' @param weights list. For `prev_divergence` option. A double for each element of a list with names as `id` columns. Should sum up to 1. Default: same weight for each group.
diff --git a/R/log_quad_augm.R b/R/log_quad_augm.R
new file mode 100644
index 000000000..fcbb921e7
--- /dev/null
+++ b/R/log_quad_augm.R
@@ -0,0 +1,176 @@
+
+# author: IW --------------------------------------------------------------
+
+#' HMD pattern for adult ages.
+#' @description Adjust rates in oldest ages using HMD pattern, based on log-quad method.
+#' @details One possible scenario when mortality data on last ages is not reliable, is to use a mortality pattern with some known index in previous ages.
+#' This function gives a HMD pattern based on 5q0 and 45q15, using log-quad model. Additionally, a value on mortality between 60 and 75 can be included to make a better adjustment in level.
+#' @param nMx numeric. Vector of mortality rates in abridged age classes.
+#' @param Age integer. Single ages (abridged not allowed).
+#' @param Sex character. Either male \code{"m"}, female \code{"f"}, or both \code{"b"}.
+#' @param q0_5 numeric. Probability of death from birth to age 5. By default implicit values in `nMx` should be entered.
+#' @param q15_45 numeric. Probability of death from age 15 to age 60. By default implicit values in `nMx` should be entered.
+#' @param fitted_logquad Optional, defaults to \code{NULL}. An object of class
+#' \code{wilmoth}. If full HMD is not enough, one
+#' can fit a Log-Quadratic (\url{https://github.com/mpascariu/MortalityEstimate}) model
+#' based on any other collection of life tables;
+#' @param q60_15 numeric. Probability of death from age 60 to age 75. When external information on those ages level is available,
+#' can be included to increase parameter `ax` from log-quad model in last ages (Li, 2003).
+#' @param Age_transition integer. Age from which the transition to the HMD pattern starts.
+#' @param window_transition integer. Number of ages to the left and to the right of `Age_transition` to do a log-linear transition in rates.
+#' @param plot_comparison Show or not a plot with the result.
+#' @param ... Other arguments to be passed on to the \code{lt_single} function.
+#' @export
+#' @return life table as in \code{lt_single} function.
+#' @examples
+#' # Mortality rates from UN Chilean with e0=70. What would be the rates based on HMD pattern?
+#' # In this case making a transition of 10 years at age 80, and returning an OAG=100.
+#' \dontrun{
+#' lt <- DemoToolsData::modelLTx1
+#' lt <- lt[lt$family == "Chilean" & lt$sex == "female" & lt$e0 == 70,]
+#' chilean70_adjHMD <- HMD_old_logquad(nMx = lt$mx1,
+#' Age = lt$age,
+#' Sex = "f",
+#' q0_5 = 1 - lt$lx1[lt$age==5]/lt$lx1[lt$age==0],
+#' q15_45 = 1 - lt$lx1[lt$age==60]/lt$lx1[lt$age==15],
+#' Age_transition = 80,
+#' window_transition = 10,
+#' plot_comparison = TRUE,
+#' OAnew = 100)
+#' # We know (as an example) that q60_15 is .5 higher than what HMD pattern would be.
+#' chilean70_adjHMD_augm <- HMD_old_logquad(nMx = lt$mx1,
+#' Age = lt$age,
+#' Sex = "f",
+#' q0_5 = 1 - lt$lx1[lt$age==5]/lt$lx1[lt$age==0],
+#' q15_45 = 1 - lt$lx1[lt$age==60]/lt$lx1[lt$age==15],
+#' q60_15 = (1 - lt$lx1[lt$age==75]/lt$lx1[lt$age==60]) * 1.5,
+#' Age_transition = 80, window_transition = 10,
+#' OAnew = 100, plot_comparison = TRUE)
+#' }
+
+HMD_old_logquad <- function(nMx, Age = NULL,
+ Sex = "b",
+ q0_5 = NULL, q15_45 = NULL,
+ q60_15 = NULL,
+ Age_transition = 80,
+ window_transition = 3,
+ plot_comparison = FALSE,
+ fitted_logquad = NULL,
+ ...){
+
+ # check if age is not complete
+ if(!is_single(Age)) stop("Need rates by single age.")
+ if(is.null(Age)) Age <- 0:length(nMx)
+
+ # check if an optional fitted_logquad is specified
+ if(is.null(fitted_logquad)){
+ if(Sex == "b"){
+ fitted_logquad <- DemoTools::fitted_logquad_b
+ }
+ if(Sex == "f"){
+ fitted_logquad <- DemoTools::fitted_logquad_f
+ }
+ if(Sex == "m"){
+ fitted_logquad <- DemoTools::fitted_logquad_m
+ }
+ }
+
+ # load the log-quad model already fitted in the package (different from MortalityEstimate) and estimate with input data
+ logquad_model <- lt_model_lq(Sex = Sex, q0_5 = q0_5, q15_45 = q15_45, fitted_logquad = fitted_logquad)
+ mx_logquad_5q0_45q15 <- logquad_model$lt
+
+ # augmented method if was asked
+ if(!is.null(q60_15)){
+ mx_logquad_5q0_45q15 <- tryCatch({
+ logquad_augmented(coeffs = fitted_logquad$coefficients,
+ k = logquad_model$values$k,
+ Age = logquad_model$lt$Age,
+ q0_5 = q0_5, q60_15 = q60_15, Sex = Sex)},
+ error = function(e) {
+ warning("Augmented log-quad was not possible. Revise q60_15. Returned basic log-quad.")
+ mx_logquad_5q0_45q15})
+ }
+
+ # make it single
+ mx_logquad_5q0_45q15 <- lt_abridged2single(nMx = mx_logquad_5q0_45q15$nMx,
+ Age = mx_logquad_5q0_45q15$Age,
+ lx = mx_logquad_5q0_45q15$lx,
+ Sex = Sex,
+ ...)
+
+ # smooth transition with a given length window
+ Ages <- 0:max(mx_logquad_5q0_45q15$Age)
+ Age_smooth <- (Age_transition-window_transition):(Age_transition+window_transition)
+ Age_not_smooth <- Ages[!Ages %in% Age_smooth]
+ nMx_to_smooth_transition <- data.frame(Age = Age_not_smooth,
+                                         nMx = c(nMx[Age < min(Age_smooth)], mx_logquad_5q0_45q15$nMx[mx_logquad_5q0_45q15$Age > max(Age_smooth)]))
+ nMx_interpolated <- exp(stats::approx(x = nMx_to_smooth_transition$Age,
+ y = log(nMx_to_smooth_transition$nMx),
+ xout = Age_smooth)$y)
+ smooth_transtition_nMx <- dplyr::bind_rows(
+ nMx_to_smooth_transition,
+ data.frame(Age = Age_smooth, nMx = nMx_interpolated)) %>%
+ dplyr::arrange(Age)
+
+ # plot diagnostic
+ if(plot_comparison){
+ Type <- NULL
+ df1 <- data.frame(Age = Age, nMx = nMx)
+ df1$Type <- "Input"
+ df2 <- data.frame(Age = smooth_transtition_nMx$Age, nMx = smooth_transtition_nMx$nMx)
+ df2$Type <- "Adjusted"
+ df3 <- data.frame(Age = Age_smooth, nMx = nMx_interpolated)
+ df3$Type <- "Transition"
+ rbind(df1, df2, df3) %>%
+ ggplot2::ggplot(ggplot2::aes(x = Age, y = nMx, color = Type)) +
+ ggplot2::geom_line() +
+ ggplot2::geom_vline(xintercept = Age_transition, linetype = "dashed", color = "grey") +
+ ggplot2::scale_y_log10() +
+ ggplot2::theme_bw()
+ }
+
+ # rebuild lt and finish, with input OAnew if was not defined as additional input argument
+ lt_extra_arguments <- list(...)
+ if(!("OAnew" %in% names(lt_extra_arguments))){OAnew <- max(Age)} else {OAnew <- lt_extra_arguments$OAnew}
+ mx_logquad_5q0_45q15 <- lt_single_mx(nMx = smooth_transtition_nMx$nMx,
+ Age = smooth_transtition_nMx$Age,
+ Sex = Sex,
+ OAnew = OAnew)
+ return(mx_logquad_5q0_45q15)
+}
+
+#' Augmented logquad
+#' @description Adjust rates in oldest ages that come from a HMD model, using an external estimate of 15q60 (Li, 2014). As an example see \code{\link[DemoTools]{HMD_old_logquad}}.
+#' @details Parameter \code{a(x)} is augmented based on an external estimate of 15q60.
+#' @param coeffs data.frame. Columns \code{a(x)}, \code{b(x)}, \code{c(x)} and \code{v(x)} from fitted logquad model. See \code{fitted_logquad_b}.
+#' @param k numeric. Adult mortality related value from log-quad estimation based on one or two input parameters. See \code{lt_model_lq}.
+#' @param Sex character. Either male \code{"m"}, female \code{"f"}, or both \code{"b"}.
+#' @param Age integer. Abridged lower bound ages. Same length than rows in `coeffs`.
+#' @param q0_5 numeric. Probability of death from birth to age 5.
+#' @param q60_15 numeric. Probability of death from age 60 to age 75.
+#' @param ... Other arguments to be passed on to the \code{lt_abridged} function.
+#' @export
+#' @return life table as in \code{lt_abridged} function.
+#' @references See [Li (2014)](https://www.un.org/development/desa/pd/content/estimating-life-tables-developing-countries).
+
+logquad_augmented <- function(coeffs, k, q0_5, q60_15, Sex = "b", Age, ...){
+
+ # arrange parameters and get rates from the model
+ params <- c(1, log(q0_5), log(q0_5)^2, k)
+ mx_logquad <- exp(rowSums(as.matrix(coeffs) %*% diag(params)))
+
+ # adjust ax with the ratio between input value and implicit value from model
+ q60_15_hat <- q60_15
+ age_q60_15 <- which(Age == 60)
+ q60_15 <- 1 - exp(-5*(sum(mx_logquad[c(age_q60_15, age_q60_15+1, age_q60_15+2)])))
+ coeffs_hat <- coeffs
+ coeffs_hat$ax[age_q60_15:nrow(coeffs_hat)] <- coeffs_hat$ax[age_q60_15:nrow(coeffs_hat)] + log(log(1-q60_15_hat)/log(1-q60_15))
+
+ # new rates with changed ax
+ mx_logquad_augm <- exp(rowSums(as.matrix(coeffs_hat) %*% diag(params)))
+ lt_logquad_augm <- lt_abridged(nMx = mx_logquad_augm, Age = Age, Sex = Sex, ...)
+
+ # output
+ return(lt_logquad_augm)
+}
\ No newline at end of file
diff --git a/R/lt_abridged.R b/R/lt_abridged.R
index 6630faf74..4ff6c3c81 100644
--- a/R/lt_abridged.R
+++ b/R/lt_abridged.R
@@ -20,7 +20,7 @@
#' is aligned with the other columns in all 5-year age groups, but note the
#' first two values have a slightly different age-interval interpretation:
#' In Age 0, the interpretation is survival from birth until interval 0-4.
-#' In Age 1, it is survival from 0-4 into 5-9. Therafter the age groups align.
+#' In Age 1, it is survival from 0-4 into 5-9. Thereafter the age groups align.
#' This column is required for population projections.
#'
#' @param Deaths numeric. Vector of death counts in abridged age classes.
@@ -36,12 +36,13 @@
#' @param region character. North, East, South, or West: code{"n"}, code{"e"}, code{"s"}, code{"w"}. Default code{"w"}.
#' @param IMR numeric. Infant mortality rate \ifelse{html}{\out{q0}}{\eqn{q_0}}, in case available and \code{nqx} is not specified. Default \code{NA}.
#' @param mod logical. If \code{"un"} specified for \code{axmethod}, whether or not to use Nan Li's modification for ages 5-14. Default \code{TRUE}.
-#' @param SRB the sex ratio at birth (boys / girls), detault 1.05
+#' @param SRB the sex ratio at birth (boys / girls), default 1.05
#' @param OAnew integer. Desired open age group (5-year ages only). Default \code{max(Age)}. If higher then rates are extrapolated.
#' @param OAG logical. Whether or not the last element of \code{nMx} (or \code{nqx} or \code{lx}) is an open age group. Default \code{TRUE}.
#' @param extrapLaw character. If extrapolating, which parametric mortality law should be invoked? Options include
#' \code{"Kannisto", "Kannisto_Makeham", "Makeham", "Gompertz", "GGompertz", "Beard", "Beard_Makeham", "Quadratic"}. Default \code{"Kannisto"} if the highest age is at least 90, otherwise `"makeham"`. See details.
#' @inheritParams lt_a_closeout
+#' @importFrom dplyr case_when
#' @export
#' @return Lifetable in data.frame with columns
#' \itemize{
@@ -183,9 +184,30 @@ lt_abridged <- function(Deaths = NULL,
extrapFrom = max(Age),
extrapFit = NULL,
...) {
+
+ # some handy name coercion
+ a0rule <- case_when(a0rule == "Andreev-Kingkade" ~ "ak",
+ a0rule == "Coale-Demeny" ~ "cd",
+ TRUE ~ a0rule)
+ axmethod <- case_when(axmethod == "UN (Greville)" ~ "un",
+ axmethod == "PASEX" ~ "pas",
+ TRUE ~ axmethod)
+ Sex <- substr(Sex, 1, 1) |>
+ tolower()
+ Sex <- ifelse(Sex == "t", "b", Sex)
+
+ region <- substr(region, 1, 1) |>
+ tolower()
+ if (!is.null(extrapLaw)){
+ extrapLaw <- tolower(extrapLaw)
+ }
+
+
+ # now we check args
axmethod <- match.arg(axmethod, choices = c("pas","un"))
Sex <- match.arg(Sex, choices = c("m","f","b"))
a0rule <- match.arg(a0rule, choices = c("ak","cd"))
+
if (!is.null(extrapLaw)){
extrapLaw <- tolower(extrapLaw)
extrapLaw <- match.arg(extrapLaw, choices = c("kannisto",
@@ -318,27 +340,30 @@ lt_abridged <- function(Deaths = NULL,
momega <- nMx[length(nMx)]
}
# --------------------------------
- # begin extrapolation:
- # TR: 13 Oct 2018. always extrapolate to 130 no matter what,
- # then truncate to OAnew in all cases. This will ensure more robust closeouts
- # and an e(x) that doesn't depend on OAnew. 130 is used similarly by HMD.
- x_extr <- seq(extrapFrom, 130, by = 5)
-
- Mxnew <- lt_rule_m_extrapolate(
- x = Age,
- mx = nMx,
- x_fit = extrapFit,
- x_extr = x_extr,
- law = extrapLaw,
- ...)
-
- nMxext <- Mxnew$values
- Age2 <- names2age(nMxext)
-
- keepi <- Age2 < extrapFrom
- nMxext[keepi] <- nMx[Age < extrapFrom]
- nMx <- nMxext
- Age <- Age2
+
+ if (max(Age) < 130){
+ # begin extrapolation:
+ # TR: 13 Oct 2018. always extrapolate to 130 no matter what,
+ # then truncate to OAnew in all cases. This will ensure more robust closeouts
+ # and an e(x) that doesn't depend on OAnew. 130 is used similarly by HMD.
+ x_extr <- seq(extrapFrom, 130, by = 5)
+
+ Mxnew <- lt_rule_m_extrapolate(
+ x = Age,
+ mx = nMx,
+ x_fit = extrapFit,
+ x_extr = x_extr,
+ law = extrapLaw,
+ ...)
+
+ nMxext <- Mxnew$values
+ Age2 <- names2age(nMxext)
+
+ keepi <- Age2 < extrapFrom
+ nMxext[keepi] <- nMx[Age < extrapFrom]
+ nMx <- nMxext
+ Age <- Age2
+ }
AgeInt <- age2int(
Age,
OAG = TRUE,
@@ -416,7 +441,9 @@ lt_abridged <- function(Deaths = NULL,
nMx[N] <- lx[N] / Tx[N]
}
- Sx <- lt_id_Ll_S(nLx, lx, AgeInt, N = 5)
+ Sx <- c(lt_id_Ll_S(nLx, lx, Age, AgeInt, N = 5), 0.0)
+ names(Sx) <- NULL
+
# output is an unrounded, unsmoothed lifetable
out <- data.frame(
Age = Age,
diff --git a/R/lt_id.R b/R/lt_id.R
index 400645dc3..7c6ed00f6 100644
--- a/R/lt_id.R
+++ b/R/lt_id.R
@@ -118,15 +118,16 @@ lt_id_l_d <- function(lx) {
#' @title Derive lifetable death probabilities from survivorship.
#' @description This lifetable identity is the same no matter what kind of lifetable is required.
#' You can find it in any demography textbook.
-#' @details The vector returned is the same length as \code{lx} and it sums to the lifetable radix.
-#' If the radix is one then this is the discrete deaths distribution.
+#' @details The vector returned is the same length as \code{lx}.
#'
#' @param lx numeric. Vector of age-specific lifetable survivorship.
#' @references
#' \insertRef{preston2000demography}{DemoTools}
-#' @return ndx vector of lifetable deaths.
+#' @return `qx` values of age-specific mortality rates. The last value is always 1.0
#' @export
lt_id_l_q <- function(lx) {
+ # TR note if there are trailing 0s in lx then
+ # this can be NaN
dx <- lt_id_l_d(lx)
dx / lx
}
@@ -271,27 +272,47 @@ lt_id_ma_q <- function(nMx, nax, AgeInt, closeout = TRUE, IMR) {
}
#' @title Calculate survivor ratios
-#' @description An extra lifetable column for use in projections, which require uniform time steps both both age and period. Intervals are either single age (\code{N=1}) or five-year ages (\code{N=5}). Input vectors are assumed to come from either single or standard abridged ages.
+#' @description An extra lifetable column for use in projections, which require uniform time steps in both age and period. Intervals are either single age or five-year ages. Input vectors are assumed to come from either single or standard abridged ages. Note that the ages of the output Sx are the ages the population would be after the N-year projection.
#' @details This function does not account for \code{nLx} having been pre-binned into uniform 5-year age widths, which will throw an error. Just leave them in abridged ages instead. Note that in the case of abridged ages, the interpretation for the first and second value don't follow the original abridged age intervals: the first value in the probability of surviving from birth into ages 0-4 in the first five years, and the second value is the probability of surviving from 0-4 to 5-9. This represents a slight misalignment with the rest of the lifetable, user beware.
-#' @inheritParams lt_abridged
#' @param nLx numeric vector of lifetable exposure.
+#' @param lx numeric vector of lifetable survivors from the same lifetable as \code{nLx}. The radix is inferred from \code{nLx} in case this is \code{NULL}.
+#' @param Age integer vector of starting ages.
+#' @param AgeInt integer vector of age intervals.
#' @param N integer, the age width for survivor ratios, either 5 or 1. Default 5.
#' @export
-lt_id_Ll_S <- function(nLx, lx, AgeInt, N = c(5, 1)) {
+
+lt_id_Ll_S <- function(nLx, lx = NULL, Age, AgeInt = NULL, N = 5) {
+ # number ages
n <- length(nLx)
- stopifnot(length(lx) == n)
+ # same length of vectors
+ stopifnot(length(nLx) == length(Age))
# either we're in 1 or 5 year age groups
stopifnot(length(N) == 1 & N %in% c(5, 1))
- ## compute Sx (missing from the LTbr computation
- Sx <- rep(NA, n)
+ # infer radix in case lx is not given
+ radix <- lx[1]
+ if(is.null(lx)){
+ radix <- ifelse(nLx[1]>1, 10^nchar(trunc(nLx[1])), 1)
+ }
+  # validate nLx. Some zeros at ages > 100 can happen. YP should be non-zero at ages < 80, even in historical populations (?)
+  # IW: relaxed the strict-positivity condition on YP because of huge heterogeneity in input lifetables
+ stopifnot(all(nLx>=0, # nLx[Age<60]>0,
+ nLx[-n] < (radix*N)))
+
+ ## compute Sx (missing from the LTbr computation)
# first age group is survival from births to the second age group
if (N == 5) {
- # double check because assuming abridged nLx is given...
+ Sx <- rep(NA, n-1)
+ # infer AgeInt in case is not given
+ if(is.null(AgeInt)){
+ AgeInt <- inferAgeIntAbr(Age)
+ }
stopifnot(length(AgeInt) == n)
ageintcompare <- inferAgeIntAbr(vec = nLx)
- stopifnot(all(ageintcompare[-n] == AgeInt[-n]))
+ if (Age[1] == 0){
+ stopifnot(all(ageintcompare[-n] == AgeInt[-n]))
+ }
# birth until 0-4
- Sx[1] <- (nLx[1] + nLx[2]) / ((AgeInt[1] + AgeInt[2]) * lx[1])
+ Sx[1] <- (nLx[1] + nLx[2]) / ((AgeInt[1] + AgeInt[2]) * radix)
# second age group is survival age 0-4 to age 5-9
Sx[2] <- nLx[3] / (nLx[1] + nLx[2])
# middle age groups
@@ -299,18 +320,20 @@ lt_id_Ll_S <- function(nLx, lx, AgeInt, N = c(5, 1)) {
Sx[mind] <- nLx[mind + 1] / nLx[mind]
# penultimate age group
Sx[n - 1] <- nLx[n] / (nLx[n - 1] + nLx[n])
- # closeout
- Sx[n] <- 0.0
+    # names are the ages at arrival
+ names(Sx) <- seq(0,Age[length(Age)],5)
}
if (N == 1) {
- LLXX <- c(lx[1], nLx)
+ Sx <- rep(NA, n)
+ LLXX <- c(radix, nLx)
mind <- 1:(n - 1)
Sx[mind] <- LLXX[mind + 1] / LLXX[mind]
# closeout
Sx[n] <- nLx[n] / (nLx[n - 1] + nLx[n])
+    # names are the ages at arrival
+ names(Sx) <- 0:(n-1)
}
-
-
-
Sx
}
+
+
diff --git a/R/lt_model_lq.R b/R/lt_model_lq.R
index ee601dea1..fa063b710 100644
--- a/R/lt_model_lq.R
+++ b/R/lt_model_lq.R
@@ -12,7 +12,7 @@
#' Estimate Wilmoth Model Life Table
#'
-#' Construct model life tables based on the Log-Quadratic (wilmoth) estimates
+#' Construct model life tables based on the Log-Quadratic (Wilmoth) estimates
#' with various choices of 2 input parameters:
#' \code{q0_5, q0_1, q15_45, q15_35} and \code{e0}. There are 8 possible
#' combinations (see examples below).
@@ -20,7 +20,7 @@
#' @details Due to limitations of the R language the notation for probability
#' of dying \code{nqx} is written \code{qx_n}, where \code{x} and \code{n} are
#' integers. For example \code{45q15} is represented as \code{q45_15}.
-#' @note This function is ported from \code{MortalityEstimate::wilmothLT} experimental package by Marius Pascariu. The package is no longe maintained. The latest version can be found here: \url{https://github.com/mpascariu/MortalityEstimate}
+#' @note This function is ported from \code{MortalityEstimate::wilmothLT} experimental package by Marius Pascariu. The package is no longer maintained. The latest version can be found here: \url{https://github.com/mpascariu/MortalityEstimate}
#' @param Sex Choose the sex of the population. This choice defines the use
#' of a corresponding Log-Quadratic (\code{wilmoth})
#' model fitted for the whole Human Mortality Database (as of Dec 2019,
diff --git a/R/lt_regroup_age.R b/R/lt_regroup_age.R
index 516d0cde7..b2e557011 100644
--- a/R/lt_regroup_age.R
+++ b/R/lt_regroup_age.R
@@ -53,7 +53,8 @@ lt_single2abridged <- function(lx,
nAx[N] <- ex[N]
nMx <- ndx/nLx
Tx <- lt_id_L_T(nLx)
- Sx <- lt_id_Ll_S(nLx, lx, AgeInt, N = 5)
+ Sx <- c(lt_id_Ll_S(nLx, lx, Age5, AgeInt, N = 5),0.0)
+ names(Sx) <- NULL
out <- data.frame(
Age = Age5,
@@ -80,7 +81,7 @@ lt_single2abridged <- function(lx,
#' @description Computes single year of age life table by graduating the mortality schedule of an abridged life table, using the `ungroup::pclm()` to ungroup binned count data. Returns complete single-age lifetable.
#' @details Similar to `lt_abridged()` details, forthcoming.
#' @inheritParams lt_abridged
-#' @param ... optional arguments passed to `pclm()`. For example, if you pass an expicit `lambda` parameter via the `control` argument, you can speed up estimation
+#' @param ... optional arguments passed to `pclm()`. For example, if you pass an explicit `lambda` parameter via the `control` argument, you can speed up estimation
#' @return Single-year lifetable in data.frame with columns
#' \itemize{
#' \item{Age}{integer. Lower bound of single year age class},
@@ -98,6 +99,7 @@ lt_single2abridged <- function(lx,
#'
#' @export
#' @importFrom ungroup pclm
+#' @importFrom dplyr case_when
#' @examples
#' Mx <- c(.23669,.04672,.00982,.00511,.00697,.01036,.01169,
#' .01332,.01528,.01757,.02092,.02517,.03225,.04241,.06056,
@@ -150,6 +152,23 @@ lt_abridged2single <- function(
NN <- length(Age)
#stopifnot(length(nMx) == NN)
+ # some handy name coercion
+ a0rule <- case_when(a0rule == "Andreev-Kingkade" ~ "ak",
+ a0rule == "Coale-Demeny" ~ "cd",
+ TRUE ~ a0rule)
+ axmethod <- case_when(axmethod == "UN (Greville)" ~ "un",
+ axmethod == "PASEX" ~ "pas",
+ TRUE ~ axmethod)
+ Sex <- substr(Sex, 1, 1) |>
+ tolower()
+ Sex <- ifelse(Sex == "t", "b", Sex)
+
+ region <- substr(region, 1, 1) |>
+ tolower()
+ if (!is.null(extrapLaw)){
+ extrapLaw <- tolower(extrapLaw)
+ }
+
if (!is.null(extrapLaw)){
extrapLaw <- tolower(extrapLaw)
extrapLaw <- match.arg(extrapLaw, choices = c("kannisto",
@@ -218,9 +237,9 @@ lt_abridged2single <- function(
# redefine Age and extrapFit for single year ages and new maxage
a1 <- 1:length(M) - 1
- extrapFit <- a1[a1 >= min(extrapFit, (max(Age)-20)) & a1 <= max(Age)]
+ extrapFit <- a1[a1 >= min(extrapFit, (max(a1)-20)) & a1 <= max(Age)]
# always refit from 110 even if extrapFrom > 110
- extrapFrom <- min(max(Age), 110)
+ extrapFrom <- min(max(a1), 110)
# compute life table columns from single year mx
LT <- lt_single_mx(nMx = M,
@@ -242,7 +261,7 @@ lt_abridged2single <- function(
}
-#' calculate an abidged or single age lifetable from abridged or sinlge age data
+#' calculate an abridged or single age lifetable from abridged or single age data
#' @description This is a wrapper around the other lifetable utilities. We start with either `nMx`, `nqx`, or `lx` in single or abridged ages, and returns a full lifetable in either single or abridged ages. All optional arguments of `lt_abridged()` or `lt_single*()` can be passed in, for instance the `nax` assumptions or the extrapolation arguments.
#'
#' @param nMx_or_nqx_or_lx numeric vector of either `nMx`, `nqx`, or `lx`
@@ -311,7 +330,7 @@ lt_ambiguous <- function(nMx_or_nqx_or_lx = NULL,
out <- lt_single_qx(nqx = xx, Age = Age, Sex = Sex, ...)
}
if (type == "q" & !Single){
- out <- lt_single_qx(qx = xx, Age = Age, Sex = Sex, ...)
+ out <- lt_single_qx(nqx = xx, Age = Age, Sex = Sex, ...)
out <- lt_single2abridged(lx = out$lx,nLx = out$nLx, ex = out$ex)
}
}
diff --git a/R/lt_rule.R b/R/lt_rule.R
index 828e81541..b16b56df2 100644
--- a/R/lt_rule.R
+++ b/R/lt_rule.R
@@ -88,8 +88,8 @@
#' segments(1, M1_4, 5, M1_4)
#' text(1, c(M0, M1_4, M0_4), c("M0", "M1_4", "M0_4"), pos = 3)
#' }
-lt_rule_4m0_D0 <- function(D04, M04, P04, Sex = c("m", "f")) {
-
+lt_rule_4m0_D0 <- function(D04, M04, P04, Sex = "m") {
+ stopifnot(Sex %in% c("m","f"))
if (missing(M04)) {
M5 <- D04 / P04
} else {
@@ -225,8 +225,8 @@ lt_rule_4m0_D0 <- function(D04, M04, P04, Sex = c("m", "f")) {
#' text(1, c(M0, M1_4, M0_4), c("M0", "M1_4", "M0_4"), pos = 3)
#' }
-lt_rule_4m0_m0 <- function(M04, D04, P04, Sex = c("m", "f")) {
-
+lt_rule_4m0_m0 <- function(M04, D04, P04, Sex ="m") {
+ stopifnot(Sex %in% c("m","f"))
if (missing(M04)) {
M5 <- D04 / P04
} else {
@@ -282,7 +282,7 @@ lt_rule_4m0_m0 <- function(M04, D04, P04, Sex = c("m", "f")) {
#' @title estimates a0 using the Andreev-Kingkade rule of thumb starting with IMR
#'
-#' @description \code{AKq02a0} Andreev Kingkade a0 method. This version has a 3-part segemented linear model, based on cutpoints in q0. Code ported from HMDLifeTables.
+#' @description \code{AKq02a0} Andreev Kingkade a0 method. This version has a 3-part segmented linear model, based on cut points in q0. Code ported from HMDLifeTables.
#'
#' @param q0 a value or vector of values of q0, the death probability in the first year of life.
#' @param Sex either "m" or "f"
@@ -328,9 +328,9 @@ lt_rule_ak_m0_a0 <- function(M0, Sex ){
#'
#' @description This function wraps the two approximations for a0 based on either q0 (IMR) or m0.
#'
-#' @param M0 a value or vector of values of m0, the death probability in the first year of life.
-#' @param q0 a value or vector of values of m0, the death risk in the first year of life.
-#' @param Sex either "m" or "f"
+#' @param M0 a value or vector of values of `1m0`, the death risk in the first year of life.
+#' @param q0 a value or vector of values of `1q0`, the death probability in the first year of life, sometimes approximated with IMR.
+#' @param Sex either `"m"` or `"f"`
#'
#' @return a0, the estimated average age at death of those dying in the first year of life, either a single value or a vector of values.
#'
@@ -343,7 +343,7 @@ lt_rule_1a0_ak <- function(M0 = NULL, q0 = NULL, Sex){
a0 <- lt_rule_ak_q0_a0(q0,Sex)
}
if (is.null(q0) & !is.null(M0)){
- a0 <- lt_rule_ak_q0_a0(M0,Sex)
+ a0 <- lt_rule_ak_m0_a0(M0,Sex)
}
a0
}
@@ -351,14 +351,14 @@ lt_rule_1a0_ak <- function(M0 = NULL, q0 = NULL, Sex){
#' @title calculate a0 in different ways
#'
-#' @description This function wraps the Coale-Demeny and Andreev-Kingkade approximations for a0, which can come from M0, qo, or IMR.
+#' @description This function wraps the Coale-Demeny and Andreev-Kingkade approximations for `a0`, which can come from `M0`, `q0`, or `IMR`.
#' @details If sex is given as both, \code{"b"}, then we calculate the male and female results separately, then weight them together using SRB. This is bad in theory, but the leverage is trivial, and it's better than using male or female coefs for the total population.
#'
#' @inheritParams lt_rule_1a0_cd
#' @param rule character. Either \code{"ak"} (Andreev-Kingkade) or \code{"cd"} (Coale-Demeny).
#' @param Sex character, either \code{"m"}, \code{"f"}, or \code{"b"}
#' @param q0 a value or vector of values of m0, the death risk in the first year of life.
-#' @param SRB the sex ratio at birth (boys / girls), detault 1.05
+#' @param SRB the sex ratio at birth (boys / girls), default 1.05
#' @details Neither Coale-Demeny nor Andreev-Kingkade have explicit a0 rules for both-sexes combined. There's not a good way to arrive at a both-sex a0 estimate without increasing data requirements (you'd need data from each sex, which are not always available). It's more convenient to blend sex-specific a0 estimates based on something. Here we use SRB to do this, for no other reason than it has an easy well-known default value. This is bad because it assumes no sex differences in infant mortality, but this choice has a trivial impact on results.
#' @return a0, the estimated average age at death of those dying in the first year of life, either a single value or a vector of values.
#'
diff --git a/R/lt_single.R b/R/lt_single.R
index 7abec996f..60cbaac5a 100644
--- a/R/lt_single.R
+++ b/R/lt_single.R
@@ -19,6 +19,7 @@
#' \item{ex}{numeric. Age-specific remaining life expectancy.}
#' }
#' @export
+#' @importFrom dplyr case_when
lt_single_mx <- function(nMx,
Age = 1:length(nMx) - 1,
radix = 1e5,
@@ -36,6 +37,21 @@ lt_single_mx <- function(nMx,
...) {
stopifnot(extrapFrom <= max(Age))
+
+ # some handy name coercion
+ a0rule <- case_when(a0rule == "Andreev-Kingkade" ~ "ak",
+ a0rule == "Coale-Demeny" ~ "cd",
+ TRUE ~ a0rule)
+ Sex <- substr(Sex, 1, 1) |>
+ tolower()
+ Sex <- ifelse(Sex == "t", "b", Sex)
+
+ region <- substr(region, 1, 1) |>
+ tolower()
+ if (!is.null(extrapLaw)){
+ extrapLaw <- tolower(extrapLaw)
+ }
+
Sex <- match.arg(Sex, choices = c("m","f","b"))
a0rule <- match.arg(a0rule, choices = c("ak","cd"))
if (!is.null(extrapLaw)){
@@ -74,37 +90,40 @@ lt_single_mx <- function(nMx,
}
# --------------------------
# Now all vectors may end up being longer
- x_extr <- seq(extrapFrom, 130, by = 1)
- Mxnew <- lt_rule_m_extrapolate(
- x = Age,
- mx = nMx,
- x_fit = extrapFit,
- x_extr = x_extr,
- law = extrapLaw,
- ...)
-
- nMxext <- Mxnew$values
- Age2 <- names2age(nMxext)
+ if (max(Age) < 130){
+ x_extr <- seq(extrapFrom, 130, by = 1)
+ Mxnew <- lt_rule_m_extrapolate(
+ x = Age,
+ mx = nMx,
+ x_fit = extrapFit,
+ x_extr = x_extr,
+ law = extrapLaw,
+ ...)
- keepi <- Age2 < extrapFrom
- nMxext[keepi] <- nMx[Age < extrapFrom]
-
- # overwrite some variables:
- nMx <- nMxext
- Age <- Age2
+ nMxext <- Mxnew$values
+ Age2 <- names2age(nMxext)
+
+ keepi <- Age2 < extrapFrom
+ nMxext[keepi] <- nMx[Age < extrapFrom]
+
+ # overwrite some variables:
+ nMx <- nMxext
+ Age <- Age2
+ }
N <- length(Age)
AgeInt <- rep(1, N)
# get ax:
nAx <- rep(.5, N)
- nAx[1] <- lt_rule_1a0(
- rule = a0rule,
- M0 = nMx[1],
- IMR = IMR,
- Sex = Sex,
- region = region,
- SRB = SRB)
-
+ if (Age[1] == 0){
+ nAx[1] <- lt_rule_1a0(
+ rule = a0rule,
+ M0 = nMx[1],
+ IMR = IMR,
+ Sex = Sex,
+ region = region,
+ SRB = SRB)
+ }
# get qx (if pathological qx > 1, ax replaced, assumed constant hazard)
qx <- lt_id_ma_q(
nMx = nMx,
@@ -146,7 +165,7 @@ lt_single_mx <- function(nMx,
AgeInt[N] <- NA
# Survival ratios computed only after nLx is closed out
- Sx <- lt_id_Ll_S(nLx, lx, AgeInt = AgeInt, N = 1)
+ Sx <- lt_id_Ll_S(nLx, lx, Age, AgeInt = AgeInt, N = 1)
if (OAG) {
if (OAnew == OA) {
diff --git a/R/lt_single_qx.R b/R/lt_single_qx.R
index a164e8667..988fd4886 100644
--- a/R/lt_single_qx.R
+++ b/R/lt_single_qx.R
@@ -15,7 +15,7 @@
#' \item{lx}{numeric. Lifetable survivorship}
#' \item{ndx}{numeric. Lifetable deaths distribution.}
#' \item{nLx}{numeric. Lifetable exposure.}
-#' \item{Sx}{numeric. Survivor ratios in uniform 5-year age groups.}
+#' \item{Sx}{numeric. Survivor ratios in uniform single-year age groups.}
#' \item{Tx}{numeric. Lifetable total years left to live above age x.}
#' \item{ex}{numeric. Age-specific remaining life expectancy.}
#' }
@@ -81,13 +81,14 @@ lt_single_qx <- function(nqx,
# compute ax:
nAx <- rep(.5, N)
- nAx[1] <- lt_rule_1a0(rule = a0rule,
- q0 = nqx[1],
- IMR = IMR,
- Sex = Sex,
- region = region,
- SRB = SRB)
-
+ if (Age[1] == 0){
+ nAx[1] <- lt_rule_1a0(rule = a0rule,
+ q0 = nqx[1],
+ IMR = IMR,
+ Sex = Sex,
+ region = region,
+ SRB = SRB)
+ }
# compute 1mx from 1qx and 1ax
nMx <- lt_id_qa_m(nqx = nqx,
nax = nAx,
diff --git a/R/mig_beta.R b/R/mig_beta.R
index 51aaf2d7a..16fdd4e0c 100644
--- a/R/mig_beta.R
+++ b/R/mig_beta.R
@@ -1,3 +1,8 @@
+# [ ] Jan 1 thing not ideal.
+# [ ] C2 and last year can produce big negative in age 0.
+# maybe cut it off and not return estimate for partial year
+# [ ] use Lx ratios to project
+
#' Estimate intercensal migration by comparing census population, by age and
#' sex, to the results of a RUP projection.
#'
@@ -8,6 +13,7 @@
#' the cohort parallelogram assuming uniform distribution assuming it is all
#' migration. It finalizes by summing the estimate by age groups across the entire
#' intercensal period to have a total migration during the entire period.
+#' Alternatively, a child adjustment and an old age adjustment can be applied.
#'
#' @param c1 numeric vector. The first (left) census in single age groups
#' @param c2 numeric vector. The second (right) census in single age groups
@@ -25,6 +31,33 @@
#' @param sex character string, either `"male"`, `"female"`, or `"both"`
#' @param midyear logical. `FALSE` means all Jan 1 dates between `date1` and `date2` are returned. `TRUE` means all July 1 intercensal dates are returned.
#' @param verbose logical. Shall we send informative messages to the console?
+#'
+#' @param child_adjust The method with which to adjust the youngest age groups.
+#' If \code{"none"}, no adjustment is applied (default). If
+#' child-woman ratio (\code{"cwr"}) is chosen, the first cohorts reflecting the
+#' difference between \code{date2 - date1} are adjusted (plus age 0). If
+#' child constant ratio (\code{"constant"}) is chosen, the first 15 age groups
+#' are adjusted.
+#'
+#' @param childage_max The maximum age from which to apply \code{child_adjust}.
+#' By default, set to \code{NULL}, which gets translated into all the cohorts
+#' between \code{date2} and \code{date1}. If \code{date2} is 2010 and
+#' \code{date1} is 2002, the first 8 cohorts are adjusted. Otherwise, the user
+#' can supply an integer.
+#'
+#' @param cwr_factor A numeric between 0 and 1 to which adjust the CWR method
+#' for the young ages from \code{child_adjust}. \strong{This is only used
+#' when \code{child_adjust} is \code{"cwr"}}.
+#'
+#' @param oldage_adjust The type of adjustment to apply to ages at and above
+#' \code{oldage_min}. \code{'beers'} applies a beers graduation method
+#' while \code{'mav'} applies a moving average with cascading on the tails.
+#' For more information see \code{?mav} and \code{?graduation_beers}.
+#'
+#' @param oldage_min The minimum age from which to apply \code{oldage_adjust}.
+#' By default, set to 65, so any adjustment from \code{oldage_adjust} will be
+#' applied for 65+.
+#'
#' @param ... optional arguments passed to \code{lt_single_qx}
#' @export
#'
@@ -36,7 +69,8 @@
#' @examples
#'
#' \dontrun{
-#' mig_beta(
+#'
+#' mig_beta(
#' location = "Russian Federation",
#' sex = "male",
#' c1 = pop1m_rus2002,
@@ -64,13 +98,24 @@ mig_beta <- function(
sex = "both",
midyear = FALSE,
verbose = TRUE,
- ...
- ) {
+ child_adjust = c("none", "cwr", "constant"),
+ childage_max = NULL,
+ cwr_factor = 0.3,
+ oldage_adjust = c("none", "beers", "mav"),
+ oldage_min = 65,
+ ...) {
+ child_adjust <- match.arg(child_adjust)
+ oldage_adjust <- match.arg(oldage_adjust)
# convert the dates into decimal numbers
date1 <- dec.date(date1)
date2 <- dec.date(date2)
+ # If null, assume, the cohorts between censuses date2 and dates2
+ if (is.null(childage_max)) {
+ childage_max <- as.integer(ceiling(date2) - floor(date1))
+ }
+
res_list <- rup(
c1 = c1,
c2 = c2,
@@ -112,67 +157,79 @@ mig_beta <- function(
# and the decum_resid on the values.
mat_resid <-
data.table::dcast(
- pop_jan1[, list(year, age, decum_resid)],
- age ~ year,
- value.var = "decum_resid"
- )
+ pop_jan1[, list(year, age, decum_resid)],
+ age ~ year,
+ value.var = "decum_resid"
+ )
+ mig_vec <- rowSums(mat_resid[,-1], na.rm = TRUE)
# Sum over all ages to get a total decum_resid over all years for each age.
- mig <- stats::setNames(rowSums(mat_resid, na.rm = TRUE), mat_resid$age)
- mig
-}
+ mig <- stats::setNames(mig_vec, mat_resid$age)
+ # Child adjustment
+ mig <-
+ switch(
+ child_adjust,
+ "none" = mig,
+ "cwr" = mig_beta_cwr(mig, c1, c2, date1, date2, n_cohs = childage_max, cwr_factor = cwr_factor),
+ "constant" = mig_beta_constant_child(mig, c1, c2, ageMax = childage_max)
+ )
+ # Old age adjustment
+ mig_oldage <-
+ switch(
+ oldage_adjust,
+ "none" = mig,
+ "beers" = graduate_beers(mig, as.integer(names(mig)), AgeInt = 1),
+ "mav" = mav(mig, names(mig), tails = TRUE)
+ )
+ # Only apply the old age adjustment on ages above oldage_min
+ ages_oldages <- as.integer(names(mig_oldage))
+ mig[ages_oldages >= oldage_min] <- mig_oldage[ages_oldages >= oldage_min]
+
+ mig
+}
-mig_beta_cwr <- function(mig,
- c1_females,
- c2_females,
- date1,
- date2,
- maternal_window = 30,
- maternal_min = 15){
+mig_beta_cwr <- function(mig,
+ c1_females,
+ c2_females,
+ date1,
+ date2,
+ maternal_window = 30,
+ maternal_min = 15,
+ n_cohs = NULL,
+ cwr_factor = 0.3) {
+
age <- names2age(mig)
-
+
# conservative guess at how many child ages to cover:
- n_cohs <- as.integer(ceiling(date2) - floor(date1))
-
+ if (is.null(n_cohs)) n_cohs <- as.integer(ceiling(date2) - floor(date1))
+
mig_out <- mig
- for (i in 1:n_cohs){
+ for (i in 1:n_cohs) {
# index maternal ages
- a_min <- i + maternal_min
- a_max <- min(i + maternal_min + maternal_window, 49)
- mat_ind <- a_min:a_max
- cwr_i <- (c1_females[i] / sum(c1_females[mat_ind]) + c2_females[i] / sum(c2_females[mat_ind])) / 2
+ a_min <- i + maternal_min
+ a_max <- min(i + maternal_min + maternal_window, 49)
+ mat_ind <- a_min:a_max
+ cwr_i <- (c1_females[i] / sum(c1_females[mat_ind]) + c2_females[i] / sum(c2_females[mat_ind])) / 2
# proportional to maternal neg mig.
- mig_out[i] <- cwr_i * sum(mig[mat_ind])
+ mig_out[i] <- cwr_factor * cwr_i * sum(mig[mat_ind])
}
-
+
mig_out
}
+# rough stab at a constant child adjustment
+mig_beta_constant_child <- function(mig, c1, c2, ageMax = 14) {
+ age <- names2age(mig)
+
+ denom <- (c1 + c2) / 2
-# TR: prep for constant child. Need denom for rates though,
-# but ideally without re-calculating an intermediate object
-# that was already needed. Maybe be we can get exposures
-# from RUP. Hmm.
-# mig_beta_constant_child <- function(mig,
-# c1,
-# c2,
-# date1,
-# date2,
-# maternal_window = 30,
-# maternal_min = 15){
-# age <- names2age(mig)
-#
-# # conservative guess at how many child ages to cover:
-# n_cohs <- as.integer(ceiling(date2) - floor(date1))
-#
-# mig_out <- mig
-#
-#
-#
-# mig_out
-# }
-#
+ ind <- age <= ageMax
+ mig_rate_const <- sum(mig[ind]) / sum(denom[ind])
+ mig[ind] <- denom[ind] * mig_rate_const
+
+ mig
+}
diff --git a/R/mig_rc.R b/R/mig_rc.R
index 8ec34743c..28ee1c223 100644
--- a/R/mig_rc.R
+++ b/R/mig_rc.R
@@ -14,7 +14,7 @@
#' @export
#' @details In the full 13 parameter model, the migration rate at age x, \eqn{m(x)} is defined as
-#' \deqn{m(x) = a1*exp(-1*alpha1*x) + a2*exp(-1*alpha2*(x - mu2) - exp(-1*lambda2*(x - mu2))) + a3*exp(-1*alpha3*(x - 3) - exp(-1*lambda3*(x - mu3))) + a4*exp(lambda4*x) + c}
+#' \deqn{m(x) = a1*exp(-1*alpha1*x) + a2*exp(-1*alpha2*(x - mu2) - exp(-1*lambda2*(x - mu2))) + a3*exp(-1*alpha3*(x - mu3) - exp(-1*lambda3*(x - mu3))) + a4*exp(lambda4*x) + c}
#'
#' The first, second, third and fourth pieces of the equation represent pre-working age, working age, retirement and post-retirement age patterns, respectively.
#' Models with less parameters gradually remove terms at the older ages. Parameters in each family are:
diff --git a/R/mig_resid.R b/R/mig_resid.R
index 17984aed1..9f387864b 100644
--- a/R/mig_resid.R
+++ b/R/mig_resid.R
@@ -371,7 +371,7 @@ mig_resid_stock <- function(pop_m_mat,
is.numeric(ages_asfr)
)
-# <<<<<<< HEAD
+
# # Check in dimensions are ok - still working on this
# if(ncol(asfr_mat) == ncol(pop_f_mat) -1 & nrow(sr_f_mat) == nrow(pop_f_mat) -1){
# print("matrix dimensions are correct")
@@ -389,22 +389,23 @@ mig_resid_stock <- function(pop_m_mat,
# asfr_mat
# sr_f_mat
# }
-# =======
-# >>>>>>> 362ae9857574b05c519c8de40548461d6b9070dd
+
# Migration net of only survivors
- net_mig_m <- migresid_net_surv(pop_m_mat, sr_m_mat)
- net_mig_f <- migresid_net_surv(pop_f_mat, sr_f_mat)
+ net_mig_m <- migresid_net_surv(pop_mat = pop_m_mat,
+ sr_mat = sr_m_mat)
+ net_mig_f <- migresid_net_surv(pop_mat = pop_f_mat,
+ sr_mat = sr_f_mat)
# fertility_index <- which(ages %in% ages_asfr)
# Returns all births for all years
age_interval <- unique(diff(ages))
all_births <- migresid_births(
- pop_f_mat,
- asfr_mat,
+ pop_f_mat = pop_f_mat,
+ asfr_mat = asfr_mat,
# fertility_index,
- age_interval
+ age_interval = age_interval
)
# With all_births already calculated, separate between
@@ -414,17 +415,17 @@ mig_resid_stock <- function(pop_m_mat,
births_f <- all_births * (1 / (1 + srb_vec[byrs]))
net_mig_m <- migresid_net_surv_first_ageg(
- net_mig_m,
- pop_m_mat,
- births_m,
- sr_m_mat
+ net_mig = net_mig_m,
+ pop_mat = pop_m_mat,
+ births = births_m,
+ sr_mat = sr_m_mat
)
net_mig_f <- migresid_net_surv_first_ageg(
- net_mig_f,
- pop_f_mat,
- births_f,
- sr_f_mat
+ net_mig = net_mig_f,
+ pop_mat = pop_f_mat,
+ births = births_f,
+ sr_mat = sr_f_mat
)
# First year is empty, so we exclude
@@ -490,12 +491,12 @@ mig_resid_cohort <- function(pop_m_mat,
# Adjust last age group in the bounds
mig_bounds <- migresid_bounds_last_ageg(
- net_mig_m,
- net_mig_f,
- mig_upper_m,
- mig_lower_m,
- mig_upper_f,
- mig_lower_f
+ net_mig_m = net_mig_m,
+ net_mig_f = net_mig_f,
+ mig_upper_m = mig_upper_m,
+ mig_lower_m = mig_lower_m,
+ mig_upper_f = mig_upper_f,
+ mig_lower_f = mig_lower_f
)
mig_upper_m <- mig_bounds$mig_upper_m
@@ -540,13 +541,6 @@ mig_resid_time <- function(pop_m_mat,
asfr_mat <- args_list$asfr_mat
srb_vec <- args_list$srb_vec
- # TR: add chunk (maybe a new function?) that
- # checks dimensions; names dimensions if necessary (and warns if so)
- # and trims dimensions if necessary (warning user if needed).
- # warning does mean warning() it just means cat("\nwatch out!\n")
- # Not important, but theese could be silenced using a new 'verbose' arg
-
-
# Estimate stock method
mig_res <-
mig_resid_stock(
@@ -604,7 +598,9 @@ migresid_net_surv <- function(pop_mat, sr_mat) {
# is treated by migresid_net_surv_first_ageg.
res[nrow(res), ] <- NA
res <- rbind(matrix(NA, nrow = 1, ncol = ncol(res)), res)
- res <- migresid_net_surv_last_ageg(res, pop_mat, sr_mat)
+ res <- migresid_net_surv_last_ageg(net_mig = res,
+ pop_mat = pop_mat,
+ sr_mat = sr_mat)
rownames(res) <- rownames(pop_mat)
colnames(res) <- colnames(pop_mat)[-p]
res
diff --git a/R/mig_un_fam.R b/R/mig_un_fam.R
index a5f516c19..81a2c8e8b 100644
--- a/R/mig_un_fam.R
+++ b/R/mig_un_fam.R
@@ -1,9 +1,9 @@
#' Net migration by age for an UN family
#' @description Given a total net migration,
#' calculate the net migration age schedule based on the Rogers and Castro formula for UN families.
-#' @param NM numeric. Total net migration to distribuite between ages and sex.
+#' @param NM numeric. Total net migration to distribute between ages and sex.
#' @param family character. Could be "Family", "Female Labor", "Male Labor".
-#' @param Single logical. Results by simple age. Default `FALSE`.
+#' @param Single logical. Results by simple age. Default `TRUE`.
#' Typically from pre-working age and working age parts of in Roger-Castro formula.
#' @param OAnew The age from which to group all ages into an open ended age group.
#' By default it is set to 100, so it groups all ages up to 120, which is the
diff --git a/R/nAx.R b/R/nAx.R
index d7c0e5f7d..729fcd1d2 100644
--- a/R/nAx.R
+++ b/R/nAx.R
@@ -788,9 +788,14 @@ lt_a_un <- function(nMx,
Sex = Sex,
region = region,
mod = mod,
- # no need to redo extrap in here
+ # we need to redo extrap in here because otherwise extrapFit
+ # might be length < 2 and raise problems with MortalityLaws
+ extrapLaw = extrapLaw,
+ extrapFrom = extrapFrom,
+ extrapFit = extrapFit,
...
)
+
qxnew <-
lt_id_ma_q(
nMx = mxi,
diff --git a/R/smooth_age_5.R b/R/smooth_age_5.R
index 45c8a1392..bc73a2246 100644
--- a/R/smooth_age_5.R
+++ b/R/smooth_age_5.R
@@ -63,11 +63,11 @@ smooth_age_5_cf <- function(Value,
out <- Value5 * NA
# cut back down (depending) and name
interleaf <- c(rbind(vodds, vevens))
-
+
if (start_on == 5){
interleaf <- interleaf[-1]
}
-
+
n <- min(c(length(interleaf), N))
out[1:n] <- interleaf[1:n]
@@ -115,7 +115,7 @@ smooth_age_5_kkn <- function(Value,
# would need to move this up to ensure?
# or in case of 85+ would we want to keep 80-84, 85+ as-is?
- Value10 <- groupAges(Value5, Age = Age, N = 10, shiftdown = start_on)
+ Value10 <- groupAges(Value5, Age = Age5, N = 10, shiftdown = start_on)
# what OAG is a strange digit? Then take OAG after grouping.
if (OAG) {
@@ -133,21 +133,21 @@ smooth_age_5_kkn <- function(Value,
vodds <- Value10 / 2 + (v10R - v10L) / 16
# constrained in 10-year age groups
vevens <- Value10 - vodds
-
+
# if (start_on == 5){
# # this is the KNN operation
# vevens <- Value10 / 2 + (v10R - v10L) / 16
# # constrained in 10-year age groups
# vodds <- Value10 - vevens
# }
- #
+ #
# stagger odd even 5s
interleaf <- c(rbind(vodds, vevens))
-
+
if (start_on == 5){
interleaf <- interleaf[-1]
}
-
+
# produce results vector
out <- Value5 * NA
n <- min(c(length(interleaf), N))
@@ -442,14 +442,24 @@ smooth_age_5_zigzag <- function(Value,
}
#' Smooth in 5-year age groups using a moving average
+#'
#' @description Smooth data in 5-year age groups.
#' @details This function calls \code{smooth_age_5_zigzag_inner()}, but prepares data in a way consistent with other methods called by \code{smooth_age_5()}. It is probably preferable to call \code{zigzag()} from the top level, or else call this method from \code{agesmth()} for more control over tail imputations.
+#'
#' @param Value numeric vector of (presumably) counts in 5-year age groups.
#' @param Age integer vector of age group lower bounds.
#' @param OAG logical. Whether or not the top age group is open. Default \code{TRUE}.
#' @param n integer. The width of the moving average. Default 3 intervals (x-5 to x+9).
+#' @param tails logical. If tails is \code{FALSE}, both tails are left untouched.
+#' Otherwise, the tails are filled out using a cascade method.
+#'
#' @return numeric vector of smoothed counts in 5-year age groups.
-#' @details This function calls \code{mav()}, which itself relies on the more general \code{ma()}. We lose the lowest and highest ages with this method, unless \code{n=1}, in which case data is returned in the original 5-year age groups. The total population count is not constrained to sum to the orignal total.
+#'
+#' @details If tails is set to \code{FALSE}, this function calls \code{mav()}, which itself relies on the more general \code{ma()}. We lose the lowest and highest ages with this method, unless \code{n=1}, in which case data is returned in the original 5-year age groups. The total population count is not constrained to sum to the original total.
+#'
+#' If tails is \code{TRUE}, the same results are expected but the tails are
+#' filled in using a cascading method.
+#'
#' @examples
#' Age <- c(0,1,seq(5,90,by=5))
#' # defaults
@@ -466,11 +476,11 @@ smooth_age_5_zigzag <- function(Value,
#' legend("topright", col = cols, lty = 1, lwd = lwds, legend = paste("n =",1:5))
#' }
#' @export
-
smooth_age_5_mav <- function(Value,
Age,
OAG = TRUE,
- n = 3) {
+ n = 3,
+ tails = FALSE) {
Value <- groupAges(Value, Age = Age, N = 5)
Age <- as.integer(names(Value))
@@ -479,7 +489,8 @@ smooth_age_5_mav <- function(Value,
Value = Value,
Age = Age,
OAG = OAG,
- n = n
+ n = n,
+ tails = tails
)
Smoothed
@@ -615,10 +626,10 @@ smooth_age_5_feeney <- function(Value,
#' Smooth populations in 5-year age groups using various methods
#'
#' @description Smooth population counts in 5-year age groups using the Carrier-Farrag,
-#' Karup-King-Newton, Arriaga, United Nations, Stong, or Zigzag methods. Allows for imputation
+#' Karup-King-Newton, Arriaga, United Nations, Strong, MAV or Zigzag methods. Allows for imputation
#' of values in the youngest and oldest age groups for the Carrier-Farrag, Karup-King-Newton,
#' and United Nations methods.
-
+#'
#' @details The Carrier-Farrag, Karup-King-Newton (KKN), and Arriaga methods do not modify the totals
#' in each 10-year age group, whereas the United Nations, Strong, Zigzag, and moving average (MAV) methods do. The age intervals
#' of input data could be any integer structure (single, abridged, 5-year, other), but
@@ -627,8 +638,8 @@ smooth_age_5_feeney <- function(Value,
#'
#' The Carrier-Farrag, Karup-King-Newton, and United Nations methods do not produce estimates
#' for the first and final 10-year age groups. By default, these are imputed with the original 5-year age group totals, but
-#' you can also specify to impute with \code{NA}, or the results of the Arriaga or
-#' Strong methods. If the terminal digit of the open age group is 5, then the terminal 10-year
+#' you can also specify to impute with \code{NA}, or the results of the Arriaga,
+#' Strong and Cascade methods. If the terminal digit of the open age group is 5, then the terminal 10-year
#' age group shifts down, so imputations may affect more ages in this case. Imputation can follow
#' different methods for young and old ages.
#'
@@ -640,7 +651,7 @@ smooth_age_5_feeney <- function(Value,
#'
#' @param Value numeric vector of counts in single, abridged, or 5-year age groups.
#' @param Age integer vector of ages corresponding to the lower integer bound of the counts.
-#' @param method character string. Options include \code{"Carrier-Farrag"},\code{"Arriaga"},\code{"KKN"},\code{"United Nations"}, \code{"Strong"}, and \code{"Zigzag"}. See details. Default \code{"Carrier-Farrag"}.
+#' @param method character string. Options include \code{"Carrier-Farrag"},\code{"Arriaga"},\code{"KKN"},\code{"United Nations"}, \code{"Strong"}, \code{"MAV"} and \code{"Zigzag"}. See details. Default \code{"Carrier-Farrag"}.
#' @param OAG logical. Whether or not the top age group is open. Default \code{TRUE}.
#' @param ageMin integer. The lowest age included in intermediate adjustment. Default 10. Only relevant for Strong method.
#' @param ageMax integer. The highest age class included in intermediate adjustment. Default 65. Only relevant for Strong method.
@@ -756,7 +767,7 @@ smooth_age_5 <- function(Value,
ageMin = 10,
ageMax = 65,
n = 3,
- young.tail = c("Original", "Arriaga", "Strong", NA),
+ young.tail = c("Original", "Arriaga", "Strong", "Cascade", NA),
old.tail = young.tail) {
method <- match.arg(method, c("Carrier-Farrag",
@@ -766,8 +777,10 @@ smooth_age_5 <- function(Value,
"Strong",
"Zigzag",
"MAV"))
- young.tail <- match.arg(young.tail, c("Original", "Arriaga", "Strong", NA))
- old.tail <- match.arg(old.tail, c("Original", "Arriaga", "Strong", NA))
+
+ tail_methods <- c("Original", "Arriaga", "Strong", "Cascade", NA)
+ young.tail <- match.arg(young.tail, tail_methods)
+ old.tail <- match.arg(old.tail, tail_methods)
method <- simplify.text(method)
young.tail <- simplify.text(young.tail)
@@ -858,6 +871,8 @@ smooth_age_5 <- function(Value,
original <- groupAges(Value, Age = Age, N = 5)
arriaga <- smooth_age_5_arriaga(Value, Age = Age, OAG = OAG)
strong <- smooth_age_5_strong(Value, Age = Age, OAG = OAG)
+ mav_tails <- smooth_age_5_mav(Value, Age = Age, OAG = OAG, tails = TRUE)
+
# are the final entries NAs?
if (nrle$values[length(nrle$values)] == 1 & !is.na(old.tail)) {
nrle$values[1] <- 0
@@ -877,7 +892,10 @@ smooth_age_5 <- function(Value,
stopifnot(length(strong) == length(out))
out[old.ind] <- strong[old.ind]
}
-
+ if (old.tail == "cascade") {
+ stopifnot(length(mav_tails) == length(out))
+ out[old.ind] <- mav_tails[old.ind]
+ }
}
nrle <- rle(as.integer(nas))
# take care of young tail
@@ -900,6 +918,11 @@ smooth_age_5 <- function(Value,
stopifnot(length(strong) == length(out))
out[young.ind] <- strong[young.ind]
}
+ if (young.tail == "cascade") {
+ stopifnot(length(mav_tails) == length(out))
+ out[young.ind] <- mav_tails[young.ind]
+ }
+
}
} # end tail operations
diff --git a/R/utils.R b/R/utils.R
index 655601b1b..a47457e2d 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -196,7 +196,9 @@ getModelLifeTable <- function(ModelName, Sex) {
avg_adj <- function(x) {
n <- length(x)
- (shift.vector(x,-1, NA) + shift.vector(x, 1, NA)) / 2
+ out <- (shift.vector(x,-1, NA) + shift.vector(x, 1, NA)) / 2
+ names(out) <- names(x)
+ out
}
#' convert strings to concatenation of lower case alphabet
diff --git a/R/utilsAge.R b/R/utilsAge.R
index 57da8c721..1c6082b01 100644
--- a/R/utilsAge.R
+++ b/R/utilsAge.R
@@ -58,7 +58,7 @@ AGEN <-
#' @param shiftdown integer. Move the grouping down by one or more single ages. Optional argument.
#'
#' @details If you shift the groupings, then the first age groups may have a negative lower bound
-#' (for example of -5). These counts would be discarded for the osculatory version of Sprague smoothing,
+#' (for example of -5). These counts would be discarded for the osculatory version of Sprague smoothing,
#' for example, but they are preserved in this function. The important thing to know is that if you shift
#' the groups, the first and last groups won't be N years wide. For example if \code{shiftdown} is 1,
#' the first age group is 4-ages wide.
@@ -272,7 +272,7 @@ age2int <- function(Age, OAG = TRUE, OAvalue = NA) {
#' @return Vector of counts in N-year age groups.
#'
#' @details If you shift the groupings, then the first age groups may have a negative lower bound
+#' (for example of -5). These counts would be discarded for the osculatory version of Sprague smoothing,
+#' (for example of -5). These counts would be discarded for the oscillatory version of Sprague smoothing,
#' for example, but they are preserved in this function. The important thing to know is that if you shift
#' the groups, the first and last groups will not be N years wide. For example if \code{shiftdown} is 1, the first age group is 4-ages wide. The ages themselves are not returned,
#' but they are the name attribute of the output count vector. Note this will also correctly group abridged ages
@@ -324,6 +324,7 @@ groupOAG <- function(Value, Age, OAnew) {
N <- length(Value[Age <= OAnew])
Value[N] <- sum(Value[Age >= OAnew])
Value <- Value[1:N]
+ names(Value) <- Age[1:N]
Value
}
@@ -331,7 +332,7 @@ groupOAG <- function(Value, Age, OAnew) {
#' check for coherence within Age and between Age and AgeInt
#'
#' @description A few checks are carried out to test if \code{Age} is internally consistent, that \code{OAG} is consistent with \code{AgeInt}, and that \code{Age} and \code{AgeInt} are consistent with one another. For \code{Age} to be internally consistent, we cannot have redundant values, and values must be sequential.
-#' @details If \code{OAG} is \code{TRUE} then \code{AgeInt} must be coded as \code{NA}. If \code{Age} is not sorted then we sort both \code{Age} and \code{AgeInt}, assuming that they are in matched order. This isn't incoherence per se, but a message is returned to the console.
+#' @details If \code{OAG} is \code{TRUE} then \code{AgeInt} must be coded as \code{NA}. If \code{Age} is not sorted then we sort both \code{Age} and \code{AgeInt}, assuming that they are in matched order. This isn't incoherence in itself, but a message is returned to the console.
#'
#' @param Age integer vector of single ages (lower bound)
#' @param AgeInt integer vector. Age interval widths
@@ -724,7 +725,7 @@ rescaleAgeGroups <- function(Value1,
}
#' force a (count) vector to abridged ages
-#' @description This is a robustness utility, in place to avoid annoying hang-ups in \code{LTAbr()}. If data are given in non-standard ages, they are forced to standard abrdiged ages on the fly. Really this should happen prior to calling \code{lt_abridged()}
+#' @description This is a robustness utility, in place to avoid annoying hang-ups in \code{LTAbr()}. If data are given in non-standard ages, they are forced to standard abridged ages on the fly. Really this should happen prior to calling \code{lt_abridged()}
#' @details This should be able to group up and group down as needed. \code{graduate_mono()} is used under the hood. \code{pclm()} or \code{graduate_uniform()} ought to be flexible enough to do the same.
#' @inheritParams graduate_uniform
#' @seealso graduate_mono_closeout, lt_abridged
@@ -734,14 +735,16 @@ rescaleAgeGroups <- function(Value1,
#' Age <- c(0,1,3,seq(5,100,5))
#' AgeInt <- c(1,2,2,rep(5,19),1)
#' Value <- tapply(V1,rep(Age,times=AgeInt), sum)
-#'
+#'
#' is_abridged(Age)
-#' age_abridge_force(Value, AgeInt, Age)
-age_abridge_force <- function(Value, AgeInt, Age) {
+#' age_abridge_force(Value, Age)
+age_abridge_force <- function(Value, Age) {
+
v1 <- graduate_mono_closeout(
Value,
Age = Age)
- a1 <- min(Age):(length(v1) - 1)
+ #a1 <- min(Age):(length(v1) - 1)
+ a1 <- (1:length(v1) - 1 + min(Age)) |> as.integer()
AgeAbr <- calcAgeAbr(a1)
vabr <- tapply(v1, AgeAbr, sum)
vabr
diff --git a/R/utils_downloads.R b/R/utils_downloads.R
index af435bda2..2627408fc 100644
--- a/R/utils_downloads.R
+++ b/R/utils_downloads.R
@@ -3,7 +3,7 @@
# and potentially others.
#' Extract Lx estimates from WPP2019. Mainly a utility function used by other functions.
-#' @description We extract `Lx` from `wpp2019`, interpolated to exact dates. Different methods availables.
+#' @description We extract `Lx` from `wpp2019`, interpolated to exact dates. Different methods available.
#' A vector of countries can be handled, but with a unique sex. Row names are not indicative of countries.
#' @param nLx numeric. either `NULL` or a numeric vector of lifetable exposure. If it's the second then we just pass it back.
#' @param location vector. UN Pop Div `LocName` or `LocID`
@@ -11,7 +11,7 @@
#' @param nLxDatesIn numeric. Vector of three decimal dates produced by (or passed through) `basepop_five()`
#' @param method character. Could be `"linear"`, `"exponential"`, or `"power"`
#'
-#' @return numeric matrix of `nLx` with `length(nLxDatesIn)` and abrdiged ages in rows.
+#' @return numeric matrix of `nLx` with `length(nLxDatesIn)` and abridged ages in rows.
#' @export
#' @importFrom stats setNames
#' @importFrom stats reshape
@@ -58,7 +58,7 @@ downloadnLx <- function(nLx, location, gender, nLxDatesIn, method="linear") {
}
if (!any(fertestr::is_LocID(location))) {
location_code <- fertestr::get_location_code(location)
- }else {
+ } else {
location_code <- as.integer(location)
}
@@ -121,7 +121,7 @@ downloadnLx <- function(nLx, location, gender, nLxDatesIn, method="linear") {
}
#' Extract ASFR estimates from WPP2019. Mainly a utility function used by other functions.
-#' @description We extract `ASFRx` from `wpp2019`, interpolated to exact dates. Different methods availables.
+#' @description We extract `ASFRx` from `wpp2019`, interpolated to exact dates. Different methods available.
#' A vector of countries can be handled, but with a unique sex. Row names are not indicative of countries.
#' @param Asfrmat numeric.
#' @param location vector. UN Pop Div `LocName` or `LocID`
@@ -262,7 +262,7 @@ downloadSRB <- function(SRB, location, DatesOut, verbose = TRUE){
fetch_wpp_births <- function(births, yrs_births, location, sex, verbose) {
# fetch WPP births if not provided by user
- if (is.null(births)) {
+ if (is.null(births) | length(births) == 0) {
# load WPP births
requireNamespace("DemoToolsData", quietly = TRUE)
@@ -316,7 +316,8 @@ interp_coh_download_mortality <- function(location, sex, date1, date2, OAnew = 1
}) %>%
lapply(lt_a2s_chunk, OAnew = OAnew) %>%
lapply(function(X){
- 1 - X$nqx
+ #1 - X$nqx
+ lt_id_Ll_S(X$nLx, X$lx, X$Age, X$AgeInt, N = 1)
}) %>%
do.call("cbind",.)
diff --git a/README.md b/README.md
index 2e0ec7c56..2c972ce7d 100644
--- a/README.md
+++ b/README.md
@@ -5,19 +5,19 @@
[![R build status](https://github.com/timriffe/DemoTools/workflows/R-CMD-check/badge.svg)](https://github.com/timriffe/DemoTools/actions)
[![codecov](https://codecov.io/gh/timriffe/DemoTools/branch/master/graph/badge.svg)](https://codecov.io/gh/timriffe/DemoTools)
-[![](https://img.shields.io/badge/devel%20version-01.13.39-yellow.svg)](https://github.com/timriffe/DemoTools)
+[![](https://img.shields.io/badge/devel%20version-01.13.80-yellow.svg)](https://github.com/timriffe/DemoTools)
[![issues](https://img.shields.io/github/issues-raw/timriffe/DemoTools.svg)](https://github.com/timriffe/DemoTools/issues)
[![lifecycle](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://www.tidyverse.org/lifecycle/#experimental)
# Tools for aggregate demographic analysis
-Date: 2021-03-23
+Date: 2023-12-29
`DemoTools` is an R package that contains simple functions often used in demographic analysis. It is in active development.
This project is commissioned by the [UN Population Division](http://www.un.org/en/development/desa/population/) and financed by the [Bill and Melinda Gates Foundation](https://www.gatesfoundation.org/) as part of the [Making Family Planning Count](http://www.un.org/en/development/desa/population/projects/making-family-planning-count/index.shtml) project. Work is also done in collaboration with Sean Fennell, [José Manuel Aburto](https://github.com/jmaburto), [Ilya Kashnitsky](https://ikashnitsky.github.io/), [Marius Pascariu](https://github.com/mpascariu), [Jorge Cimentada](https://github.com/cimentadaj), [Monica Alexander](https://www.monicaalexander.com/), and with minor contributions from [several more](https://github.com/timriffe/DemoTools/graphs/contributors) (thank you!). This work is licensed under the Creative Commons Attribution-ShareAlike 3.0 IGO ([CC BY-SA 3.0 IGO](https://creativecommons.org/licenses/by-sa/3.0/igo/)).
-The idea behind `DemoTools` is to provide a common set of functions that can be easily used by analysts and scientists working on demographic analysis and modelling.
+The idea behind `DemoTools` is to provide a common set of functions that can be easily used by analysts and scientists working on demographic analysis and modeling.
If you detect a bug or have a suggestion please notify us using the [Issues](https://github.com/timriffe/DemoTools/issues) tab on github. Even better if you fix it and make a pull request! See [CONTRIBUTING.md](https://github.com/timriffe/DemoTools/blob/master/CONTRIBUTING.md) for more tips on reporting bugs or offering patches.
@@ -29,12 +29,11 @@ If you are getting started with `DemoTools` we recommend taking a look at the tu
You can load the ```DemoTools``` package in R like so:
```r
-# install.packages("devtools")
+# install.packages("remotes")
-library(devtools)
# requires the development version of rstan, sorry!
install.packages("rstan", repos = c("https://mc-stan.org/r-packages/", getOption("repos")))
-install_github("timriffe/DemoTools")
+remotes::install_github("timriffe/DemoTools")
```
## Citation
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 0c43c45ef..b110f0e37 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -10,8 +10,8 @@ home:
title: DemoTools
description: Do you love demography and data? If so, you might enjoy using this package.
links:
- - text: UN Deparment of Economic and Social Affairs Population
- href: https://www.un.org/en/development/desa/population/index.asp
+ - text: United Nations Population Division
+ href: https://www.un.org/development/desa/pd/
- text: Browse source code
href: https://github.com/timriffe/DemoTools/
@@ -47,6 +47,12 @@ reference:
desc: Functions to construct a lifetable
contents:
- '`lt_abridged`'
+ - '`lt_single_mx`'
+ - '`lt_single_qx`'
+ - '`lt_single2abridged`'
+ - '`lt_abridged2single`'
+ - '`lt_ambiguous`'
+ - '`lt_smooth_ambiguous`'
- title: "Interpolation"
desc: Functions to interpolate counts
contents:
@@ -54,6 +60,7 @@ reference:
- '`interp_coh`'
- title: "Extrapolation"
desc: Functions to interpolate/extrapolate rates or counts
+ contents:
- '`interp_lc_lim`'
- '`lt_rule_m_extrapolate`'
- '`OPAG`'
diff --git a/data/asfr_mat_five.rda b/data/asfr_mat_five.rda
index 78b67259b..3aa59a764 100644
Binary files a/data/asfr_mat_five.rda and b/data/asfr_mat_five.rda differ
diff --git a/data/pop_f_mat_five.rda b/data/pop_f_mat_five.rda
index aea76202a..115355c24 100644
Binary files a/data/pop_f_mat_five.rda and b/data/pop_f_mat_five.rda differ
diff --git a/dev/.gitignore b/dev/.gitignore
index 2d6e1e81a..a21fc36c9 100644
--- a/dev/.gitignore
+++ b/dev/.gitignore
@@ -4,3 +4,5 @@
transitivitytests.R
scratch.R
junk.R
+testLT.R
+lx2.RData
diff --git a/dev/FunctionInventory.csv b/dev/FunctionInventory.csv
new file mode 100644
index 000000000..72f6628a5
--- /dev/null
+++ b/dev/FunctionInventory.csv
@@ -0,0 +1,171 @@
+Script,Function
+LIFIT.R,ADM
+utilsAge.R,age_abridge_force
+utilsAge.R,age2ageN
+utilsAge.R,age2int
+utilsAge.R,AGEN
+AGESEX.R,ageRatioScore
+AGESEX.R,ageSexAccuracy
+AGESEX.R,ageSexAccuracyDasGupta
+OGIVE.R,agesmth1
+basepop.R,ArgsCheck
+utils.R,avg_adj
+basepop.R,basepop_five
+GRPOPYB.R,birthCohorts
+utilsAge.R,calcAgeAbr
+utilsAge.R,calcAgeN
+interp_coh.R,check_args
+check_heaping.R,check_heaping_bachi
+check_heaping.R,check_heaping_coale_li
+check_heaping.R,check_heaping_jdanov
+check_heaping.R,check_heaping_kannisto
+check_heaping.R,check_heaping_myers
+check_heaping.R,check_heaping_noumbissi
+check_heaping.R,check_heaping_roughness
+check_heaping.R,check_heaping_sawtooth
+check_heaping.R,check_heaping_spoorenberg
+check_heaping.R,check_heaping_whipple
+utils.R,dec.date
+utils_downloads.R,downloadAsfr
+utils_downloads.R,downloadnLx
+utils_downloads.R,downloadSRB
+utils_downloads.R,fetch_wpp_births
+lt_model_lq.R,find_my_case
+utils_downloads.R,get_LocID
+utils_downloads.R,get_LocName
+utils.R,getModelLifeTable
+graduate.R,graduate
+graduate.R,graduate_beers
+graduate.R,graduate_beers_expand
+graduate.R,graduate_beers_johnson
+graduate.R,graduate_grabill
+graduate.R,graduate_grabill_expand
+graduate.R,graduate_mono
+graduate.R,graduate_mono_closeout
+graduate.R,graduate_pclm
+graduate.R,graduate_sprague
+graduate.R,graduate_sprague_expand
+graduate.R,graduate_uniform
+utilsAge.R,groupAges
+utilsAge.R,groupOAG
+check_heaping.R,heapify
+IRDID.R,ID
+utilsAge.R,inferAgeIntAbr
+utilsAge.R,int2age
+utilsAge.R,int2ageN
+AGEINT.R,interp
+interp_coh.R,interp_coh
+utils_downloads.R,interp_coh_download_mortality
+interp_coh.R,interp_coh_lxMat_pxt
+interp_lc_lim.R,interp_lc_lim
+interp_lc_lim.R,interp_lc_lim_abk_m
+interp_lc_lim.R,interp_lc_lim_estimate
+interp_lc_lim_group.R,interp_lc_lim_group
+interp_lc_lim.R,interp_lc_lim_kt_min
+AGEINT.R,interpolatePop
+IRDID.R,IRD
+utilsAge.R,is_abridged
+utilsAge.R,is_age_coherent
+utilsAge.R,is_age_redundant
+utilsAge.R,is_age_sequential
+utils_downloads.R,is_Loc_available
+utilsAge.R,is_single
+utils_downloads.R,loc_message
+OGIVE.R,loess_smth1
+nAx.R,lt_a_closeout
+nAx.R,lt_a_pas
+nAx.R,lt_a_un
+interp_coh.R,lt_a2s_chunk
+lt_abridged.R,lt_abridged
+lt_regroup_age.R,lt_abridged2single
+lt_regroup_age.R,lt_ambiguous
+lt_id.R,lt_id_d_l
+lt_id.R,lt_id_d_q
+lt_id.R,lt_id_l_d
+lt_id.R,lt_id_l_q
+lt_id.R,lt_id_L_T
+lt_id.R,lt_id_lda_L
+lt_id.R,lt_id_Ll_S
+lt_id.R,lt_id_ma_q
+nAx.R,lt_id_morq_a
+nAx.R,lt_id_morq_a_greville
+lt_id.R,lt_id_q_l
+lt_id.R,lt_id_qa_m
+lt_id.R,lt_id_qm_a
+basepop.R,lt_infer_radix_from_1L0
+lt_model_lq.R,lt_model_lq
+lt_rule.R,lt_rule_1a0
+lt_rule.R,lt_rule_1a0_ak
+nAx.R,lt_rule_1a0_cd
+nAx.R,lt_rule_4a1_cd
+lt_rule.R,lt_rule_4m0_D0
+lt_rule.R,lt_rule_4m0_m0
+lt_rule.R,lt_rule_ak_m0_a0
+lt_rule.R,lt_rule_ak_q0_a0
+extra_mortality.R,lt_rule_m_extrapolate
+lt_single.R,lt_single_mx
+lt_single_qx.R,lt_single_qx
+lt_regroup_age.R,lt_single2abridged
+interp_lc_lim.R,lt_smooth_ambiguous
+lt_model_lq.R,lthat.logquad
+utils.R,ma
+MAV.R,mav
+MAV.R,mav_tails
+utilsAge.R,maxA2abridged
+mig_beta.R,mig_beta
+mig_beta.R,mig_beta_constant_child
+mig_beta.R,mig_beta_cwr
+mig_rc.R,mig_calculate_rc
+mig_rc.R,mig_estimate_rc
+mig_resid.R,mig_resid
+mig_resid.R,mig_resid_cohort
+mig_resid.R,mig_resid_dim_checker
+mig_resid.R,mig_resid_stock
+mig_resid.R,mig_resid_time
+mig_un_fam.R,mig_un_fam
+mig_resid.R,migresid_births
+mig_resid.R,migresid_bounds
+mig_resid.R,migresid_bounds_last_ageg
+mig_resid.R,migresid_net_surv
+mig_resid.R,migresid_net_surv_first_ageg
+mig_resid.R,migresid_net_surv_last_ageg
+utilsAge.R,names2age
+OPAG.R,OPAG
+OPAG.R,OPAG_fit_stable_standard
+OPAG.R,OPAG_nLx_warp_r
+OPAG.R,OPAG_r_min
+OPAG.R,OPAG_simple
+OGIVE.R,poly_smth1
+utils.R,ratx
+LIFIT.R,RDM
+utils.R,rescale_vector
+utilsAge.R,rescaleAgeGroups
+interp_coh.R,reshape_pxt
+utils.R,rlog
+interp_coh.R,rup
+AGESEX.R,sexRatioScore
+interp_coh.R,shift_census_ages_to_cohorts
+utils.R,shift.vector
+utils.R,simplify.text
+utils.R,single2abridged
+smooth_age_5.R,smooth_age_5
+smooth_age_5.R,smooth_age_5_arriaga
+smooth_age_5.R,smooth_age_5_cf
+smooth_age_5.R,smooth_age_5_feeney
+smooth_age_5.R,smooth_age_5_kkn
+smooth_age_5.R,smooth_age_5_mav
+smooth_age_5.R,smooth_age_5_strong
+smooth_age_5.R,smooth_age_5_un
+smooth_age_5.R,smooth_age_5_zigzag
+ZIGZAG.R,smooth_age_5_zigzag_inner
+ZIGZAG.R,smooth_age_5_zigzag_min
+ZIGZAG.R,smooth_age_5_zigzag_p
+SPENCER.R,spencer
+splitOscillate.R,splitOscillate
+CENSUR.R,surv10
+CENSUR.R,surv5
+CENSUR.R,survN
+CENSUR.R,survRatioError
+interp_coh.R,transform_datesout
+interp_coh.R,transform_pxt
+ZELNIK.R,zelnik
diff --git a/dev/arriaga_tidy.R b/dev/arriaga_tidy.R
new file mode 100644
index 000000000..0ae5e9263
--- /dev/null
+++ b/dev/arriaga_tidy.R
@@ -0,0 +1,164 @@
+Ages <- seq(0, 80, by = 5)
+AMales <- smooth_age_5_arriaga(Value = pop5m_pasex, Age = Ages, OAG = TRUE)
+# PAS spreadsheet result:
+Atest <- c(662761, 495126, 345744, 287629, 285919, 261018, 237469, 203277,
+ 161733, 126960, 88586, 67496, 54587, 41257, 28790, 17189, 34729)
+all(round(AMales) - Atest == 0, na.rm = TRUE)
+plot(Ages, pop5m_pasex)
+lines(as.integer(names(AMales)),AMales)
+# Before:
+smooth_age_5_arriaga <- function(Value,
+ Age,
+ OAG = TRUE) {
+
+ # these values are not used, it's just for lengths, and to make sure we
+ # end on an even 10. Technically we could even provide data in 10-year
+ # age groups and it'd still not break.
+ Value1 <- graduate_uniform(Value = Value, Age = Age, OAG = OAG)
+ Value5 <-
+ groupAges(Value1, Age = as.integer(names(Value1)), N = 5)
+ N <- length(Value5)
+ Age5 <- as.integer(names(Value5))
+
+ # would need to move this up to ensure?
+ # or in case of 85+ would we want to keep 80-84, 85+ as-is?
+ Value10 <- groupAges(Value, Age = Age, N = 10)
+
+ # what OAG is a strange digit? Then take OAG after grouping.
+ if (OAG) {
+ OAGvalue <- Value5[length(Value5)]
+ Value10[length(Value10)] <- NA
+ Value5[length(Value5)] <- NA
+ }
+
+ # again get staggered vectors
+ Value10LL <- shift.vector(Value10,-2, fill = NA)
+ Value10L <- shift.vector(Value10,-1, fill = NA)
+ Value10R <- shift.vector(Value10, 1, fill = NA)
+ Value10RR <- shift.vector(Value10, 2, fill = NA)
+
+ # alternating calc, with differences at tails
+ vevens <- (-Value10R + 11 * Value10 + 2 * Value10L) / 24
+ # tails different
+ vevens[1] <-
+ (8 * Value10[1] + 5 * Value10L[1] - Value10LL[1]) / 24
+ lastind <- which(is.na(vevens))[1]
+ vevens[lastind] <-
+ Value10[lastind] - (8 * Value10[lastind] + 5 * Value10R[lastind] - Value10RR[lastind]) / 24
+ # odds are complement
+ vodds <- Value10 - vevens
+
+ # prepare output
+ interleaf <- c(rbind(vodds, vevens))
+ # produce results vector
+ out <- Value5 * NA
+ n <- min(c(length(interleaf), N))
+ out[1:n] <- interleaf[1:n]
+
+ # if OA ends in 5, then we can save penultimate value too
+ na.i <- is.na(out)
+ out[na.i] <- Value5[na.i]
+ if (OAG) {
+ out[N] <- OAGvalue
+ }
+
+ out
+}
+library(tidyverse)
+data <- tibble(Age = Ages, Pop = pop5m_pasex)
+
+age2single <- function(Age, OAG = TRUE, AgeInt = NULL){
+ maxA <- ifelse(OAG, max(Age), max(Age) + AgeInt[which.max(Age)])
+ min(Age):maxA
+}
+
+library(collapse)
+library(data.table)
+library(tidyfast)
+smooth_age_5_arriaga_tidy <- function(data, variable, OAG = TRUE){
+
+ if (OAG){
+ OAvalue <- data |>
+ fsubset(Age == max(Age)) |>
+ pull(!!sym(variable))
+ }
+
+ data <-
+ data |>
+ rename(V = !!variable)
+ V <- data$V
+ A <- data$Age
+ V1 <- graduate_uniform(Value = V,
+ Age = Age,
+ OAG = OAG)
+ A1 <- age2single(A, OAG = OAG)
+ # force to single
+ # reframe(
+ # V = graduate_uniform(Value = V,
+ # Age = Age,
+ # OAG = OAG),
+ # Age = age2single(Age))
+ data.frame(V = V1, Age = A1) |>
+ # data |>
+ # group to 5 (innocuous if 5-year data given)
+ fmutate(Age = Age - Age %% 5,
+ V = fifelse(Age == max(Age) & OAG, NA_real_, V)) |>
+ fgroup_by(Age) |>
+ fsummarize(V = sum(V)) |>
+ fungroup() |>
+ fmutate(Age10 = Age - Age %% 10) |>
+ fmutate(V10 = sum(V), .by = Age10) |>
+ fmutate(V10LL = flag(V10,-4),
+ V10L = flag(V10,-2),
+ V10R = flag(V10, 2),
+ V10RR = flag(V10,4),
+ vevens = dt_case_when(Age10 == min(Age10) ~ (8 * V10 + 5 * V10L - V10LL) / 24,
+ Age10 == (max(Age10) - 10) ~ V10 - (8 * V10 + 5 * V10R - V10RR) / 24,
+ TRUE ~ (-V10R + 11 * V10 + 2 * V10L) / 24),
+ vodds = V10 - vevens,
+ V5out = dt_case_when(
+ Age == max(Age)~ OAvalue,
+ Age %% 10 == 0 ~ vodds,
+ Age %% 5 == 0 ~ vevens,
+ TRUE ~ V),
+ V5out = fifelse(is.na(V5out), V, V5out)) |>
+ fselect(Age, Age10, V5out) |>
+ rename(!!variable := V5out)
+
+ }
+# ---------------- #
+# test equivalency #
+# ---------------- #
+# for OAG divisible by 10
+smooth_age_5_arriaga_tidy(tibble(Age = Ages,
+ Pop = pop5m_pasex),
+ variable = "Pop",
+ OAG = TRUE)
+ mutate(V5orig = smooth_age_5_arriaga(pop5m_pasex, Ages, TRUE))
+
+
+# for OAG divisible by 5
+Age75 <- seq(0,75,by=5)
+v75 <- pop5m_pasex
+v75[16] <- sum(pop5m_pasex[16:17])
+v75 <- v75[-17]
+
+data <- tibble(Age = Age75,
+ Pop = v75)
+smooth_age_5_arriaga_tidy(data,
+ variable = "Pop",
+ OAG = TRUE) |>
+ mutate(V5orig = smooth_age_5_arriaga(v75, Age75, TRUE))
+
+# ----------------------- #
+# Compare execution speed #
+# ----------------------- #
+library(rbenchmark)
+benchmark(smooth_age_5_arriaga_tidy(data,
+ variable = "Pop",
+ OAG = TRUE)) # .522 | 0.402 if we remove reframe()
+benchmark(smooth_age_5_arriaga(v75, Age75, TRUE)) # .054, 10 times faster...
+
+# Conclusion:
+# Possibly data.table could do the same a tic faster than collapse/tidyfast,
+# but only marginally do, whereas base-powered DemoTools arithmetic is 10x faster
diff --git a/dev/build.R b/dev/build.R
index 9545ed4bb..d6e841d2e 100644
--- a/dev/build.R
+++ b/dev/build.R
@@ -18,10 +18,12 @@ shhh <- function(expr){
library(devtools)
library(TimUtils)
+library(magrittr)
+library(git2r)
#install.packages("backports")
#install.packages("roxygen2")
#install_github("hadley/devtools")
-#install_github("timriffe/TimUtils/TimUtils")
+#install_github("timriffe/TimUtils")
# do this whenever new functions are added to /R, or whenever roxygen is updated
devtools::document()
@@ -30,15 +32,28 @@ devtools::document()
devtools::build_vignettes()
# devtools::install_github("r-lib/pkgdown")
+# usethis::proj_activate(here::here())
pkgdown::build_site()
-versionIncrement(
+TimUtils::versionIncrement(
major = FALSE, # only for releases
mid = FALSE, # major functionality added
minor = TRUE, # whenever documentation renewed, any patch, tweak, or fix
maxdigits = c(2,2,3),# maybe 4 required?
README = TRUE) # update README dev version badge
+# add line to immediately commit and tag.
+
+# D <- readLines("DESCRIPTION")
+# vs <- D[grepl(D,pattern = "Version: ")] %>% gsub(pattern = "Version: ", replacement = "") %>%
+# paste0("v",.)
+# commit(message = vs,all=TRUE)
+# tag(name =vs,message = vs)
+# push(refspec = vs)
+
+# https://raw.githubusercontent.com/timriffe/DemoTools/59a0f4e50b7696c185a3c9d4e582426f88aac84f/DESCRIPTION
+
+
# run this to get access to already-written functions
shhh(load_all())
@@ -47,6 +62,9 @@ shhh(load_all())
Sys.setenv('_R_CHECK_SYSTEM_CLOCK_' = 0)
devtools::check(force_suggests = TRUE)
+
+source("version_lookup.R")
+update_lookup()
#build(pkg = "/home/tim/git/DemoTools", path = "/home/tim/Desktop")
#?devtools::build
#devtools::use_testthat("/home/tim/git/DemoTools")
diff --git a/dev/build_tests.R b/dev/build_tests.R
index b7d9d1ed4..63d848227 100644
--- a/dev/build_tests.R
+++ b/dev/build_tests.R
@@ -55,4 +55,41 @@ str(d2)
names(eliminable) <- names(d2)
N - eliminable
-unlist(d2) %>% unique()
\ No newline at end of file
+unlist(d2) %>% unique()
+
+# what file is each function in?
+
+A <- lsf.str("package:DemoTools")
+devtools::load_all()
+attr(attr(inferAgeIntAbr,"srcref"),"srcfile")
+
+
+get_file <- function(filename_no_quotes){
+ attr(attr(filename_no_quotes,"srcref"),"srcfile")
+}
+get_file(inferAgeIntAbr)
+lapply(ls("package:DemoTools"),function(x) eval() %>% is.function)
+is.function(DemoTools::`:=`)
+
+# list of functions by script
+
+scripts <- dir("R")
+files <- list()
+
+for (i in scripts){
+test.env <- new.env()
+sys.source(paste0("R/",i), envir = test.env)
+A <- lsf.str(envir=test.env)
+funs <- lapply(A,"[[",1) %>% unlist()
+files[[i]] <- funs
+rm(A)
+rm(test.env)
+}
+library(tidyverse)
+
+lengths <- lapply(files, length) %>% unlist()
+DF <- tibble(Script = rep(names(lengths), times = lengths),
+ Function = unlist(files))
+DF %>%
+ arrange(Function) %>%
+ write_csv("dev/FunctionInventory.csv")
diff --git a/dev/ediev.R b/dev/ediev.R
new file mode 100644
index 000000000..0c5d693fc
--- /dev/null
+++ b/dev/ediev.R
@@ -0,0 +1,217 @@
+smooth_age_zigzag_dd <- function(
+ Value, #population counts by single year of age
+ Age, #age index for Value: should be a continuous range of integer values (we will not check that)
+ OAG = TRUE, #is the last age group an open interval?
+ ret_ggPlot = TRUE, #if TRUE, we return a list(data=ValueOut, ggplot=myPlot) with a plot for checking results
+ #if FALSE, we return the smoothed series ValueOut
+ legendPlot = NULL #legend for the ggPlot
+)
+{
+ #Extension of Feeney (2013) "Removing Zig-Zag from Age Data" for single age data
+ #in the spirit of Dalkhat M. Ediev (2021) "A model of age heaping with applications to population graduation that retains informative demographic variation"
+ #Main differences with Feeney's algorithm is that, apart for working with single age data
+ #to suppose that age heaping can start 2 years before (and symmetrically 2 years after)
+ #and not only 1 year before and after.
+ #For example for age 40, we suppose that heaping occurs symmetrically, in the same proportion, at
+ #ages 39 and 41, as Feeney supposed for five-years age groups with adjacent age groups,
+ #but in the same spirit we suppose that
+ #heaping can occur also at ages 38 and 42 towards attracting age 40.
+ #We also suppose that heaping can only occurs for ages multiple of 5 and 10
+ if (OAG) {
+ #if the last age is an open interval, we drop it
+ AgeRangeOut <- Age[1:(length(Age)-1)]
+ }
+ #determine the ages with the variable parameters
+ #For example we start with ages 3 and 4 for heaping at age 5 and ages 6 and 7 use symmetrical values
+ #generally speaking heaping can start at ages x-2, x-1 and then symmetrically at ages x+1, x+2 towards attracting age x
+ #we suppose that the first and the last age we can treat have at least values for 2 ages before and after
+ #that means that if the first age in Age is 9 or 10, we will skip the first
+ getAgeRange <- function(AgeRangeOut) {
+ ageRangeMin <- max(5, trunc((AgeRangeOut[1] + 2) / 5) * 5)
+ if (ageRangeMin < AgeRangeOut[1] + 2) ageRangeMin <- ageRangeMin + 5
+ ageRangeMax <- trunc((AgeRangeOut[length(AgeRangeOut)] - 2) / 5) * 5
+ if (ageRangeMax > AgeRangeOut[length(AgeRangeOut)] - 2) ageRangeMax <- ageRangeMax - 5
+ return (c(ageRangeMin, ageRangeMax))
+ }
+ ageRange <- getAgeRange (AgeRangeOut)
+ ageRangeMin <- ageRange[1]
+ ageRangeMax <- ageRange[2]
+ #number of ages multiples of 5
+ nAges_mult <- trunc ((ageRangeMax - ageRangeMin) / 5) + 1
+ if (nAges_mult < 2) stop ("Age range should include at least two consecutive ages multiple of 5")
+ #initial values for the parameters
+ #if first attracting age is 5, age 3 has initial proportion of 0.1, age 4 of 0.2
+ #these two parameters will get 'optimized' by R optim
+ #the same parameters are applied, symmetrically, for age 6 (0.2) and age 7 (0.1)
+ param <- rep (c(0.1, 0.2), nAges_mult)
+ param_min <- rep(0, nAges_mult * 2)
+ param_max <- rep(9, nAges_mult * 2)
+
+ computeNewPop <- function(data, par, Age, ageRangeMin, nAges_mult, ret_Props=FALSE) {
+ if (ret_Props) props <- rep(NA, length(data))
+ for (ageIter in (1:nAges_mult)) {
+ currFirstAge <- ageRangeMin + (ageIter - 1) * 5 - 2
+ indexInData <- currFirstAge - Age[1] + 1
+ if (ret_Props) {
+ props[indexInData] <- par[(ageIter - 1) * 2 + 1]
+ props[indexInData+1] <- par[(ageIter - 1) * 2 + 2]
+ props[indexInData+3] <- par[(ageIter - 1) * 2 + 2]
+ props[indexInData+4] <- par[(ageIter - 1) * 2 + 1]
+ } else {
+ data[indexInData + 2] <- data[indexInData + 2] -
+ (data[indexInData] + data[indexInData + 4]) * par[(ageIter - 1) * 2 + 1] -
+ (data[indexInData + 1] + data[indexInData + 3]) * par[(ageIter - 1) * 2 + 2]
+ data[indexInData] <- data[indexInData] * (1 + par[(ageIter - 1) * 2 + 1])
+ data[indexInData+1] <- data[indexInData+1] * (1 + par[(ageIter - 1) * 2 + 2])
+ data[indexInData+3] <- data[indexInData+3] * (1 + par[(ageIter - 1) * 2 + 2])
+ data[indexInData+4] <- data[indexInData+4] * (1 + par[(ageIter - 1) * 2 + 1])
+ }
+ }
+
+ if (ret_Props) {
+ return (props)
+ } else {
+ return (data)
+ }
+ }
+
+ optim_fun <- function(data, par) {
+ popIter <- computeNewPop(data, par, Age, ageRangeMin, nAges_mult)
+ totPop <- sum (popIter)
+ objective_to_min <- 0
+ for (ageIter in (1:nAges_mult)) {
+ currFirstAge <- ageRangeMin + (ageIter - 1) * 5 - 2
+ indexInData <- currFirstAge - Age[1] + 1
+ term1 <- ((popIter[indexInData + 2] - (popIter[indexInData + 1] + popIter[indexInData + 3]) / 2)) / totPop
+ term2 <- ((popIter[indexInData + 2] - (popIter[indexInData] + popIter[indexInData + 4]) / 2)) / totPop
+ objective_to_min <- objective_to_min + term1^2 + term2^2
+ }
+ return (objective_to_min)
+ }
+
+ oldOptimValue <- 10^1000
+
+ for (rep in (1:10)) {
+ #apply optimization up to 10 times, using parameters obtained in the previous step, just in case...
+ parResult <- optim(data=Value, par=param, lower=param_min, upper=param_max, fn=optim_fun, method = "L-BFGS-B")
+ if (parResult$value >= oldOptimValue) break
+ param <- parResult$par
+ oldOptimValue <- parResult$value
+ }
+ valueOut <- computeNewPop(Value, parResult$par, Age, ageRangeMin, nAges_mult)
+
+ if (ret_ggPlot) {
+ library2 <- function(pack) {
+ #pack<-"tcltk2"
+ if( !(pack %in% installed.packages()))
+ {install.packages(pack)}
+ library(pack,character.only = TRUE)
+ }
+ library2("ggplot2")
+ library2("scales")
+
+ getScales <- function(Value, ageRangeMin, ageRangeMax) {
+ maxCount <- max(Value) * 1.0
+ power <- 0
+ while (maxCount > 1) {
+ maxCount <- maxCount / 10.0
+ power <- power + 1
+ }
+ maxCount <- ceiling(maxCount * 10) * 10^(power-1)
+ leftScale <- seq(0, maxCount, maxCount / 4)
+ rightScale <- seq(0, 1, 0.25)
+ xScale <- seq(ageRangeMin-5, ageRangeMax+5, 5)
+ return (list(left=leftScale, right=rightScale, x=xScale))
+ }
+
+ df <- data.frame(Age=Age, val=Value)
+ df$type <- 'Original'
+ temp <- data.frame(Age=Age, val=valueOut)
+ temp$type <- 'Corrected'
+ df <- rbind(df, temp)
+ props <- computeNewPop(Value, parResult$par, Age, ageRangeMin, nAges_mult, ret_Prop = TRUE)
+ temp <- data.frame(Age=Age, val=props)
+ temp$type <- 'Attraction'
+ #transform into proportions
+ temp$val <- temp$val / (1 + temp$val)
+ scale_axes <- getScales (Value, ageRangeMin, ageRangeMax)
+ maxY_left <- scale_axes$left[length(scale_axes$left)]
+ #adjust values in preparation for second axis (we will correct the labels later on in scale_y_continuous)
+ temp$val <- temp$val * maxY_left
+ df <- rbind(df, temp)
+ df$type <- factor (df$type, levels=c("Original", "Corrected", "Attraction"))
+ myLabelNames <- c("Original", "Corrected", "Attraction factors")
+
+ myPlot <- ggplot(df, aes(x=Age, y=val, color=type, alpha=type, size=type)) + geom_line()
+ myPlot <- myPlot + scale_y_continuous(breaks = scale_axes$left, name="population counts",
+ sec.axis = sec_axis(as.formula(paste("~./", maxY_left, sep="")), name="attraction proportions",
+ breaks=scale_axes$right),
+ labels = scales::label_number())
+ myPlot <- myPlot + scale_x_continuous(breaks = scale_axes$x)
+ myPlot <- myPlot + scale_colour_manual(values=c("red", "blue", "grey40"), labels=myLabelNames)
+ myPlot <- myPlot + scale_size_manual(values=c(1, 2, 2), labels=myLabelNames)
+ myPlot <- myPlot + scale_alpha_manual(values=c(1, 1, 0.2), labels=myLabelNames)
+ myPlot <- myPlot + theme_bw() + theme(legend.key = element_blank(), legend.title=element_blank(), legend.position="bottom")
+ myPlot <- myPlot + theme_text(1)
+ myPlot <- myPlot + labs(color="", alpha="", size="")
+ if (!is.null(legendPlot)) myPlot <- myPlot + ggtitle(legendPlot)
+
+ return (list(data=valueOut, ggplot=myPlot))
+ } else {
+ return (valueOut)
+ }
+}
+
+#data for Colombia 1973
+col1973 <- structure(list(
+ Age = 0:99,
+ Value = c(493538L, 497954L, 580000L,
+ 610204L, 609266L, 609670L, 599184L, 628352L, 624302L, 549158L,
+ 624112L, 527810L, 611266L, 541084L, 490342L, 466650L, 435310L,
+ 423912L, 444468L, 334732L, 345390L, 301686L, 319468L, 312100L,
+ 267828L, 293902L, 230690L, 230770L, 244100L, 178590L, 278284L,
+ 136552L, 201120L, 218514L, 159054L, 217800L, 156786L, 162270L,
+ 203560L, 141258L, 247758L, 94344L, 166104L, 164336L, 116024L,
+ 182858L, 104268L, 102916L, 136222L, 90550L, 180598L, 68132L,
+ 109788L, 108058L, 81438L, 116446L, 77230L, 62844L, 73906L, 49812L,
+ 136486L, 37506L, 61658L, 69702L, 43570L, 72760L, 36930L, 34102L,
+ 40052L, 24126L, 68924L, 16004L, 32988L, 40868L, 18406L, 31836L,
+ 14222L, 11824L, 16206L, 6972L, 24294L, 5016L, 7392L, 6906L, 6244L,
+ 9424L, 3806L, 3286L, 3478L, 1912L, 5260L, 916L, 1540L, 1142L,
+ 1266L, 1674L, 602L, 468L, 752L, 2360L)),
+ class = "data.frame",
+ row.names = c(NA,-100L))
+
+#Alexandropol 1897
+Alex1897 <- structure(list(
+ Age = 0:99,
+ Value = c(376.7579251, 342.1757925,
+ 398.3717579, 337.8530259, 314.0778098, 290.3025937, 355.1440922,
+ 334, 345, 360, 392, 260.0432277, 370, 307.5936599, 290.3025937,
+ 247.074928, 281.6570605, 268.6887608, 298.9481268, 164.9423631,
+ 333.5302594, 1401.253602, 1729.783862, 1643.32853, 1669.26513,
+ 700.9654179, 324.8847262, 273.0115274, 234.1066282, 104.4236311,
+ 471.8587896, 108.7463977, 186.556196, 151.9740634, 113.0691643,
+ 389.7262248, 151.9740634, 126.037464, 156.29683, 48.22766571,
+ 458.8904899, 48.22766571, 108.7463977, 56.87319885, 65.51873199,
+ 277.3342939, 82.80979827, 52.55043228, 91.45533141, 48.22766571,
+ 363.7896254, 26.61383285, 52.55043228, 39.58213256, 39.58213256,
+ 199.5244957, 56.87319885, 39.58213256, 48.22766571, 5, 285.9798271,
+ 17.96829971, 26.61383285, 17.96829971, 22.29106628, 78.4870317,
+ 22.29106628, 22.29106628, 26.61383285, 9.322766571, 121.7146974,
+ 0.677233429, 9.322766571, 5, 5, 30.93659942, 13.64553314, 9.322766571,
+ 9.322766571, 13.64553314, 35.25936599, 9.322766571, 9.322766571,
+ 5, 0.677233429, 17.96829971, 0.677233429, 5, 0.677233429, 9.322766571,
+ 17.96829971, 5, 5, 5, 5, 0.677233429, 0.677233429, 0.677233429,
+ 0.677233429, 0.677233429)),
+ class = "data.frame",
+ row.names = c(NA, -100L))
+
+#call the function for Colombia
+resCol <- smooth_age_zigzag_dd(col1973$Value, col1973$Age, OAG = TRUE, ret_ggPlot = TRUE, legendPlot="Colombia 1973")
+#show the plot for Colombia
+resCol$ggplot
+#call the function for Alexandropol
+resAlex <- smooth_age_zigzag_dd(Alex1897$Value, Alex1897$Age, OAG = TRUE, ret_ggPlot = TRUE, legendPlot="Alexandropol 1897")
+#show the plot
+resAlex$ggplot
diff --git a/docs/404.html b/docs/404.html
index b6e9684e8..3df36364e 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -79,7 +79,7 @@
DemoTools
- 01.13.20
+ 01.13.76
@@ -87,7 +87,7 @@
Your pull request will be easiest for us to read if you use a common style: http://r-pkgs.had.co.nz/r.html#style. Please pay particular attention to whitespace.
You should always add a bullet point to NEWS.md motivating the change. It should look like “This is what changed (@yourusername, #issuenumber)”. Please don’t add headings like “bug fix” or “new features” - these are added during the release process.
If you can, also write a test.
-
If you’re adding new parameters or a new function, you’ll also need to document them with roxygen2. Make sure to re-run devtools::document() on the code before submitting.
-
Also run devtools::check() to make sure your function doesn’t imply downstream errors or warnings. More such checking will be taken care of by us.
+
If you’re adding new parameters or a new function, you’ll also need to document them with roxygen2. Make sure to re-run devtools::document() on the code before submitting.
+
Also run devtools::check() to make sure your function doesn’t imply downstream errors or warnings. More such checking will be taken care of by us.
Here are tips for syncing your fork with the main repository syncing
A pull request is a process, and unless you’re a practiced contributor it’s unlikely that your pull request will be accepted as is. Typically the process looks like this:
The figure above has some clear age irregularities. For example, there is a consistent clustering around ages ending in zero and five. In principle, a post-enumeration survey or a sample interview should give more information on the reasons why these irregularities appear in the data (Siegel Jacob and Swanson David 2004). However, not every country has the resources to conduct a survey or post-enumeration process. Therefore, various arithmetic techniques have been developed for measuring heaping on individual ages, terminal digits, and age ranges. These estimate the degree of bias, but they do not correct it (Booth and Gerland 2015). The simplest way to analyze age-heaping is by assuming that the true numbers are rectangularly distributed over some age range that includes centered the age in question (Siegel Jacob and Swanson David 2004).
-
Several indices of age-heaping exist. DemoTools implements Whipple (Spoorenberg and Dutreuilh 2007), Myers, Bachi (Bachi 1951), Coale-Li (Coale and Li 1991), Noumbissi (Noumbissi 1992), Spoorenberg (Spoorenberg and Dutreuilh 2007), Jdanov (Jdanov 2008), and Kannisto (Kannisto 1999) age-heaping indices. Although the literal interpretation of these indices may vary, they tend to covary strongly when applied to the same data and age-ranges, so for making quick judgements of the degree of heaping over a large collection of data it may not be necessary to apply more than one or two. We also offer two new measures designed to test for irregularities in data in 5-year age bins.
-
Often the degree of detected heaping suggests that some smoothing procedure is warranted, but the kind of smoothing procedure may be a function of the particular way in which heaping is manifested. For example, if heaping is light, or just a matter of rounding to the nearest digit divisible by 5, then there will be no major difference between heaping on 0s versus heaping on 5s. In this case, grouping to 5-year age bins (see groupAges()) and then graduating in a constrained way (see sprague()) may suffice to remove the distortion while maintaining the broad underlying age pattern. However, if heaping is much worse on 0s than on 5s, the age pattern may still be distorted in a regular and pernicious way even after binning in 5-year age groups. In that case, it is advised to select a smoothing procedure designed for 5-year age groups (see smooth_age_5() and the vignette on smoothing) before graduating, or else some other more agressive smoothing option (see agesmth1()). The present vignette does not offer specific guidelines for such adjustments, but we do offer two new age quality indices that might be useful for deciding whether to resort to agressive smoothing: zero_pref_sawtooth() checks for and rates the mentioned jagged pattern in 5-year age groups. five_year_roughness() gives a total measure of noise for data in 5-year age groups, but does not look for a particular pattern to it. This second measure should not be used in isolation, but together with visual assessment. More details about such adjustments and decisions can be found in a second vignette on smoothing.
+
The figure above has some clear age irregularities. For example, there is a consistent clustering around ages ending in zero and five. In principle, a post-enumeration survey or a sample interview should give more information on the reasons why these irregularities appear in the data (Siegel Jacob and Swanson David 2004). However, not every country has the resources to conduct a survey or post-enumeration process. Therefore, various arithmetic techniques have been developed for measuring heaping on individual ages, terminal digits, and age ranges. These estimate the degree of bias, but they do not correct it (Booth and others 2015). The simplest way to analyze age-heaping is by assuming that the true numbers are rectangularly distributed over some age range that includes centered the age in question (Siegel Jacob and Swanson David 2004).
+
Several indices of age-heaping exist. DemoTools implements Whipple (Spoorenberg and Dutreuilh 2007), Myers, Bachi (Bachi 1951), Coale-Li (Coale and Li 1991), Noumbissi (Noumbissi 1992), Spoorenberg (Spoorenberg and Dutreuilh 2007), Jdanov (Jdanov 2008), and Kannisto (Kannisto 1999) age-heaping indices. Although the literal interpretation of these indices may vary, they tend to covary strongly when applied to the same data and age-ranges, so for making quick judgement of the degree of heaping over a large collection of data it may not be necessary to apply more than one or two. We also offer two new measures designed to test for irregularities in data in 5-year age bins.
+
Often the degree of detected heaping suggests that some smoothing procedure is warranted, but the kind of smoothing procedure may be a function of the particular way in which heaping is manifested. For example, if heaping is light, or just a matter of rounding to the nearest digit divisible by 5, then there will be no major difference between heaping on 0s versus heaping on 5s. In this case, grouping to 5-year age bins (see groupAges()) and then graduating in a constrained way (see sprague()) may suffice to remove the distortion while maintaining the broad underlying age pattern. However, if heaping is much worse on 0s than on 5s, the age pattern may still be distorted in a regular and pernicious way even after binning in 5-year age groups. In that case, it is advised to select a smoothing procedure designed for 5-year age groups (see smooth_age_5() and the vignette on smoothing) before graduating, or else some other more aggressive smoothing option (see agesmth1()). The present vignette does not offer specific guidelines for such adjustments, but we do offer two new age quality indices that might be useful for deciding whether to resort to aggressive smoothing: zero_pref_sawtooth() checks for and rates the mentioned jagged pattern in 5-year age groups. five_year_roughness() gives a total measure of noise for data in 5-year age groups, but does not look for a particular pattern to it. This second measure should not be used in isolation, but together with visual assessment. More details about such adjustments and decisions can be found in a second vignette on smoothing.
Whipple Index
@@ -188,7 +188,7 @@
Noumbissi
This method, based on the underlying principles and assumptions of the original Whipple’s index, improves by extending its basic principle to all ten digits. It compares single terminal digit numerators to denominators consisting in 5-year age groups centered on the terminal digit of age in question (Noumbissi 1992). It relies on a weaker assumption of linearity over an age range of five years rather than ten. It is based once more on the underlying principles and assumptions of the original Whipple’s index and introduces the following formulas to measure age heaping:
Values greater than 0.1 coupled with a sawtooth value greater than 0 already suggest that some smoothing is warranted. If there is no detected sawtooth pattern, then five-year-roughness indices should probably need to be higher and visually confirmed before deciding to smooth. Further guidelines can be found in the vignette on smoothing.
##References
-
-
-
Bachi, Roberto. 1951. “The Tendency to Round Off Age Returns: Measurement and Correction.” Bulletin of the International Statistical Institute 33 (4): 195–222.
+
+
+Bachi, Roberto. 1951. “The Tendency to Round Off Age Returns: Measurement and Correction.”Bulletin of the International Statistical Institute 33 (4): 195–222.
-
-
Booth, Heather, and Patrick Gerland. 2015. “Demographic Techniques: Data Adjustment and Correction.” Elsevier.
+
+Booth, Heather, and others. 2015. “Demographic Techniques, Data Adjustment and Correction.” In International Encyclopedia of the Social and Behavioral Sciences (2015), edited by James E. Wright, 2nd ed. Vol. 6. Oxford: Elsevier.
-
-
Coale, Ansley J, and Shaomin Li. 1991. “The Effect of Age Misreporting in China on the Calculation of Mortality Rates at Very High Ages.” Demography 28 (2). Springer: 293–301.
+
+Coale, Ansley J, and Shaomin Li. 1991. “The Effect of Age Misreporting in China on the Calculation of Mortality Rates at Very High Ages.”Demography 28 (2): 293–301.
-
-
Jdanov, Dmitri. 2008. “Beyond the Kannisto-Thatcher Database on Old Age Mortality: An Assessment of Data Quality at Advanced Ages.” MPIDR Working Paper WP-2008-013.
+
+Jdanov, Dmitri. 2008. “Beyond the Kannisto-Thatcher Database on Old Age Mortality: An Assessment of Data Quality at Advanced Ages.”MPIDR Working Paper WP-2008-013.
-
-
Kannisto, Väinö. 1999. “Assessing the Information on Age at Death of Old Persons in National Vital Statistics.” Validation of Exceptional Longevity”, Odense Monographs on Population Aging 6: 235–49.
+
+Kannisto, Väinö. 1999. “Assessing the Information on Age at Death of Old Persons in National Vital Statistics.”Validation of Exceptional Longevity”, Odense Monographs on Population Aging 6: 235–49.
-
-
Myers, Robert J. 1954. “Accuracy of Age Reporting in the 1950 United States Census.” Journal of the American Statistical Association 49 (268). Taylor & Francis: 826–31.
+
+Myers, Robert J. 1954. “Accuracy of Age Reporting in the 1950 United States Census.”Journal of the American Statistical Association 49 (268): 826–31.
-
-
Noumbissi, Amadou. 1992. “L’indice de Whipple Modifié: Une Application Aux Données Du Cameroun, de La Suède et de La Belgique.” Population (French Edition). JSTOR, 1038–41.
+
+Noumbissi, Amadou. 1992. “L’indice de Whipple Modifié: Une Application Aux Données Du Cameroun, de La Suède Et de La Belgique.”Population (French Edition), 1038–41.
-
-
Siegel Jacob, S, and A Swanson David, eds. 2004. The Methods and Materials of Demography. 2nd ed. San Diego, USA: Elsevier Academic Press, California, USA.
+
+Siegel Jacob, S, and A Swanson David, eds. 2004. The Methods and Materials of Demography. 2nd ed. San Diego, USA: Elsevier Academic Press, California, USA.
-
-
Spoorenberg, Thomas, and Catriona Dutreuilh. 2007. “Quality of Age Reporting: Extension and Application of the Modified Whipple’s Index.” Population 62 (4). INED: 729–41.
+
+Spoorenberg, Thomas, and Catriona Dutreuilh. 2007. “Quality of Age Reporting: Extension and Application of the Modified Whipple’s Index.”Population 62 (4): 729–41.
-
-
United Nations. 1952. “Accuracy Tests for Census Age Distributions Tabulated in Five-Year and Ten-Year Groups.” Population Bulletin, no. 2: 59–79.
+
+United Nations. 1952. “Accuracy Tests for Census Age Distributions Tabulated in Five-Year and Ten-Year Groups.”Population Bulletin, no. 2: 59–79.
-
-
———. 1955. Manual Ii: Methods of Appraisal of Quality of Basic Data for Population Estimates. 23. New York: United Nations Department of International Economic; Social Affairs.
+
+———. 1955. Manual II: Methods of Appraisal of Quality of Basic Data for Population Estimates. 23. New York: United Nations Department of International Economic; Social Affairs.
diff --git a/docs/articles/Age-heaping_quality_with_Demotools_files/figure-html/unnamed-chunk-1-1.png b/docs/articles/Age-heaping_quality_with_Demotools_files/figure-html/unnamed-chunk-1-1.png
index 655240dcc..0d5f68a81 100644
Binary files a/docs/articles/Age-heaping_quality_with_Demotools_files/figure-html/unnamed-chunk-1-1.png and b/docs/articles/Age-heaping_quality_with_Demotools_files/figure-html/unnamed-chunk-1-1.png differ
diff --git a/docs/articles/Age-heaping_quality_with_Demotools_files/figure-html/unnamed-chunk-9-1.png b/docs/articles/Age-heaping_quality_with_Demotools_files/figure-html/unnamed-chunk-9-1.png
index 5e3a1e9ca..e493a4bf0 100644
Binary files a/docs/articles/Age-heaping_quality_with_Demotools_files/figure-html/unnamed-chunk-9-1.png and b/docs/articles/Age-heaping_quality_with_Demotools_files/figure-html/unnamed-chunk-9-1.png differ
diff --git a/docs/articles/Age-heaping_quality_with_Demotools_files/header-attrs-2.10/header-attrs.js b/docs/articles/Age-heaping_quality_with_Demotools_files/header-attrs-2.10/header-attrs.js
new file mode 100644
index 000000000..dd57d92e0
--- /dev/null
+++ b/docs/articles/Age-heaping_quality_with_Demotools_files/header-attrs-2.10/header-attrs.js
@@ -0,0 +1,12 @@
+// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
+// be compatible with the behavior of Pandoc < 2.8).
+document.addEventListener('DOMContentLoaded', function(e) {
+ var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
+ var i, h, a;
+ for (i = 0; i < hs.length; i++) {
+ h = hs[i];
+ if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
+ a = h.attributes;
+ while (a.length > 0) h.removeAttribute(a[0].name);
+ }
+});
diff --git a/docs/articles/Age-heaping_quality_with_Demotools_files/header-attrs-2.8/header-attrs.js b/docs/articles/Age-heaping_quality_with_Demotools_files/header-attrs-2.8/header-attrs.js
new file mode 100644
index 000000000..dd57d92e0
--- /dev/null
+++ b/docs/articles/Age-heaping_quality_with_Demotools_files/header-attrs-2.8/header-attrs.js
@@ -0,0 +1,12 @@
+// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
+// be compatible with the behavior of Pandoc < 2.8).
+document.addEventListener('DOMContentLoaded', function(e) {
+ var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
+ var i, h, a;
+ for (i = 0; i < hs.length; i++) {
+ h = hs[i];
+ if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
+ a = h.attributes;
+ while (a.length > 0) h.removeAttribute(a[0].name);
+ }
+});
diff --git a/docs/articles/Age-heaping_quality_with_Demotools_files/header-attrs-2.9/header-attrs.js b/docs/articles/Age-heaping_quality_with_Demotools_files/header-attrs-2.9/header-attrs.js
new file mode 100644
index 000000000..dd57d92e0
--- /dev/null
+++ b/docs/articles/Age-heaping_quality_with_Demotools_files/header-attrs-2.9/header-attrs.js
@@ -0,0 +1,12 @@
+// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
+// be compatible with the behavior of Pandoc < 2.8).
+document.addEventListener('DOMContentLoaded', function(e) {
+ var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
+ var i, h, a;
+ for (i = 0; i < hs.length; i++) {
+ h = hs[i];
+ if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
+ a = h.attributes;
+ while (a.length > 0) h.removeAttribute(a[0].name);
+ }
+});
diff --git a/docs/articles/case_study_1.html b/docs/articles/case_study_1.html
index 1abc68d6f..c25123bd5 100644
--- a/docs/articles/case_study_1.html
+++ b/docs/articles/case_study_1.html
@@ -38,7 +38,7 @@
DemoTools
- 01.13.20
+ 01.13.76
Here the idea is use API to read in death count and pop count data, evaluate them, adjust them, get to single ages, and make a single age lifetable. Or Abrdiged lifetable. Or both and compare. Show different closeout options too.
+
Here the idea is use API to read in death count and pop count data, evaluate them, adjust them, get to single ages, and make a single age lifetable. Or Abridged lifetable. Or both and compare. Show different closeout options too.
diff --git a/docs/articles/case_study_1_files/header-attrs-2.10/header-attrs.js b/docs/articles/case_study_1_files/header-attrs-2.10/header-attrs.js
new file mode 100644
index 000000000..dd57d92e0
--- /dev/null
+++ b/docs/articles/case_study_1_files/header-attrs-2.10/header-attrs.js
@@ -0,0 +1,12 @@
+// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
+// be compatible with the behavior of Pandoc < 2.8).
+document.addEventListener('DOMContentLoaded', function(e) {
+ var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
+ var i, h, a;
+ for (i = 0; i < hs.length; i++) {
+ h = hs[i];
+ if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
+ a = h.attributes;
+ while (a.length > 0) h.removeAttribute(a[0].name);
+ }
+});
diff --git a/docs/articles/case_study_1_files/header-attrs-2.8/header-attrs.js b/docs/articles/case_study_1_files/header-attrs-2.8/header-attrs.js
new file mode 100644
index 000000000..dd57d92e0
--- /dev/null
+++ b/docs/articles/case_study_1_files/header-attrs-2.8/header-attrs.js
@@ -0,0 +1,12 @@
+// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
+// be compatible with the behavior of Pandoc < 2.8).
+document.addEventListener('DOMContentLoaded', function(e) {
+ var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
+ var i, h, a;
+ for (i = 0; i < hs.length; i++) {
+ h = hs[i];
+ if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
+ a = h.attributes;
+ while (a.length > 0) h.removeAttribute(a[0].name);
+ }
+});
diff --git a/docs/articles/case_study_1_files/header-attrs-2.9/header-attrs.js b/docs/articles/case_study_1_files/header-attrs-2.9/header-attrs.js
new file mode 100644
index 000000000..dd57d92e0
--- /dev/null
+++ b/docs/articles/case_study_1_files/header-attrs-2.9/header-attrs.js
@@ -0,0 +1,12 @@
+// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
+// be compatible with the behavior of Pandoc < 2.8).
+document.addEventListener('DOMContentLoaded', function(e) {
+ var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
+ var i, h, a;
+ for (i = 0; i < hs.length; i++) {
+ h = hs[i];
+ if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
+ a = h.attributes;
+ while (a.length > 0) h.removeAttribute(a[0].name);
+ }
+});
diff --git a/docs/articles/graduation_with_demotools.html b/docs/articles/graduation_with_demotools.html
index c176fe21b..6a257d280 100644
--- a/docs/articles/graduation_with_demotools.html
+++ b/docs/articles/graduation_with_demotools.html
@@ -38,7 +38,7 @@
DemoTools
- 01.13.20
+ 01.13.76
Graduation is a practice used to derive figures for n-year age groups, for example 5-year age groups from census data, that are corrected for net reporting error (Siegel Jacob and Swanson David 2004). The basic idea is to fit different curves to the original n-year and redistribute them into single-year values. These techniques are designed so that the sum of the interpolated single-year values is consistent with the total for the groups as a whole. Among the major graduation methods are the Sprague (Sprague 1880) and Beers (H. Beers 1945) oscilatory methods, monotone spline and the uniform distribution. More recently, the penalized composite link model (pclm) was proposed to ungroup coarsely aggregated data (Rizzi, Gampe, and Eilers 2015). All of these methodologies are implemented in DemoTools.
+
Graduation is a practice used to derive figures for n-year age groups, for example 5-year age groups from census data, that are corrected for net reporting error (Siegel Jacob and Swanson David 2004). The basic idea is to fit different curves to the original n-year and redistribute them into single-year values. These techniques are designed so that the sum of the interpolated single-year values is consistent with the total for the groups as a whole. Among the major graduation methods are the Sprague (Sprague 1880) and Beers (H. Beers 1945) oscillatory methods, monotone spline and the uniform distribution. More recently, the penalized composite link model (pclm) was proposed to ungroup coarsely aggregated data (Rizzi, Gampe, and Eilers 2015). All of these methodologies are implemented in DemoTools.
Following a similar idea, Beers interpolated two overlapping curves minimizing the squares of the differences within the interpolation range (H. Beers 1945). Specifically, Beers did this by minimizing fifth differences for a six-term formula, refered to as the ‘Ordinary’ Beers method (H. S. Beers 1945). Subsequently, the ordinay formula was modified to relax the requirement that the given value be reproduced and yield smoother interpolated results, refered to as ‘Modified’ Beers method (H. Beers 1945).
+
Following a similar idea, Beers interpolated two overlapping curves minimizing the squares of the differences within the interpolation range (H. Beers 1945). Specifically, Beers did this by minimizing fifth differences for a six-term formula, refered to as the ‘Ordinary’ Beers method (H. S. Beers 1945). Subsequently, the ordinary formula was modified to relax the requirement that the given value be reproduced and yield smoother interpolated results, referred to as ‘Modified’ Beers method (H. Beers 1945).
Graduation by a monotone spline is another option available in DemoTools. This option offers an algorithm to produce interpolants that preserve properties such as monotonicity or convexity that are present in the data (Fritsch and Carlson 1980). These are desirable conditions to not introduce biased details that cannot be ascertained from the data (Hyman 1983). To run this algorithm in DemoTools the option mono should be selected as follows:
+
Graduation by a monotone spline is another option available in DemoTools. This option offers an algorithm to produce interpolations that preserve properties such as monotonicity or convexity that are present in the data (Fritsch and Carlson 1980). These are desirable conditions to not introduce biased details that cannot be ascertained from the data (Hyman 1983). To run this algorithm in DemoTools the option mono should be selected as follows:
Beers, Henry S. 1945. “Six-Term Formulas for Routine Actuarial Interpolation.” The Record of the American Institute of Actuaries 34 (69): 59–60.
+
+
+Beers, Henry S. 1945. “Six-Term Formulas for Routine Actuarial Interpolation.”The Record of the American Institute of Actuaries 34 (69): 59–60.
-
-
Beers, HS. 1945. “Modified-Interpolation Formulas That Minimize Fourth Differences.” Record of the American Institute of Actuaries 34 (69): 14–20.
+
+Beers, HS. 1945. “Modified-Interpolation Formulas That Minimize Fourth Differences.”Record of the American Institute of Actuaries 34 (69): 14–20.
-
-
Brass, William. 1960. “The Graduation of Fertility Distributions by Polynomial Functions.” Population Studies 14 (2). Taylor & Francis: 148–62.
+
+Brass, William. 1960. “The Graduation of Fertility Distributions by Polynomial Functions.”Population Studies 14 (2): 148–62.
-
-
Fritsch, Frederick N, and Ralph E Carlson. 1980. “Monotone Piecewise Cubic Interpolation.” SIAM Journal on Numerical Analysis 17 (2). SIAM: 238–46.
+
+Fritsch, Frederick N, and Ralph E Carlson. 1980. “Monotone Piecewise Cubic Interpolation.”SIAM Journal on Numerical Analysis 17 (2): 238–46.
-
-
Hyman, James M. 1983. “Accurate Monotonicity Preserving Cubic Interpolation.” SIAM Journal on Scientific and Statistical Computing 4 (4). SIAM: 645–54.
+
+Hyman, James M. 1983. “Accurate Monotonicity Preserving Cubic Interpolation.”SIAM Journal on Scientific and Statistical Computing 4 (4): 645–54.
-
-
Pascariu, Marius D, Maciej J Dańko, Jonas Schöley, and Silvia Rizzi. 2018. “Ungroup: An R Package for Efficient Estimation of Smooth Distributions from Coarsely Binned Data.” Age 10: 0.
+
+Pascariu, Marius D, Maciej J Dańko, Jonas Schöley, and Silvia Rizzi. 2018. “Ungroup: An r Package for Efficient Estimation of Smooth Distributions from Coarsely Binned Data.”Age 10: 0.
-
-
Rizzi, Silvia, Jutta Gampe, and Paul HC Eilers. 2015. “Efficient Estimation of Smooth Distributions from Coarsely Grouped Data.” American Journal of Epidemiology 182 (2). Oxford University Press: 138–47.
+
+Rizzi, Silvia, Jutta Gampe, and Paul HC Eilers. 2015. “Efficient Estimation of Smooth Distributions from Coarsely Grouped Data.”American Journal of Epidemiology 182 (2): 138–47.
-
-
Siegel Jacob, S, and A Swanson David, eds. 2004. The Methods and Materials of Demography. 2nd ed. San Diego, USA: Elsevier Academic Press, California, USA.
+
+Siegel Jacob, S, and A Swanson David, eds. 2004. The Methods and Materials of Demography. 2nd ed. San Diego, USA: Elsevier Academic Press, California, USA.
-
-
Sprague, Thomas Bond. 1880. “Explanation of a New Formula for Interpolation.” Journal of the Institute of Actuaries 22 (4). Cambridge University Press: 270–85.
+
+Sprague, Thomas Bond. 1880. “Explanation of a New Formula for Interpolation.”Journal of the Institute of Actuaries 22 (4): 270–85.
diff --git a/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-1-1.png b/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-1-1.png
index 28e7f61ca..ea5507110 100644
Binary files a/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-1-1.png and b/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-1-1.png differ
diff --git a/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-2-1.png b/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-2-1.png
index 1c4fc9d02..a397ee0a5 100644
Binary files a/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-2-1.png and b/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-2-1.png differ
diff --git a/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-3-1.png b/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-3-1.png
index 1f9c33c13..e2699d582 100644
Binary files a/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-3-1.png and b/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-3-1.png differ
diff --git a/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-4-1.png
index 9ddd4a2ce..501ec6396 100644
Binary files a/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-4-1.png and b/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-4-1.png differ
diff --git a/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-5-1.png
index 5dee34362..c065fb723 100644
Binary files a/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-5-1.png differ
diff --git a/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-6-1.png
index 2d7c6f8f7..f3511b8ea 100644
Binary files a/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-6-1.png and b/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-6-1.png differ
diff --git a/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-7-1.png b/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-7-1.png
index 08fe01b01..a1c641a80 100644
Binary files a/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-7-1.png and b/docs/articles/graduation_with_demotools_files/figure-html/unnamed-chunk-7-1.png differ
diff --git a/docs/articles/graduation_with_demotools_files/header-attrs-2.10/header-attrs.js b/docs/articles/graduation_with_demotools_files/header-attrs-2.10/header-attrs.js
new file mode 100644
index 000000000..dd57d92e0
--- /dev/null
+++ b/docs/articles/graduation_with_demotools_files/header-attrs-2.10/header-attrs.js
@@ -0,0 +1,12 @@
+// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
+// be compatible with the behavior of Pandoc < 2.8).
+document.addEventListener('DOMContentLoaded', function(e) {
+ var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
+ var i, h, a;
+ for (i = 0; i < hs.length; i++) {
+ h = hs[i];
+ if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
+ a = h.attributes;
+ while (a.length > 0) h.removeAttribute(a[0].name);
+ }
+});
diff --git a/docs/articles/graduation_with_demotools_files/header-attrs-2.8/header-attrs.js b/docs/articles/graduation_with_demotools_files/header-attrs-2.8/header-attrs.js
new file mode 100644
index 000000000..dd57d92e0
--- /dev/null
+++ b/docs/articles/graduation_with_demotools_files/header-attrs-2.8/header-attrs.js
@@ -0,0 +1,12 @@
+// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
+// be compatible with the behavior of Pandoc < 2.8).
+document.addEventListener('DOMContentLoaded', function(e) {
+ var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
+ var i, h, a;
+ for (i = 0; i < hs.length; i++) {
+ h = hs[i];
+ if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
+ a = h.attributes;
+ while (a.length > 0) h.removeAttribute(a[0].name);
+ }
+});
diff --git a/docs/articles/graduation_with_demotools_files/header-attrs-2.9/header-attrs.js b/docs/articles/graduation_with_demotools_files/header-attrs-2.9/header-attrs.js
new file mode 100644
index 000000000..dd57d92e0
--- /dev/null
+++ b/docs/articles/graduation_with_demotools_files/header-attrs-2.9/header-attrs.js
@@ -0,0 +1,12 @@
+// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
+// be compatible with the behavior of Pandoc < 2.8).
+document.addEventListener('DOMContentLoaded', function(e) {
+ var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
+ var i, h, a;
+ for (i = 0; i < hs.length; i++) {
+ h = hs[i];
+ if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
+ a = h.attributes;
+ while (a.length > 0) h.removeAttribute(a[0].name);
+ }
+});
diff --git a/docs/articles/index.html b/docs/articles/index.html
index 6ae677515..b3a456073 100644
--- a/docs/articles/index.html
+++ b/docs/articles/index.html
@@ -79,7 +79,7 @@
DemoTools
- 01.13.20
+ 01.13.76
This can be used as an external check of population counts
+in older ages, assuming the stable population standard is representative enough, or it can be used to redistribute population in ages above a
+specified ages Redistribute_from. This is handy, for instance, for
+ensuring all censuses extend to a specified maximum age (e.g. 100+)
+prior to intercensal interpolations. The assumption is that, at least in
+ages including Age_fit and higher ages, the population should follow
+a stable pattern proportional to a given survival curve subject to
+constant growth, r.
integer vector of the lower bounds of the population age groups
+
+
+
nLx
+
numeric vector of stationary population age structure in arbitrary integer age groups
+
+
+
Age_nLx
+
integer vector of lower bounds of age groups of nLx
+
+
+
Age_fit
+
integer vector of lower bounds for age groups of Pop_fit
+
+
+
AgeInt_fit
+
integer vector of widths of age groups of Pop_fit
+
+
+
Redistribute_from
+
integer lower age bound that forms the cutoff, above which we redistribute counts using the stable standard.
+
+
+
OAnew
+
integer. Desired open age group in the output (must being element of Age_nLx)
+
+
+
method
+
character, graduation method used for intermediate graduation. Default "mono". Other reasonable choices include "pclm" or "uniform".
+
+
+
+
Details
+
+
It may be helpful to try more than one fitting possibility,
+and more than one Redistribute_from cut point, as results may vary.
+
Redistribute_from can be lower than your current open age group,
+and OAnew can be higher, as long as it is within the range of Age_nLx.
+If Age_nLx doesn't go high enough for your needs, you can extrapolate
+it ahead of time. For this, you'd want the nMx the underlie it, and you
+can use lt_abridged(), specifying a higher open age, and then
+extracting nLx again from it.
+
+
Examples
+
# India Males, 1971
+Pop<-smooth_age_5(pop1m_ind,
+ Age =0:100,
+ method ="Arriaga")
+Age_Pop<-names2age(Pop)
+AgeInt_Pop<-age2int(Age_Pop, OAvalue =1)
+
+nLx<-downloadnLx(NULL, "India","male",1971)
+
#> Downloading nLx data for India, years 1971, gender male
The stationary standard, nLx is transformed into a stable standard by optimizing a growth rate, r such that the stable standard matches observed population counts in selected age groups. Usually the ages used for fitting are wide age groups in older ages preceding the open age group. The standard output by this function is used by OPAG to create the standard used to redistribute counts over older age groups up to a specified open age group, such as 100.
numeric vector of at least two population counts to use for fitting
+
+
+
Age_fit
+
integer vector of lower bounds for age groups of Pop_fit
+
+
+
AgeInt_fit
+
integer vector of widths of age groups of Pop_fit
+
+
+
Lx1
+
numeric vector of stable population standard by single ages
+
+
+
Age_Lx1
+
integer vector of lower bounds for age groups of Lx1
+
+
+
+
Value
+
+
list constaining
+
Standard numeric vector, the transformed nLx to be used for
+redistribution in OPAG()
+
r_opt the output of optimize(), where min is the growth parameter, r
+
+
+
Details
+
+
The argument method don't have much leverage on the result. In short, the stable population transformation is done by ungrouping nLx to single ages (if it isn't already), and method controls which graduation method is used for this, where "uniform", "mono", "pclm" are the reasonable choices at this writing.
We take nLx as indicative of a stationary population age structure,
+then subject the population structure to long-term growth by a constant rate, r.
+
+
+
OPAG_nLx_warp_r(Lx1, Age_Lx1, r)
+
+
Arguments
+
+
+
+
Lx1
+
numeric vector of stationary population age structure in arbitrary integer age groups
+
+
+
Age_Lx1
+
integer vector of lower bounds of age groups of nLx
+
+
+
r
+
stable growth rate
+
+
+
+
Value
+
+
numeric vector of the transformed nLx. Note, this vector sums to 1.
+
Details
+
+
Lx1 could be any population structure of any scale, as long as you're comfortable
+assuming it's stationary and can be warped into stable. For the oldest ages, this is probably
+quite often an acceptable and useful approximation. The transformation is applied at the single-age scale, even if the input nLx is in wider (e.g. abridged) age groups. When needed, we reduce to single ages using (default) graduate_uniform(), then apply the transformation, then group back. This is innocuous if nLx is given in single ages. You may want to change method to "mono" or "pclm".
For a given set of age groups to fit against, and a given stable growth rate, $r$,
+what is the error implied given the current $r$ and stationary standard?
integer vector of lower bounds for age groups of Pop_fit
+
+
+
Pop_fit
+
numeric vector of at least two population counts to use for fitting
+
+
+
AgeInt_fit
+
integer vector of widths of age groups of Pop_fit
+
+
+
Lx1
+
numeric vector of stable population standard by single ages
+
+
+
Age_Lx1
+
integer vector of lower bounds for age groups of Lx1
+
+
+
+
Value
+
+
numeric. A residual that you're presumably trying to minimize.
+
Details
+
+
This is a utility function for OPAG(), which needs to optimize $r$ for a
+given population vector and stationary standard.
+
+
Examples
+
# Make up some population data to fit to:
+Pop_fit<-c(85000,37000)
+Age_fit<-c(70,80)
+AgeInt_fit<-c(10,10)
+nLx<-downloadnLx(NULL, "Spain","female",1971)
+
#> Downloading nLx data for Spain, years 1971, gender female
+
+
+
+
+
+
+
+
diff --git a/docs/reference/OPAG_simple.html b/docs/reference/OPAG_simple.html
index 9f63a1512..986258528 100644
--- a/docs/reference/OPAG_simple.html
+++ b/docs/reference/OPAG_simple.html
@@ -6,7 +6,7 @@
-redistripute an open age group count over higher ages proportional to an arbitrary standard — OPAG_simple • DemoTools
+redistribute an open age group count over higher ages proportional to an arbitrary standard — OPAG_simple • DemoTools
@@ -46,7 +46,7 @@
-
+
@@ -80,7 +80,7 @@
DemoTools
- 01.13.20
+ 01.13.76
Details
It is also assumed that the final age group is open, unless ageMax < max(Age).
Setting OAG = FALSE will override this and potentially include max(Age) in calculations.
The method argument determines the weighting of numerators and denominators, where the UN method puts
-twice the numerator over the sum of the adjacent ages classes, Zelnich does thrice the
+twice the numerator over the sum of the adjacent ages classes, Zelnik does thrice the
numerator over the sum of the whole range from the next lowest to the next highest age, and
Ramachandran does four times the numerator over the same sum, but with the central age
double-counted in the numerator.
@@ -229,7 +229,7 @@
This is a robustness utility, in place to avoid annoying hang-ups in LTAbr(). If data are given in non-standard ages, they are forced to standard abrdiged ages on the fly. Really this should happen prior to calling lt_abridged()
+
This is a robustness utility, in place to avoid annoying hang-ups in LTAbr(). If data are given in non-standard ages, they are forced to standard abridged ages on the fly. Really this should happen prior to calling lt_abridged()
LOESS (locally weighted smoothing) helps to smooth data over age, preserving the open age group if necessary.
It is a popular tool to create a smooth line through a timeplot or scatter plot.
Loess smoothness may be tweaked by specifying an optional "span" argument.
-Polynomial fitting is used to mooth data over age or time fitting linear models.
+Polynomial fitting is used to smooth data over age or time fitting linear models.
It can be tweaked by changing the degree and by either log or power transforming.
The open age group can be kept as-is if desired by specifying OAG = TRUE.
May be used on any age groups, including irregularly spaced, single age, or 5-year age groups.
diff --git a/docs/reference/asfr_mat_five.html b/docs/reference/asfr_mat_five.html
new file mode 100644
index 000000000..f02703456
--- /dev/null
+++ b/docs/reference/asfr_mat_five.html
@@ -0,0 +1,204 @@
+
+
+
+
The reference year for which the reported population pertain
+(these are the population counts in Females_five and
+Males_five). Can either be a decimal date, a Date class.
+If nLxDatesIn or AsfrDatesIn are not supplied and the
+corresponding nLxFemale/Male/AsfrMat is not supplied,
+refDate must be at a minimum 1962.5. This is because we can only
+fetch WPP data from 1955 onwards, and these minimum date is assumed to be
+7.5 years before refDate, meaning 1955.
+
+
+
Age
+
integer vector of lower bounds of abridged age groups given in Females_five and Males_five.
+
+
+
Females_five
+
A named numeric vector with the population counts for
+five-year abridged age groups for females in refDate. The names of the
+vector should reflect the age groups. See the example section for some
+examples.
+
+
+
Males_five
+
A named numeric vector with the population counts for
+five-year abridged age groups for males in refDate. The names of
+the vector should reflect the age groups. See the example section for
+some examples.
+
+
+
nLxFemale
+
A numeric matrix. The female nLx function of two abridged life tables
+with ages in the rows and time in columns. The earlier date should be at least
+7.5 years before the reference date of the "reported" population. The later
+date should be no earlier than one-half year before the reference date of
+the "reported" population. If not provided, it's automatically downloaded if
+location, refDate and the equivalent population counts
+*_five are provided.
+
+
+
nLxMale
+
A numeric matrix. The male nLx function of two abridged life tables
+with ages in the rows and time in columns. The dates which are represented
+in the columns are assumed to be the same as nLxDatesIn. This
+argument is only used when female is set to FALSE and
+Males_five is provided. If Males_five is provided and
+female set to FALSE, the nLx for males is
+automatically downloaded for the dates in nLxDatesIn.
+
+
+
nLxDatesIn
+
A vector of numeric years (for example, 1986). The dates
+which pertain to the columns in nLxFemale and nLxMale. If not
+provided, the function automatically determines two dates which are 8 years
+before refDate and 0.5 years after refDate.
+
+
+
AsfrMat
+
A numeric matrix. An age-period matrix of age specific
+fertility rates with age in rows, time in columns. If not provided, the
+function automatically downloads the ASFR matrix based on the dates in
+AsfrDatesIn.
+
+
+
AsfrDatesIn
+
A vector of numeric years (for example, 1986). These are
+the dates which pertain to the columns in AsfrMat. If not provided,
+the function automatically determines two dates which are 8 years before
+refDate and 0.5 before refDate.
+
+
+
...
+
Arguments passed to \link{interp}. In particular, users
+might be interested in changing the interpolation method for the nLx*
+matrices and the Asfr matrix. By default, it's linearly interpolated.
+
+
+
SRB
+
A numeric. Sex ratio at birth (males / females). Default is set
+to 1.046. Only a maximum of three values permitted.
+
+
+
SRBDatesIn
+
A vector of numeric years (for example, 1986). Only a maximum
+number of three dates allowed. These are
+the dates which pertain to the values in SRB. If not provided,
+the function automatically determines three dates which are 7.5 years,
+2.5 and 0.5 years before refDate.
+
+
+
radix
+
starting point to use in the adjustment of the three first age
+groups. Default is NULL. If not provided, it is inferred based on the scale of age 1L0.
+
+
+
verbose
+
when downloading new data, should the function print details
+about the download at each step? Defaults to TRUE. We recommend the
+user to set this to TRUE at all times because the function needs to
+make decisions (such as picking the dates for the Asfr and nLx) that the user
+should be aware of.
+
+
+
+
Value
+
+
basepop_five returns a list with the following elements:
+*
+
Females_adjusted numeric vector of adjusted population counts for females. Age groups 0, 1-4, and 5-9 are adjusted, while ages 10 and higher are unchanged.
+
Males_adjusted numeric vector of adjusted population counts for males. Age groups 0, 1-4, and 5-9 are adjusted, while ages 10 and higher are unchanged.
+
Females_five numeric vector of female population counts given as input.
+
Males_five numeric vector of male population counts given as input.
+
nLxf numeric matrix of female nLx, abridged ages in rows and (potentially interpolated) time in columns. Potentially downloaded.
+
nLxm numeric matrix of male nLx, abridged ages in rows and (potentially interpolated) time in columns. Potentially downloaded.
+
Asfr numeric matrix of age specific fertility in 5-year age groups ages 15-19 until 45-49 in rows, and (potentially interpolated) time in columns. Potentially downloaded.
+
Exposure_female numeric matrix of approximated age-specific exposure in 5-year age groups ages 15-19 until 45-49 in rows, and (potentially interpolated) time in columns.
+
Bt births at three time points prior to census corresponding to the midpoints of the cohorts entering ages 0, 1-4, and 5-9.
+
SRB sex ratio at birth at three time points prior to census corresponding to the midpoints of the cohorts entering ages 0, 1-4, and 5-9. Potentially downloaded.
+
Age age groups of the input population counts.
+
+
+
Details
+
+
basepop_five and basepop_single can estimate both the BPA and
+BPE methods. If the user specifies SmoothedFemales, both
+basepop_* functions will return the BPA method.
+If SmoothedFemales is left empty, both basepop_* functions will
+adjust using the BPE method.
+
For basepop_five, adjusting the female population counts is the
+default. For this, only the location, refDate and
+Females_five are needed. All other arguments are downloaded
+or set to sensible defaults. For adjusting the male population
+counts, the user needs to specify the Males_five population
+counts and set female = FALSE.
+
Currently, basepop_five works only with five year abridged age groups
+
The BPE method is used by default. To adjust the counts using
+the BPA method, the user needs to provide the SmoothedFemales
+argument. This is the female population counts passed through
+a smoothing function such as smooth_age_5. See the examples
+section for some examples.
+
BPA
+
+
+
+
+
Description:
+
The method estimates a smoothed population ages 10 and over and adjusts
+the population under age 10 using the smoothed population and estimates
+of fertility and mortality.
+
Based on the smoothed female population counts, it rejuvenates the female
+"reported" population 20 to 59 years of age for the two 5 year periods prior
+to the census date to represent the female population in reproductive ages
+5 and 10 years earlier. Based on the rejuvenated population and fertility
+and mortality levels, the method then estimates the male and female births
+during the two 5 year periods prior to the census date. Next, it projects
+the two 5-year birth cohorts to the census date. The projected figures
+represent the adjusted population ages 0 to 4 years and 5 to 9 years
+at the census date.
+
Advantages:
+
(1) The method adjusts under-10 population to be consistent with fertility
+and mortality levels and adjusted adult female population.
+
Limitations:
+
(1) BPA assumes a linear change in fertility and mortality during the decade
+prior to the reference year.
+
(2) The procedure ignores migration, which can lead to misleading results.
+There are two issues. First, age groups 0-4 and 5-9 are subject to migration,
+which would affect the comparability of estimated and reported populations
+in the base year. Second, the estimated size of age groups 0-4 and 5-9 are
+calculated from numbers of women of reproductive age in the base year
+rejuvenated to points in the past. With migration, rejuvenated number of
+women may exceed or be smaller than the number present, and giving
+birth to children, in the decade prior to the base year.
+
(3) BPA’s smoothing calculations may mask unusual, but real, variations
+in population age group size. Smoothing irregularities in age structure
+not attributable to age misreporting will distort estimated births and
+survived children in the base year.
+
Assumptions:
+
(1) No significant international migration took place within the
+reference periods for the population, mortality, and fertility input.
+
(2) The data input as the "reported" population is not affected by
+underenumeration of persons in certain ages, nor by age misreporting.
+
BPE
+
+
+
+
+
Description:
+
The method adjusts the population under age 10 using the reported population
+ages 10 and above and estimates of fertility and mortality.
+
The method rejuvenates the reported female population 20 to 59 years of age
+for the two 5 year periods prior to the census date to represent the female
+population in reproductive ages 5 and 10 years earlier. Based on the
+rejuvenated population and fertility and mortality levels, the method then
+estimates the male and female births during the two 5 year periods prior to
+the census date. Next, it projects the two 5-year birth cohorts to the
+census date. The projected figures represent the adjusted population ages
+0 to 4 years and 5 to 9 years at the census date.
+
Advantages:
+
(1) The method adjusts the under-10 population to be consistent with
+fertility and mortality levels and adult female population.
+
Limitations:
+
(1) BPE assumes a linear change in fertility and mortality during the decade
+prior to the reference year.
+
(2) The procedure ignores migration, which can lead to misleading results.
+There are two issues. First, age groups 0-4 and 5-9 are subject to
+migration, which would affect the comparability of estimated and reported
+populations in the base year. Second, the estimated size of age groups
+0-4 and 5-9 are calculated from numbers of women of reproductive age in
+the base year rejuvenated to points in the past. With migration, rejuvenated
+number of women may exceed or be smaller than the number present, and
+giving birth to children, in the decade prior to the base year.
+
(3) The method does not adjust for possible underenumeration and age
+misreporting errors in the over-10 “reported” population. If the
+reported population is subject to age-misreporting or age-sex-specific
+underenumeration, the over-10 population should be smoothed or otherwise
+corrected prior to use.
+
Assumptions:
+
(1) No significant international migration took place within the reference
+periods for the population, mortality, and fertility input.
+
(2) The data input as the “reported” population is not affected by
+underenumeration of persons in certain ages, nor by age misreporting.
+
References
+
+
Arriaga EE, Johnson PD, Jamison E (1994).
+Population analysis with microcomputers, volume 1.
+Bureau of the Census.
+
+United States Census Bureau (2017).
+“Population Analysis System (PAS) Software.”
+https://www.census.gov/data/software/pas.html, https://www.census.gov/data/software/pas.html.
+
+
Examples
+
+ if(FALSE){
+
+################ BPE (five year age groups) #####################
+
+# Grab population counts for females
+refDate<-1986
+location<-"Brazil"
+pop_female_single<-fertestr::FetchPopWpp2019(location,
+ refDate,
+ ages =0:100,
+ sex ="female")
+pop_female_counts<-single2abridged(setNames(pop_female_single$pop,
+ pop_female_single$ages))
+pop_male_single<-fertestr::FetchPopWpp2019(location,
+ refDate,
+ ages =0:100,
+ sex ="male")
+pop_male_counts<-single2abridged(setNames(pop_male_single$pop,
+ pop_male_single$ages))
+Age<-names2age(pop_male_counts)
+# Automatically downloads the nLx, ASFR, and SRB data
+bpe<-basepop_five(
+ location =location,
+ refDate =refDate,
+ Females_five =pop_female_counts,
+ Males_five =pop_male_counts,
+ Age =Age
+)
+
+# The counts for the first three age groups have been adjusted:
+bpe$Females_adjusted[1:3]
+pop_female_counts[1:3]
+
+bpe$Males_adjusted[1:3]
+pop_male_counts[1:3]
+
+
+################ BPE (for single ages) ############################
+# blocked out for now, until single age function refactored as
+# TR: actually, it just needs to be rethought for single ages..
+# pop_female_single <- setNames(pop_female_single$pop, pop_female_single$ages)
+#
+# # Automatically downloads the nLx and ASFR data
+# bpe_female <- basepop_single(
+# location = location,
+# refDate = refDate,
+# Females_single = pop_female_single
+# )
+#
+# # The counts for the first 10 age groups have been adjusted:
+# bpe_female[1:10]
+# pop_female_single[1:10]
+################ BPA (five year age groups) #####################
+# for BPA, smooth counts in advance
+smoothed_females<-smooth_age_5(Value =pop_female_counts,
+ Age =Age,
+ method ="Arriaga",
+ OAG =TRUE,
+ young.tail ="Original")
+# Note, smooth_age_5() will group infants into the 0-4 age group. So,
+# we manually stick them back in place.
+smoothed_females<-c(pop_female_counts[1:2], smoothed_females[-1])
+smoothed_males<-smooth_age_5(Value =pop_male_counts,
+ Age =Age,
+ method ="Arriaga",
+ OAG =TRUE,
+ young.tail ="Original")
+smoothed_males<-c(pop_male_counts[1:2], smoothed_males[-1])
+
+# Automatically downloads the nLx, ASFR, and SRB data
+bpa<-basepop_five(
+ location =location,
+ refDate =refDate,
+ Females_five =smoothed_females,
+ Males_five =smoothed_males,
+ Age =Age
+)
+
+# The counts for the first three age groups have been adjusted:
+bpa$Females_adjusted[1:3]
+smoothed_females[1:3]
+pop_female_counts[1:3]
+
+bpa$Males_adjusted[1:3]
+smoothed_males[1:3]
+pop_male_counts[1:3]
+
+################ PAS example ###############################
+
+ # (1) refDate
+ refDate<-1986.21
+
+ # (2) Reported population by 5-year age groups and sex in the base year
+ # (Include unknowns).
+
+ pop_male_counts<-c(11684, 46738, 55639, 37514, 29398, 27187, 27770, 20920, 16973,
+ 14999, 11330, 10415, 6164, 7330, 3882, 3882, 1840, 4200)
+
+ pop_female_counts<-c(11673, 46693, 55812, 35268, 33672, 31352, 33038, 24029, 16120,
+ 14679, 8831, 9289, 4172, 6174, 2715, 3344, 1455, 4143)
+ Age<-c(0,1, seq(5, 80, by =5))
+
+ # (4) Sex ratio at birth (m/f)
+ sex_ratio<-1.0300
+
+ # (6) The male and female nLx functions for ages under 1 year, 1 to 4 years, and 5 to 9
+ # years, pertaining to an earlier and later date
+ nLxDatesIn<-c(1977.31, 1986.50)
+
+ nLxMale<-matrix(c(87732, 304435, 361064, 88451, 310605, 370362),
+ nrow =3, ncol =2)
+
+ nLxFemale<-matrix(c(89842, 314521, 372681, 353053, 340650, 326588,
+ 311481, 295396, 278646, 261260, 241395,217419,
+ 90478, 320755, 382531, 364776, 353538, 340687,
+ 326701, 311573, 295501, 278494, 258748,234587),
+ nrow =12,
+ ncol =2)
+
+ # (7) A set of age-specific fertility rates pertaining to an earlier and later
+ # date
+
+ asfrmat<-structure(
+ c(0.2, 0.3, 0.3, 0.25, 0.2, 0.15, 0.05, 0.15, 0.2,
+ 0.275, 0.225, 0.175, 0.125, 0.05), .Dim =c(7L, 2L),
+ .Dimnames =list(
+ c("15-19", "20-24", "25-29", "30-34", "35-39", "40-44", "45-49"),
+ c("1977.81", "1985.71")))
+
+ # (8) The dates to which the earlier and later ASFRs correspond
+ AsfrDatesIn<-c(1977.81, 1985.71)
+
+ # for BPA, smooth counts in advance
+ smoothed_females<-smooth_age_5(Value =pop_female_counts,
+ Age =Age,
+ method ="Arriaga",
+ OAG =TRUE,
+ young.tail ="Original")
+ smoothed_females<-c(pop_female_counts[1:2], smoothed_females[-1])
+ smoothed_males<-smooth_age_5(Value =pop_male_counts,
+ Age =Age,
+ method ="Arriaga",
+ OAG =TRUE,
+ young.tail ="Original")
+ smoothed_males<-c(pop_male_counts[1:2], smoothed_males[-1])
+ ## This is the only number that messes up the whole calculation.
+ ## smooth_age_5 returns the same result as the PASS excel sheet
+ ## except for the age groups 10-15 and 15-19. Here we only use
+ ## age group 15-19. If we plug in manually the correct value,
+ ## we get all results match exactly, otherwise there are
+ ## some differences.
+ smoothed_females[4]<-34721
+
+ # For adjusting using BPA for males, we need to specify
+ # female = FALSE with Males and nLxMale.
+ bpa<-
+ basepop_five(
+ refDate =refDate,
+ Males_five =smoothed_males,
+ Females_five =smoothed_females,
+ Age =Age,
+ SRB =sex_ratio,
+ nLxFemale =nLxFemale,
+ nLxMale =nLxMale,
+ nLxDatesIn =nLxDatesIn,
+ AsfrMat =asfrmat,
+ AsfrDatesIn =AsfrDatesIn,
+ radix =1e5
+ )
+
+ # See adjustments?
+ pop_male_counts[1:3]
+ bpa$Male_adjusted[1:3]
+
+ pop_female_counts[1:3]
+ bpa$Female_adjusted[1:3]
+
+ # For adjustment using BPE, we use exactly the same definitions as above
+ # but use the original inputs
+
+ bpe<-
+ basepop_five(
+ refDate =refDate,
+ Females_five =pop_female_counts,
+ Males_five =pop_male_counts,
+ SRB =sex_ratio,
+ nLxFemale =nLxFemale,
+ nLxDatesIn =nLxDatesIn,
+ AsfrMat =asfrmat,
+ AsfrDatesIn =AsfrDatesIn
+ )
+
+ pop_female_counts[1:3]
+ bpe$Females_adjusted[1:3]
+
+# basepop_single for single ages
+# Single ages for males and females
+
+# pop_male_counts <-
+# c(11684, 11473, 11647, 11939, 11680, 10600, 11100, 11157, 11238,
+# 11544, 7216, 7407, 7461, 7656, 7774, 5709, 5629, 5745, 6056,
+# 6259, 5303, 5423, 5497, 5547, 5417, 5441, 5466, 5500, 5668, 5694,
+# 4365, 4252, 4122, 4142, 4039, 3210, 3222, 3258, 3413, 3871, 2684,
+# 2844, 3052, 3182, 3237, 2263, 2298, 2318, 2257, 2194, 2231, 2172,
+# 2072, 2008, 1932, 1301, 1262, 1213, 1197, 1191, 1601, 1593, 1490,
+# 1348, 1299, 568, 745, 843, 801, 925, 806, 883, 796, 725, 672,
+# 470, 441, 340, 300, 289, 4200)
+#
+# pop_female_counts <-
+# c(11673, 11474, 11670, 11934, 11614, 10603, 11144, 11179, 11269,
+# 11617, 6772, 6948, 7030, 7211, 7306, 6531, 6443, 6535, 6951,
+# 7213, 6096, 6234, 6327, 6410, 6285, 6464, 6492, 6549, 6739, 6795,
+# 5013, 4888, 4735, 4747, 4646, 3040, 3068, 3107, 3246, 3658, 2650,
+# 2788, 2977, 3108, 3156, 1756, 1784, 1802, 1764, 1724, 1982, 1935,
+# 1846, 1795, 1731, 863, 850, 825, 819, 816, 1348, 1342, 1246,
+# 1138, 1101, 391, 520, 585, 560, 659, 670, 750, 686, 634, 604,
+# 353, 340, 270, 246, 247, 4143)
+# Age <- 0:80
+#
+# smoothed_females <- smooth_age_5(Value = pop_female_counts,
+# Age = Age,
+# method = "Arriaga",
+# OAG = TRUE,
+# young.tail = "Original")
+# smoothed_males <- smooth_age_5(Value = pop_male_counts,
+# Age = Age,
+# method = "Arriaga",
+# OAG = TRUE,
+# young.tail = "Original")
+
+ # For adjusting using BPA for males, we need to specify
+ # female = FALSE with Males and nLxMale.
+
+ # This needs work still
+ # bpa_male <-
+ # basepop_single(
+ # refDate = refDate,
+ # Males_single = pop_male_counts,
+ # Females_single = pop_female_counts,
+ # SRB = sex_ratio,
+ # nLxFemale = nLxFemale,
+ # nLxMale = nLxMale,
+ # nLxDatesIn = nLxDatesIn,
+ # AsfrMat = asfrmat,
+ # AsfrDatesIn = AsfrDatesIn
+ # )
+
+ # See adjustments?
+ # pop_male_counts[1:10]
+ # bpa_male[1:10]
+
+ # Adjusting the BPA for females requires less arguments
+ # bpa_female <-
+ # basepop_single(
+ # refDate = refDate,
+ # Females_single = pop_female_counts,
+ # SmoothedFemales = smoothed_females,
+ # SRB = sex_ratio,
+ # nLxFemale = nLxFemale,
+ # nLxDatesIn = nLxDatesIn,
+ # AsfrMat = asfrmat,
+ # AsfrDatesIn = AsfrDatesIn
+ # )
+
+ # pop_female_counts[1:10]
+ # bpa_female[1:10]
+ #
+ # # For adjustment using BPE, we use exactly the same definitions as above
+ # # but remove SmoothedFemales.
+ # bpe_male <-
+ # basepop_single(
+ # refDate = refDate,
+ # Males_single = pop_male_counts,
+ # Females_single = pop_female_counts,
+ # SRB = sex_ratio,
+ # nLxFemale = nLxFemale,
+ # nLxMale = nLxMale,
+ # nLxDatesIn = nLxDatesIn,
+ # AsfrMat = asfrmat,
+ # AsfrDatesIn = AsfrDatesIn,
+ # female = FALSE
+ # )
+
+ # See adjustments?
+ # pop_male_counts[1:10]
+ # bpa_male[1:10]
+ # bpe_male[1:10]
+
+ # Adjusting the BPA for females requires less arguments
+ # bpe_female <-
+ # basepop_single(
+ # refDate = refDate,
+ # Females_single = pop_female_counts,
+ # SRB = sex_ratio,
+ # nLxFemale = nLxFemale,
+ # nLxDatesIn = nLxDatesIn,
+ # AsfrMat = asfrmat,
+ # AsfrDatesIn = AsfrDatesIn
+ # )
+ #
+ # pop_female_counts[1:10]
+ # bpa_female[1:10]
+ # bpe_female[1:10]
+
+ }
+
+
If you shift the groupings, then the first age groups may have a negative lower bound
-(for example of -5). These counts would be discarded for the osculatory version of Sprague smoothing,
+(for example of -5). These counts would be discarded for the osculatory version of Sprague smoothing,
for example, but they are preserved in this function. The important thing to know is that if you shift
the groups, the first and last groups won't be N years wide. For example if shiftdown is 1,
the first age group is 4-ages wide.
Matches the (single) ages of a census to single cohorts. For use in intercensal interpolations. Ages are potentially blended to match single cohort line assuming that the population in each age is uniformly distributed over the age group.
+
+
+
census_cohort_adjust(pop, age, date)
+
+
Arguments
+
+
+
+
pop
+
numeric vector. Population counts in age groups, presumably from a census with an exact reference date.
+
+
+
age
+
integer vector. Lower bound of single age groups
+
+
+
date
+
Either a Date class object or an unambiguous character string in the format "YYYY-MM-DD".
ageMax is an inclusive upper bound, treated as interval. If you want ages
-23 to 77, then give ageMin = 23 and ageMax = 77, not 80. The ageMin is respected strictly, whereas ageMax could be higher than the actual maximum age used. You can see the age ranges actually used by specifying details = TRUE.
+23 to 77, then give ageMin = 23 and ageMax = 77. The ageMin is respected strictly, whereas ageMax is calculated flexibly: if you specify something too high then it is reduced and we warn accordingly, and if it's missing then we pick something reasonable. You can see the age ranges actually used by specifying details = TRUE.
References
United States Census Bureau (2017).
“Population Analysis System (PAS) Software.”
-https://www.census.gov/data/software/pas.html.
+https://www.census.gov/data/software/pas.html.
Bachi R (1951).
“The tendency to round off age returns: measurement and correction.”
diff --git a/docs/reference/check_heaping_coale_li.html b/docs/reference/check_heaping_coale_li.html
index 4b4c61330..588851a29 100644
--- a/docs/reference/check_heaping_coale_li.html
+++ b/docs/reference/check_heaping_coale_li.html
@@ -84,7 +84,7 @@
DemoTools
- 01.13.20
+ 01.13.76
The index looks down two ages and up two ages, so the data must accommodate that range. The denominator is a mean of the counts in the sourrounding 5 single ages. The kind of mean can be controlled with the pow argument. By default, this takes the antilog of the arithmetic mean of the natural log of the five denominator counts. That will fail if one of the counts is equal to 0. In such cases, another power, such as 2 or 10 or 100 may be used, which is more robust to 0s. The higher the power, the closer the result will resemble the default output. If pow=="exp" but a 0 is detected among the denominator ages, then pow is assigned a value of 1000. pow=1 would imply an arithmetic mean in the denominator.
+
The index looks down two ages and up two ages, so the data must accommodate that range. The denominator is a mean of the counts in the surrounding 5 single ages. The kind of mean can be controlled with the pow argument. By default, this takes the antilog of the arithmetic mean of the natural log of the five denominator counts. That will fail if one of the counts is equal to 0. In such cases, another power, such as 2 or 10 or 100 may be used, which is more robust to 0s. The higher the power, the closer the result will resemble the default output. If pow=="exp" but a 0 is detected among the denominator ages, then pow is assigned a value of 1000. pow=1 would imply an arithmetic mean in the denominator.
For a given age-structured vector of counts, how rough is data after grouping to 5-year age bins? Data may require smoothing even if there is no detectable sawtooth pattern. It is best to use the value in this method together with visual evidence to gauage whether use of smooth_age_5() is recommended.
+
For a given age-structured vector of counts, how rough is data after grouping to 5-year age bins? Data may require smoothing even if there is no detectable sawtooth pattern. It is best to use the value in this method together with visual evidence to gauge whether use of smooth_age_5() is recommended.
check_heaping_roughness(
@@ -186,7 +186,7 @@
Arg
ageMax
-
integer evently divisibly by 5. Upper bound of evaluated age range, defaults to highest age evenly divisible by 10.
+
integer evenly divisible by 5. Upper bound of evaluated age range, defaults to highest age evenly divisible by 10.
Detect if heaping is worse on terminal digits 0s than on 5s
-
Ages ending in 0 often have higher apparent heaping than ages ending in 5. In this case, data in 5-year age bins might show a sawtooth pattern. If heaping ocurrs in roughly the same amount on 0s and 5s, then it may be sufficient to group data into 5-year age groups and then graduate back to single ages. However, if heaping is worse on 0s, then this procedure tends to produce a wavy pattern in count data, with 10-year periodicity. In this case it is recommended to use one of the methods of smooth_age_5() as an intermediate step before graduation.
+
Ages ending in 0 often have higher apparent heaping than ages ending in 5. In this case, data in 5-year age bins might show a sawtooth pattern. If heaping occurs in roughly the same amount on 0s and 5s, then it may be sufficient to group data into 5-year age groups and then graduate back to single ages. However, if heaping is worse on 0s, then this procedure tends to produce a wavy pattern in count data, with 10-year periodicity. In this case it is recommended to use one of the methods of smooth_age_5() as an intermediate step before graduation.
check_heaping_sawtooth(
@@ -186,7 +186,7 @@
Arg
ageMax
-
integer evently divisibly by 5. Upper bound of evaluated age range, defaults to highest age evenly divisible by 10.
+
integer evenly divisible by 5. Upper bound of evaluated age range, defaults to highest age evenly divisible by 10.
Numeric expression of the date, year plus the fraction of the year passed as of the date.
Details
-
This makes use of the lubridate::decimal_date to compute the proportion of the year that has passed. If the date is numeric, it is returned as such. If it is "character", we try to coerce to date through lubridate::ymd, ergo, it is best to specify a character string in an unambiguous "YYYY-MM-DD" format. If date is given in a "Date" class it is dealt with accordingly.
+
This makes use of the lubridate::decimal_date to compute the proportion of the year that has passed. If the date is numeric, it is returned as such. If it is "character", we try to coerce to date through lubridate::ymd, ergo, it is best to specify a character string in an unambiguous "YYYY-MM-DD" format. If date is given in a "Date" class it is dealt with accordingly.
We extract ASFRx from wpp2019, interpolated to exact dates. Different methods available.
+A vector of countries can be handled, but only with a single sex. Row names are not indicative of countries.
character. Could be "linear", "exponential", or "power"
+
+
+
+
Value
+
+
numeric matrix interpolated asfr
+
+
Examples
+
# Total fertility ratio calculated from ASFRx downloaded from WPP19.
+# See `downloadnLx` for analogous examples on multiple countries or using codes instead of names.
+ASFR_Arg<-downloadAsfr(Asfrmat =NULL, location ="Argentina", AsfrDatesIn =1950:2025)
+
We extract Lx from wpp2019, interpolated to exact dates. Different methods available.
+A vector of countries can be handled, but only with a single sex. Row names are not indicative of countries.
numeric. either NULL or a numeric vector of lifetable exposure. If it's the second then we just pass it back.
+
+
+
location
+
vector. UN Pop Div LocName or LocID
+
+
+
gender
+
character. "male", "female", or "both"
+
+
+
nLxDatesIn
+
numeric. Vector of three decimal dates produced by (or passed through) basepop_five()
+
+
+
method
+
character. Could be "linear", "exponential", or "power"
+
+
+
+
Value
+
+
numeric matrix of nLx with length(nLxDatesIn) and abridged ages in rows.
+
+
Examples
+
# life expectancy calculated from Lx downloaded from WPP19. Using names or codes.
+Lxs_name<-downloadnLx(nLx=NULL, location ="Argentina",
+ gender ="both", nLxDatesIn =1950:2030)
+
Function that determines the case/problem we have to solve
+It also performs some checks
+
+
+
find_my_case(par_ind)
+
+
Arguments
+
+
+
+
par_ind
+
logical vector of length 5
+
+
+
+
Details
+
+
par_ind should consist of logicals in the following order: q0_5, q0_1, q15_45, q15_35, e0. This is faithfully constructed in calling functions as required.
Beers may either be ordinary "beers(ord)" or modified "beers(mod)", and either can pass on the optional argument johnson = TRUE if desired (this has a different distribution pattern for young ages, FALSE by default). If method = "beers" is given, then "beers(ord)" is used.
This wrapper standardizes some inconsistencies in how open ages are dealt with. For example, with the "pclm" method, the last age group can be redistributed over a specified interval implied by increasing OAnew beyond the range of Age. To get this same behavior from "mono", or "uniform" specify OAG = FALSE along with an appropriately high OAnew (or integer final value of AgeInt).
OAnew cannot be higher than max(Age)+4 for "sprague" or "beers" methods. For "uniform","mono","pclm" it can be higher than this, and in each case the open age group is completely redistributed within this range, meaning it's not really open anymore.
-
For all methods, negative values are detected in output. If present, we deal with these in the following way: we take the geometric mean between the given output (with negative imputed with 0s) and the output of graduate_mono(), which is guaranteed non-negative. This only affects age groups where negatives were produced in the first pass. In our experience this only arises when using sprague, beers, or grabill methods, whereas all others are guarateed non-negative.
+
For all methods, negative values are detected in output. If present, we deal with these in the following way: we take the geometric mean between the given output (with negative imputed with 0s) and the output of graduate_mono(), which is guaranteed non-negative. This only affects age groups where negatives were produced in the first pass. In our experience this only arises when using Sprague, Beers, or Grabill methods, whereas all others are guaranteed non-negative.
For any case where input data are in single ages, constraining results to sum to values in the original age groups will simply return the original input data, which is clearly not your intent. This might arise when using graduation as an implicit two-step smoother (group + graduate). In this case, separate the steps, first group using groupAges() then use graduate(..., constrain = TRUE).
# pclm can also graduate rates if both
+#> 4.120705 2.953872 2.110491 1.504532 1.071260
# pclm can also graduate rates if both# numerators and denominators are on hand:Exposures<-c(100958,466275,624134,559559,446736,370653,301862,249409,
247473,223014,172260,149338,127242,105715,79614,53660,
diff --git a/docs/reference/graduate_beers.html b/docs/reference/graduate_beers.html
index ba07b1576..2e668b811 100644
--- a/docs/reference/graduate_beers.html
+++ b/docs/reference/graduate_beers.html
@@ -80,7 +80,7 @@
DemoTools
- 01.13.20
+ 01.13.76
Ages should refer to lower age bounds. Value must be labelled with ages unless Age is given separately. There must be at least six 5-year age groups (including the open group, 5 otherwise). If you want the johnson adjustment then Value must contain a single-year estimate of the population count in age 0. That means Value must come either as standard abridged or single age data.
-
If the highest age does not end in a 0 or 5, and OAG == TRUE, then the open age will be grouped down to the next highest age ending in 0 or 5. If the highest age does not end in a 0 or 5, and OAG == FALSE, then results extend to single ages covering the entire 5-year age group.
+
Ages should refer to lower age bounds. Value must be labeled with ages unless Age is given separately. There must be at least six 5-year age groups (including the open group, 5 otherwise). If you want the johnson adjustment then Value must contain a single-year estimate of the population count in age 0. That means Value must come either as standard abridged or single age data.
+
method option "ord" conserves sums in 5-year age groups, whereas "mod" does some smoothing between 5-year age groups too, and is not constrained.
+
If the highest age does not end in a 0 or 5, and OAG == TRUE, then the open age will be grouped down to the next highest age ending in 0 or 5. If the highest age does not end in a 0 or 5, and OAG = FALSE, then results extend to single ages covering the entire 5-year age group.
References
Beers HS (1945).
@@ -231,10 +232,10 @@
Examp
#> 25 30386 30605 30978 31439 31940
p1<-graduate_beers(p5[,1], Age =a5, OAG =FALSE)head(p1)
Sprague estimated single-age population counts for the first and final ten ages. Open age groups are preserved, as are annual totals.
-
graduate_grabill(Value, Age, OAG =TRUE)
+
graduate_grabill(Value, Age, AgeInt, OAG =TRUE)
Arguments
@@ -177,6 +177,10 @@
Arg
Age
integer vector, lower bounds of age groups
+
+
AgeInt
+
integer vector, age interval widths
+
OAG
logical, default = TRUE is the final age group open?
@@ -188,7 +192,7 @@
Value
numeric vector in single ages.
Details
-
Dimension labelling is necessary. There must be at least six age groups (including the open group). One year of data will work as well, as long as it's given as a single-column matrix. Data may be given in either single or grouped ages. If the highest age does not end in a 0 or 5, and OAG == TRUE, then the open age will be grouped down to the next highest age ending in 0 or 5. If the highest age does not end in a 0 or 5, and OAG == FALSE, then results extend to single ages covering the entire 5-year age group.
+
Dimension labeling is necessary. There must be at least six age groups (including the open group). One year of data will work as well, as long as it's given as a single-column matrix. Data may be given in either single or grouped ages. If the highest age does not end in a 0 or 5, and OAG == TRUE, then the open age will be grouped down to the next highest age ending in 0 or 5. If the highest age does not end in a 0 or 5, and OAG == FALSE, then results extend to single ages covering the entire 5-year age group.
Take the cumulative sum of Value and then run a monotonic spline through it. The first differences split back single-age estimates of Value. Optionally keep the open age group untouched.
-
graduate_mono(Value, AgeInt, Age, OAG =TRUE)
+
graduate_mono(Value, Age, AgeInt, OAG =TRUE)
Arguments
@@ -171,14 +171,14 @@
Arg
Value
numeric vector, presumably counts in grouped ages
-
-
AgeInt
-
integer vector, age interval widths
-
Age
integer vector, lower bounds of age groups
+
+
AgeInt
+
integer vector, age interval widths
+
OAG
logical, default = TRUE is the final age group open?
@@ -190,7 +190,7 @@
Value
Numeric. vector of single smoothed age counts.
Details
-
The "monoH.FC" method of stats::splinefun() is used to fit the spline because 1) it passes exactly through the points, 2) it is monotonic and therefore guarantees positive counts, and 3) it seems to be a bit less wiggly (lower average first differences of split counts). Single-age data is returned as-is. If you want to use this function as a smoother you first need to group to non-single ages.
+
The "hyman" method of stats::splinefun() is used to fit the spline because 1) it passes exactly through the points, 2) it is monotonic and therefore guarantees positive counts, and 3) it seems to be a bit less wiggly (lower average first differences of split counts). Single-age data is returned as-is. If you want to use this function as a smoother you first need to group to non-single ages. Age can be any age range; it does not need to start at 0.
References
Fritsch FN, Carlson RE (1980).
@@ -204,69 +204,80 @@
Examp
"10", "15", "20", "25", "30", "35", "40", "45", "50", "55", "60",
"65", "70", "75", "80", "85", "90", "95", "100"))
-# overwrite open age group with a single age estimate for that age
-# (doesn't extrapolate)
-graduate_mono(Value)
-
# or respect open age group
+# if the last age group is closed, then it's best to use AgeInt, otherwise,
+# one is assumed from the age siphoned from the names attribute of Value.
+graduate_mono(Value, OAG =FALSE)
+
The pivotAge must be at least 10 years below the maximum age detected from
-rownames(popmat), but not lower than 75. In the exact pivotAge, we may either take the Sprague estimates or the spline estimates, depending on which is larger, then the single-age estimates for this 5-year age group are rescaled to sum to the original total in Value. Higher ages are taken from the spline-based age splits. The spline results are derive from the "monoH.FC" method of splinefun() on the cumulative sum of the original age grouped data. One could use this function to perform the same closeout to Grabill estimates, if these are given via the pops argument. See examples. Note that the Grabill split method mixed with this closeout will not necessarily preserve the annual totals, and this function performs to rescaling. The open age group is preserved (and must be included in Value).
+rownames(popmat), but not lower than 75. In the exact pivotAge, we may either take the Sprague estimates or the spline estimates, depending on which is larger, then the single-age estimates for this 5-year age group are rescaled to sum to the original total in Value. Higher ages are taken from the spline-based age splits. The spline results are derived from the "hyman" method of splinefun() on the cumulative sum of the original age grouped data. One could use this function to perform the same closeout to Grabill estimates, if these are given via the pops argument. See examples. Note that the Grabill split method mixed with this closeout will not necessarily preserve the annual totals, and this function performs no rescaling. The open age group is preserved (and must be included in Value).
# giving a different single-age split to close out this way:popg<-graduate_grabill(Value =popvec, Age =a5, OAG =TRUE)grabill.closed.out<-graduate_mono_closeout(Value =popvec, Age =a5, pops =popg)# totals not necessarily preserved if mixed w Grabill
@@ -258,7 +262,7 @@
Examp
# one may wish to instead rescale results colSums() of# popg at age pivotAge and higher.sum(grabill.closed.out)-sum(popvec)
-
#> [1] -22.01899
# also works on an age-labelled vector of data
+
#> [1] -22.01899
# also works on an age-labeled vector of dataclosed.vec<-graduate_mono_closeout(popvec, Age =a5, OAG =TRUE)# let's compare this one with sprague()simple.vec<-graduate_sprague(popvec, Age =a5, OAG =TRUE)
diff --git a/docs/reference/graduate_pclm.html b/docs/reference/graduate_pclm.html
index bd1561314..60994af99 100644
--- a/docs/reference/graduate_pclm.html
+++ b/docs/reference/graduate_pclm.html
@@ -80,7 +80,7 @@
DemoTools
- 01.13.20
+ 01.13.76
The PCLM method can also be used to graduate rates using an offset if both numerators and denominators are available. In this case Value is the event count and offset is person years of exposure. The denominator must match the length of Value or else the length of the final single age result length(min(Age):OAnew). This method can be used to redistribute counts in the open age group if OAnew gives sufficient space. Likewise, it can give a rate extrapolation beyond the open age.
+
If there are 0s in Value, these are replaced with a small value prior to fitting. If negatives result from the pclm fit, we retry after multiplying Value by 10, 100, or 1000, as sometimes a temporary rescale for fitting can help performance.
+
Age can be any age range; it does not need to start at 0.
This method is used to interpolate counts based on the Sprague formula. It is based on the first stage of the Sprague R script prepared by Thomas Buettner and Patrick Gerland, itself based on the description in Siegel and Swanson, 2004, p. 727.
-
graduate_sprague(Value, Age, OAG =TRUE)
+
graduate_sprague(Value, Age, AgeInt, OAG =TRUE)
Arguments
@@ -175,6 +175,10 @@
Arg
Age
integer vector, lower bounds of age groups
+
+
AgeInt
+
integer vector, age interval widths
+
OAG
logical, default = TRUE is the final age group open?
@@ -186,7 +190,7 @@
Value
Numeric vector of counts split into single ages.
Details
-
Ages should refer to lower age bounds, ending in the open age group in the last row (not a closed terminal age). Dimension labelling is necessary. There must be at least six age groups (including the open group). One year of data will work as well, as long as it's given as or coercible to a single-column matrix. This method may produce negative values, most likely in the youngest or oldest ages. This case is dealt with in the graduate() wrapper function but not in this function.
+
Ages should refer to lower age bounds, ending in the open age group in the last row (not a closed terminal age). Dimension labeling is necessary. There must be at least six age groups (including the open group). One year of data will work as well, as long as it's given as or coercible to a single-column matrix. This method may produce negative values, most likely in the youngest or oldest ages. This case is dealt with in the graduate() wrapper function but not in this function.
If the highest age does not end in a 0 or 5, and OAG == TRUE, then the open age will be grouped down to the next highest age ending in 0 or 5. If the highest age does not end in a 0 or 5, and OAG == FALSE, then results extend to single ages covering the entire 5-year age group.
logical, default = TRUE is the final age group open?
@@ -194,7 +194,7 @@
Value
Numeric vector of counts for single year age groups.
Details
-
Assumes that the population is uniformly distributed across each age interval, and that initial age intervals are integers greater than or equal to 1. If AgeInt is given, its final value is used as the interval for the final age group. If AgeInt is missing, then Age must be given, and the open age group is by default preserved OAvalue rather than split. To instead split the final age group into, e.g., a 5-year age class, either give AgeInt, or give Age, OAG = TRUE, and OAvalue = 5.
+
Assumes that the population is uniformly distributed across each age interval, and that initial age intervals are integers greater than or equal to 1. If AgeInt is given, its final value is used as the interval for the final age group. If AgeInt is missing, then Age must be given, and the open age group is by default preserved OAvalue rather than split. To instead split the final age group into, e.g., a 5-year age class, either give AgeInt, or give Age, OAG = TRUE, and OAvalue = 5. Age can be any age range; it does not need to start at 0.
If you shift the groupings, then the first age groups may have a negative lower bound
-(for example of -5). These counts would be discarded for the osculatory version of Sprague smoothing,
+(for example of -5). These counts would be discarded for the osculatory version of Sprague smoothing,
for example, but they are preserved in this function. The important thing to know is that if you shift
the groups, the first and last groups will not be N years wide. For example if shiftdown is 1, the first age group is 4-ages wide. The ages themselves are not returned,
but they are the name attribute of the output count vector. Note this will also correctly group abridged ages
diff --git a/docs/reference/groupOAG.html b/docs/reference/groupOAG.html
index e62f9a40b..a4bea3fd8 100644
--- a/docs/reference/groupOAG.html
+++ b/docs/reference/groupOAG.html
@@ -80,7 +80,7 @@
DemoTools
- 01.13.20
+ 01.13.76
@@ -88,7 +88,7 @@
numeric vector. The first (left) census in single age groups
+
+
+
c2
+
numeric vector. The second (right) census in single age groups
+
+
+
date1
+
reference date of c1. Either a Date class object or an unambiguous character string in the format "YYYY-MM-DD".
+
+
+
date2
+
reference date of c2. Either a Date class object or an unambiguous character string in the format "YYYY-MM-DD".
+
+
+
age1
+
integer vector. single ages of c1
+
+
+
age2
+
integer vector. single ages of c2
+
+
+
dates_out
+
vector of desired output dates coercible to numeric using dec.date()
+
+
+
lxMat
+
numeric matrix containing lifetable survivorship, l(x). Each row is an age group and each column a time point. At least two intercensal time points needed.
+
+
+
age_lx
+
integer vector. Age classes in lxMat
+
+
+
dates_lx
+
date, character, or numeric vector of the column time points for lxMat. If these are calendar-year estimates, then you can choose mid-year time points
+
+
+
births
+
integer vector. Raw birth counts for the corresponding (sub)-population, one value per each year of the intercensal period including both census years. The first and last years should include all births in the given year; don't discount them in advance.
+
+
+
years_births
+
numeric vector of calendar years of births.
+
+
+
location
+
UN Pop Division LocName or LocID
+
+
+
sex
+
character string, either "male", "female", or "both"
+
+
+
midyear
+
logical. FALSE means all Jan 1 dates between date1 and date2 are returned. TRUE means all July 1 intercensal dates are returned.
+
+
+
verbose
+
logical. Shall we send informative messages to the console?
+
+
+
...
+
optional arguments passed to
+
+
+
+
Details
+
+
The basic approach is to i) align the censuses to single-year cohorts by blending adjacent ages assuming that the birthdays in each age group are uniformly distributed through the year ii) decrement the first census forward within cohorts using period-cohort survival probabilities calculated from (supplied or downloaded) l(x) values, iii) redistribute the residual at the time of the second census uniformly over time within cohorts. These steps are always done on Jan 1 reference dates. If midyear = TRUE, then we do within-age band arithmetic interpolation to July 1 reference dates.
Cohorts between two censuses are interpolated flexibly using linear, exponential, or power rules. The lower and upper intercensal triangles are filled using within-age interpolation. This function is experimental and still in development.
Given a data frame with dates, sex and mortality data by age (rates, conditioned probabilities of death
+or survival function), this function interpolates/extrapolates life tables
+using the method for limited data suggested by Li et al. (2004) (at least three observed years).
numeric. Vector of decimal years to interpolate or extrapolate.
+
+
+
Single
+
logical. Whether or not the lifetable output is by single ages.
+
+
+
dates_e0
+
numeric. Vector of decimal years where "e_0" should be fitted when apply method.
+
+
+
e0_Males
+
numeric. Vector of life expectancy by year to be fitted. Same length than "dates_e0".
+
+
+
e0_Females
+
numeric. Vector of life expectancy by year to be fitted. Same length than "dates_e0".
+
+
+
prev_divergence
+
logical. Whether or not prevent divergence and sex crossover. Default FALSE.
+
+
+
OAG
+
logical. Whether or not the last element of nMx (or nqx or lx) is an open age group. Default TRUE.
+
+
+
verbose
+
logical. Default FALSE.
+
+
+
SVD
+
logical. Use Singular Value Decomposition for estimate b and k or Maximum Likelihood Estimation. Default FALSE for Maximum Likelihood Estimation.
+
+
+
...
+
Other arguments to be passed on to the lt_abridged function.
+
+
+
+
Value
+
+
List with:
+
Interpolated/extrapolated lifetables in a data.frame with columns:
+
Date numeric. Dates included in dates_out,
+
Sex character. Male "m" or female "f",
+
Age integer. Lower bound of abridged age class,
+
`AgeInt` integer. Age class widths.
+
nMx numeric. Age-specific central death rates.
+
nAx numeric. Average time spent in interval by those deceased in interval.
+
nqx numeric. Age-specific conditional death probabilities.
+
lx numeric. Lifetable survivorship
+
ndx numeric. Lifetable deaths distribution.
+
nLx numeric. Lifetable exposure.
+
Sx numeric. Survivor ratios in uniform 5-year age groups.
+
Tx numeric. Lifetable total years left to live above age x.
+
ex numeric. Age-specific remaining life expectancy.
+
List with estimated Lee-Carter parameters for each sex:
+
kt numeric time vector. Time trend in mortality level.
+
ax numeric age vector. Average time of log(m_{x,t}).
+
bx numeric age vector. Pattern of change in response to kt.
+
+
+
+
Details
+
+
Based on spreadsheet "Li_2018_Limited_Lee-Carter-v4.xlsm" from UN.
+Useful for abridged or single ages, and allows output in both formats also.
+One option is the use of non-divergent method for sex coherency (Li & Lee, 2005).
+The other is the possibility of fitting "k" to replicate "e_0" at some given dates.
+
Note
+
+
Draft Version
+
References
+
+
Li N, Lee R (2005).
+“Coherent mortality forecasts for a group of populations: An extension of the Lee-Carter method.”
+Demography, 42(3), 575.
+doi: 10.1353/dem.2005.0021
+.
+
+Li N, Lee R, Tuljapurkar S (2004).
+“Using the Lee-Carter Method to Forecast Mortality for Populations with Limited Data\(\ast\).”
+Int. Stat. Rev., 72(1), 19--36.
+ISSN 0306-7734, doi: 10.1111/j.1751-5823.2004.tb00221.x
+.
numeric. Matrix with many rows as ages and columns as dates_in.
+
+
+
dates_in
+
numeric. Vector of dates with input rates.
+
+
+
dates_out
+
numeric. Vector of dates for estimate a set of rates.
+
+
+
SVD
+
logical. Use Singular Value Decomposition for estimate b and k or Maximum Likelihood Estimation. Default FALSE for Maximum Likelihood Estimation.
+
+
+
+
Details
+
+
SVD for ax and bx. Fit a simple linear model for k and interpolate/extrapolate for objective dates.
+
References
+
+
Li N, Lee R, Tuljapurkar S (2004).
+“Using the Lee-Carter Method to Forecast Mortality for Populations with Limited Data\(\ast\).”
+Int. Stat. Rev., 72(1), 19--36.
+ISSN 0306-7734, doi: 10.1111/j.1751-5823.2004.tb00221.x
+.
Given a data frame with groups (country, region, sex), dates, sex and mortality data by age (rates, conditioned probabilities of death
+or survival function), this function interpolates/extrapolates life tables
+using the method for limited data suggested by Li et al. (2004) (at least three observed years).
data.frame. Columns: id, Date, Sex, Age, nMx (opt), nqx (opt), lx (opt).
+The first column (id) can be a numeric index or character vector identifying each group.
+
+
+
dates_out
+
numeric. Vector of decimal years to interpolate or extrapolate.
+
+
+
Single
+
logical. Whether or not the lifetable output is by single ages.
+
+
+
input_e0
+
data.frame with cols: id, Date, Sex and "e_0". This should be fitted when apply method.
+
+
+
prev_divergence
+
logical. Whether or not prevent divergence and sex crossover between groups. Default FALSE.
+
+
+
weights
+
list. For prev_divergence option. A double for each element of a list with names as id columns. Should sum up to 1. Default: same weight for each group.
+
+
+
OAG
+
logical. Whether or not the last element of nMx (or nqx or lx) is an open age group. Default TRUE.
+
+
+
verbose
+
logical. Default FALSE.
+
+
+
SVD
+
logical. Use Singular Value Decomposition for estimate b and k or Maximum Likelihood Estimation. Default FALSE for Maximum Likelihood Estimation.
+
+
+
...
+
Other arguments to be passed on to the lt_abridged function.
+
+
+
+
Value
+
+
List with:
+
Lifetable in a data.frame with columns:
+
Date numeric. Dates included in dates_out,
+
Sex character. Male "m" or female "f",
+
Age integer. Lower bound of abridged age class,
+
`AgeInt` integer. Age class widths.
+
nMx numeric. Age-specific central death rates.
+
nAx numeric. Average time spent in interval by those deceased in interval.
+
nqx numeric. Age-specific conditional death probabilities.
+
lx numeric. Lifetable survivorship
+
ndx numeric. Lifetable deaths distribution.
+
nLx numeric. Lifetable exposure.
+
Sx numeric. Survivor ratios in uniform 5-year age groups.
+
Tx numeric. Lifetable total years left to live above age x.
+
ex numeric. Age-specific remaining life expectancy.
+
List with parameters estimated for each group:
+
kt numeric time vector. Time trend in mortality level.
+
ax numeric age vector. Average time of log(m_{x,t}).
+
bx numeric age vector. Pattern of change in response to kt.
+
+
+
+
Details
+
+
Based on spreadsheet "Li_2018_Limited_Lee-Carter-v4.xlsm" from UN.
+Useful for abridged or single ages, and allows output in both formats also.
+One option is the use of non-divergent method for sex coherency (Li & Lee, 2005).
+The other is the possibility of fitting "k" to replicate "e_0" at some given dates.
+The id column in the input argument serves to separate groups. In case only one population/sex is given,
+it is recommended to supply some group name in id; if not, the function will try to infer the case.
+
Note
+
+
Draft Version
+
References
+
+
Li N, Lee R (2005).
+“Coherent mortality forecasts for a group of populations: An extension of the Lee-Carter method.”
+Demography, 42(3), 575.
+doi: 10.1353/dem.2005.0021
+.
+
+Li N, Lee R, Tuljapurkar S (2004).
+“Using the Lee-Carter Method to Forecast Mortality for Populations with Limited Data\(\ast\).”
+Int. Stat. Rev., 72(1), 19--36.
+ISSN 0306-7734, doi: 10.1111/j.1751-5823.2004.tb00221.x
+.
numeric. Vector (same length of age) of parameters from LC model.
+
+
+
bx
+
numeric. Vector (same length of age) of parameters from LC model.
+
+
+
age
+
numeric.
+
+
+
sex
+
numeric.
+
+
+
e0_target
+
numeric.
+
+
+
...
+
Other arguments to be passed on to the lt_abridged function.
+
+
+
+
Details
+
+
+Given LC parameters at some date, adjust k slightly to replicate already-known e_0 values.
+This is useful to give some sort of flexibility, and not strictly follow the linear model implied by the LC model,
+but to take advantage of the estimated structure (ax) and change by age (bx) for some trustworthy period.
logical. TRUE if the arguments are considered consistent.
Details
-
If OAG is TRUE then AgeInt must be coded as NA. If Age is not sorted then we sort both Age and AgeInt, assuming that they are in matched order. This isn't incoherence per se, but a message is returned to the console.
+
If OAG is TRUE then AgeInt must be coded as NA. If Age is not sorted then we sort both Age and AgeInt, assuming that they are in matched order. This isn't incoherence in itself, but a message is returned to the console.
the sex ratio at birth (boys / girls), detault 1.05
+
the sex ratio at birth (boys / girls), default 1.05
OAG
@@ -262,7 +262,7 @@
Arg
extrapLaw
character. If extrapolating, which parametric mortality law should be invoked? Options include
-"Kannisto", "Kannisto_Makeham", "Makeham", "Gompertz", "GGompertz", "Beard", "Beard_Makeham", "Quadratic". Default "Kannisto". See details.
+"Kannisto", "Kannisto_Makeham", "Makeham", "Gompertz", "GGompertz", "Beard", "Beard_Makeham", "Quadratic". Default "Kannisto" if the highest age is at least 90, otherwise "makeham". See details.
extrapFrom
@@ -312,7 +312,7 @@
Details
is aligned with the other columns in all 5-year age groups, but note the
first two values have a slightly different age-interval interpretation:
In Age 0, the interpretation is survival from birth until interval 0-4.
-In Age 1, it is survival from 0-4 into 5-9. Therafter the age groups align.
+In Age 1, it is survival from 0-4 into 5-9. Thereafter the age groups align.
This column is required for population projections.
Computes single year of age life table by graduating the mortality schedule of an abridged life table, using the ungroup::pclm() to ungroup binned count data. Returns complete single-age lifetable.
numeric. Infant mortality rate q0, in case available and nqx is not specified. Default NA.
+
+
+
mod
+
logical. If "un" specified for axmethod, whether or not to use Nan Li's modification for ages 5-14. Default TRUE.
+
+
+
SRB
+
the sex ratio at birth (boys / girls), default 1.05
+
+
+
OAG
+
logical. Whether or not the last element of nMx (or nqx or lx) is an open age group. Default TRUE.
+
+
+
OAnew
+
integer. Desired open age group (5-year ages only). Default max(Age). If higher then rates are extrapolated.
+
+
+
extrapLaw
+
character. If extrapolating, which parametric mortality law should be invoked? Options include
+"Kannisto", "Kannisto_Makeham", "Makeham", "Gompertz", "GGompertz", "Beard", "Beard_Makeham", "Quadratic". Default "Kannisto" if the highest age is at least 90, otherwise "makeham". See details.
+
+
+
extrapFrom
+
integer. Age from which to impute extrapolated mortality.
+
+
+
extrapFit
+
integer vector. Ages to include in model fitting. Defaults to all ages >= 60.
+
+
+
...
+
optional arguments passed to pclm(). For example, if you pass an explicit lambda parameter via the control argument, you can speed up estimation
+
+
+
+
Value
+
+
Single-year lifetable in data.frame with columns
+
Ageinteger. Lower bound of single year age class,
+
AgeIntinteger. Age class widths.
+
nMxnumeric. Age-specific central death rates.
+
nAxnumeric. Average time spent in interval by those deceased in interval.
+
nqxnumeric. Age-specific conditional death probabilities.
+
lxnumeric. Lifetable survivorship
+
ndxnumeric. Lifetable deaths distribution.
+
nLxnumeric. Lifetable exposure.
+
Sxnumeric. Survivor ratios.
+
Txnumeric. Lifetable total years left to live above age x.
+
exnumeric. Age-specific remaining life expectancy.
This is a wrapper around the other lifetable utilities. We start with either nMx, nqx, or lx in single or abridged ages, and returns a full lifetable in either single or abridged ages. All optional arguments of lt_abridged() or lt_single*() can be passed in, for instance the nax assumptions or the extrapolation arguments.
+
+
+
lt_ambiguous(
+ nMx_or_nqx_or_lx =NULL,
+ type ="m",
+ Age =NULL,
+ Sex =NULL,
+ Single =FALSE,
+ ...
+)
+
+
Arguments
+
+
+
+
nMx_or_nqx_or_lx
+
numeric vector of either nMx, nqx, or lx
+
+
+
type
+
character, which variable is x?, either "m", "q", or "l". Default "m"
+
+
+
Age
+
integer vector of the lower age bounds of x
+
+
+
Sex
+
character, "m", "f", or "b".
+
+
+
Single
+
logical, do we want output in single ages?
+
+
+
...
+
optional arguments passed to lt_abridged() or lt_single*()
An extra lifetable column for use in projections, which require uniform time steps in both age and period. Intervals are either single age (N=1) or five-year ages (N=5). Input vectors are assumed to come from either single or standard abridged ages.
+
An extra lifetable column for use in projections, which require uniform time steps in both age and period. Intervals are either single age or five-year ages. Input vectors are assumed to come from either single or standard abridged ages. Note that the ages of the output Sx are the ages the population would be after the N-year projection.
Construct model life tables based on the Log-Quadratic (wilmoth) estimates
+
Construct model life tables based on the Log-Quadratic (Wilmoth) estimates
with various choices of 2 input parameters:
q0_5, q0_1, q15_45, q15_35 and e0. There are 8 possible
combinations (see examples below).
@@ -268,7 +268,7 @@
Arg
SRB
-
the sex ratio at birth (boys / girls), detault 1.05
+
the sex ratio at birth (boys / girls), default 1.05
@@ -286,7 +286,7 @@
Details
integers. For example 45q15 is represented as q45_15.
character. Either "ak" (Andreev-Kingkade) or "cd" (Coale-Demeny).
+
+
+
M0
+
numeric. Event exposure infant mortality rate.
+
+
+
q0
+
a value or vector of values of m0, the death risk in the first year of life.
+
+
+
Sex
+
character, either "m", "f", or "b"
+
+
+
IMR
+
numeric. Optional. q0, the death probability in first year of life, in case available separately.
+
+
+
region
+
character. "n", "e", "s" or "w" for North, East, South, or West.
+
+
+
SRB
+
the sex ratio at birth (boys / girls), default 1.05
+
+
+
+
Value
+
+
a0, the estimated average age at death of those dying in the first year of life, either a single value or a vector of values.
+
Details
+
+
If sex is given as both, "b", then we calculate the male and female results separately, then weight them together using SRB. This is bad in theory, but the leverage is trivial, and it's better than using male or female coefs for the total population.
+
Neither Coale-Demeny nor Andreev-Kingkade have explicit a0 rules for both-sexes combined. There's not a good way to arrive at a both-sex a0 estimate without increasing data requirements (you'd need data from each sex, which are not always available). It's more convenient to blend sex-specific a0 estimates based on something. Here we use SRB to do this, for no other reason than it has an easy well-known default value. This is bad because it assumes no sex differences in infant mortality, but this choice has a trivial impact on results.
These formulas and cutpoints are based on a supplementary analysis from Andreev & Kingkade. The original formulation was in terms of IMR. There is also an analytic path to convert M0 to q0 and then use the original q0 cutpoints. Code ported from HMD code base.
+
+
+
lt_rule_ak_m0_a0(M0, Sex)
+
+
Arguments
+
+
+
+
M0
+
a value or vector of values of m0, the death risk in the first year of life.
+
+
+
Sex
+
either "m" or "f"
+
+
+
+
Value
+
+
a0, the estimated average age at death of those dying in the first year of life, either a single value or a vector of values.
An object of class lt_rule_m_extrapolate with the following components:
input
List with arguments provided in input. Saved for convenience.
-
call
An unevaluated function call, that is, an unevaluated expressionwhich consists of the named function applied to the given arguments.
+
call
An unevaluated function call, that is, an unevaluated expression that consists of the named function applied to the given arguments.
fitted.model
An object of class MortalityLaw. Here one can find fitted values, residuals, goodness of fit measures etc.
values
A vector or matrix containing the complete mortality data, that is the modified input data following the extrapolation procedure.
+
Details
+
+
If fitting fails to converge, then we refit assuming Gompertz mortality with explicit starting parameters of parS = c(A = 0.005, B = 0.13) and a warning is issued.
Examp
mx_matrix<-matrix(rep(mx1, 3), ncol =3)%*%diag(c(1, 1.05, 1.1))dimnames(mx_matrix)<-list(age =x1, year =c("year1", "year2", "year3"))
-F1<-lt_rule_m_extrapolate(mx_matrix, x =x1, x_fit, x_extr, law ="kannisto")
-
#> Warning: the condition has length > 1 and only the first element will be used
F1
+# TR: temporary warning suppression until case handling is fixed
+# in MortalityLaws package
+F1<-suppressWarnings(lt_rule_m_extrapolate(mx_matrix, x =x1, x_fit, x_extr, law ="kannisto"))
+F1
Computes abridged life table columns based on the lx, nLx, and ex values from
+a single year life table, in accordance with step 2.2 of the Human Life Table Protocol
+https://www.lifetable.de/methodology.pdf. Output abridged life table has same open age group
+as input single age life table
+
+
+
lt_single2abridged(lx, nLx, ex, Age =1:length(lx)-1, ...)
+
+
Arguments
+
+
+
+
lx
+
numeric. Vector of lifetable survivorship at single ages.
+
+
+
nLx
+
numeric. Vector of lifetable exposure at single ages.
+
+
+
ex
+
numeric. Vector of Age-specific remaining life expectancy at single ages.
+
+
+
Age
+
integer. Lower bounds of single ages.
+
+
+
...
+
optional args, not currently used.
+
+
+
+
Value
+
+
Abridged lifetable in data.frame with columns
+
Ageinteger. Lower bound of abridged age class,
+
AgeIntinteger. Age class widths.
+
nMxnumeric. Age-specific central death rates.
+
nAxnumeric. Average time spent in interval by those deceased in interval.
+
nqxnumeric. Age-specific conditional death probabilities.
+
lxnumeric. Lifetable survivorship
+
ndxnumeric. Lifetable deaths distribution.
+
nLxnumeric. Lifetable exposure.
+
Sxnumeric. Survivor ratios in uniform 5-year age groups.
+
Txnumeric. Lifetable total years left to live above age x.
+
exnumeric. Age-specific remaining life expectancy.
the sex ratio at birth (boys / girls), detault 1.05
+
the sex ratio at birth (boys / girls), default 1.05
OAG
@@ -230,7 +230,7 @@
Arg
extrapLaw
character. If extrapolating, which parametric mortality law should be invoked? Options include
-"Kannisto", "Kannisto_Makeham", "Makeham", "Gompertz", "GGompertz", "Beard", "Beard_Makeham", "Quadratic". Default "Kannisto". See details.
+"Kannisto", "Kannisto_Makeham", "Makeham", "Gompertz", "GGompertz", "Beard", "Beard_Makeham", "Quadratic". Default "Kannisto" if the highest age is at least 90, otherwise "makeham". See details.
Computes lifetable columns from single age qx by first computing 1ax, then computing
+1mx from 1qx and 1ax, and finally passing the 1mx to the lt_single_mx() function
numeric. Infant mortality rate q0, in case available and nqx is not specified. Default NA.
+
+
+
mod
+
logical. If "un" specified for axmethod, whether or not to use Nan Li's modification for ages 5-14. Default TRUE.
+
+
+
SRB
+
the sex ratio at birth (boys / girls), default 1.05
+
+
+
OAG
+
logical. Whether or not the last element of nMx (or nqx or lx) is an open age group. Default TRUE.
+
+
+
OAnew
+
integer. Desired open age group (5-year ages only). Default max(Age). If higher then rates are extrapolated.
+
+
+
extrapLaw
+
character. If extrapolating, which parametric mortality law should be invoked? Options include
+"Kannisto", "Kannisto_Makeham", "Makeham", "Gompertz", "GGompertz", "Beard", "Beard_Makeham", "Quadratic". Default "Kannisto" if the highest age is at least 90, otherwise "makeham". See details.
+
+
+
extrapFrom
+
integer. Age from which to impute extrapolated mortality.
+
+
+
extrapFit
+
integer vector. Ages to include in model fitting. Defaults to all ages >= 60.
+
+
+
...
+
Other arguments to be passed on to the
+MortalityLaw function.
+
+
+
+
Value
+
+
Lifetable in data.frame with columns
+
Ageinteger. Lower bound of abridged age class,
+
AgeIntinteger. Age class widths.
+
nMxnumeric. Age-specific central death rates.
+
nAxnumeric. Average time spent in interval by those deceased in interval.
+
nqxnumeric. Age-specific conditional death probabilities.
+
lxnumeric. Lifetable survivorship
+
ndxnumeric. Lifetable deaths distribution.
+
nLxnumeric. Lifetable exposure.
+
Sxnumeric. Survivor ratios in uniform single-year age groups.
+
Txnumeric. Lifetable total years left to live above age x.
+
exnumeric. Age-specific remaining life expectancy.
Considering different mortality inputs for each sex/year of data,
+this smooths older ages with Makeham or Kannisto in case no law was specified,
+and returns a data.frame with a standard LT.
Calculate the moving average (mav) over 3 or 5 years.
This arithmetic smoothing technique aims to eliminate irregularities of the population pyramid by averaging values in a moving window of user-defined width.
-
mav(Value, Age, n =3, OAG =TRUE)
+
mav(Value, Age, n =3, OAG =TRUE, tails =FALSE)
Arguments
@@ -183,6 +183,12 @@
Arg
OAG
logical. Whether or not the top age group is open. Default TRUE.
+
+
tails
+
logical. If set to TRUE, smaller-n moving averages are applied on both tails
+such that all values are non-NA. If FALSE (default), tails are set to NA
+due to the lag of moving averages.
+
Value
@@ -190,16 +196,13 @@
Value
Vector with the smoothed demographic counts.
Details
-
The moving window is applied symmetrically. Data endpoints are imputed with NAs in output: the is nothing under 0 or over the highest closed age group to average with. The open age group is imputed with NA prior to calculations, since it cannot be averaged into the next lowest group. For example, for n=3, age 0 will be NA, as will the open age group and next lowest age. Age intervals are assumed uniform. This function could be used with either single or 5-year age groups.
-
Ages may be single or grouped, but all age intervals are assumed equal.
+
The moving window is applied symmetrically. By default (tails = FALSE) data endpoints are imputed with NAs in output: there is nothing under 0 or over the highest closed age group to average with. The open age group is not used in averaging, and it is returned as-is. Age intervals are assumed uniform. This function could be used with either single or 5-year age groups.
+
If tails is set to TRUE, then tails are imputed using moving averages with successively smaller values of n — the cascade method.
References
Roger G, Waltisperger D, Corbille-Guitton C (1981).
Les structures par sexe et âge en Afrique.
GDA, Paris, France.
This methods projects population from the first starting point to next census
+without migration and computes the "Net Census Error" (NCE) which is
+Census - Estimate by age from projection. It then distributes the NCE over
+the cohort parallelogram, assuming a uniform distribution and that it is all
+migration. It finalizes by summing the estimate by age groups across the entire
+intercensal period to have a total migration during the entire period.
+Alternatively, a child adjustment and an old age adjustment can be applied.
numeric vector. The first (left) census in single age groups
+
+
+
c2
+
numeric vector. The second (right) census in single age groups
+
+
+
date1
+
reference date of c1. Either a Date class object or an unambiguous character string in the format "YYYY-MM-DD".
+
+
+
date2
+
reference date of c2. Either a Date class object or an unambiguous character string in the format "YYYY-MM-DD".
+
+
+
age1
+
integer vector. single ages of c1
+
+
+
age2
+
integer vector. single ages of c2
+
+
+
dates_out
+
vector of desired output dates coercible to numeric using dec.date()
+
+
+
lxMat
+
numeric matrix containing lifetable survivorship, l(x). Each row is an age group and each column a time point. At least two intercensal time points needed.
+
+
+
age_lx
+
integer vector. Age classes in lxMat
+
+
+
dates_lx
+
date, character, or numeric vector of the column time points for lxMat. If these are calendar-year estimates, then you can choose mid-year time points
+
+
+
births
+
integer vector. Raw birth counts for the corresponding (sub)-population, one value per each year of the intercensal period including both census years. The first and last years should include all births in the given year; don't discount them in advance.
+
+
+
years_births
+
numeric vector of calendar years of births.
+
+
+
location
+
country name or LocID
+
+
+
sex
+
character string, either "male", "female", or "both"
+
+
+
midyear
+
logical. FALSE means all Jan 1 dates between date1 and date2 are returned. TRUE means all July 1 intercensal dates are returned.
+
+
+
verbose
+
logical. Shall we send informative messages to the console?
+
+
+
child_adjust
+
The method with which to adjust the youngest age groups.
+If "none", no adjustment is applied (default). If
+child-woman ratio ("cwr") is chosen, the first cohorts reflecting the
+difference between date2 - date1 are adjusted (plus age 0). If
+child constant ratio ("constant") is chosen, the first 15 age groups
+are adjusted.
+
+
+
childage_max
+
The maximum age from which to apply child_adjust.
+By default, set to NULL, which gets translated into all the cohorts
+between date2 and date1. If date2 is 2010 and
+date1 is 2002, the first 8 cohorts are adjusted. Otherwise, the user
+can supply an integer.
+
+
+
cwr_factor
+
A numeric between 0 and 1 to which adjust the CWR method
+for the young ages from child_adjust. This is only used
+when child_adjust is "cwr".
+
+
+
oldage_adjust
+
The type of adjustment to apply to ages at and above
+oldage_min. 'beers' applies a beers graduation method
+while 'mav' applies a moving average with cascading on the tails.
+For more information see ?mav and ?graduation_beers.
+
+
+
oldage_min
+
The minimum age from which to apply oldage_adjust.
+By default, set to 65, so any adjustment from oldage_adjust will be
+applied for 65+.
+
+
+
...
+
optional arguments passed to lt_single_qx
+
+
+
+
Value
+
+
a numeric vector of the total migration in the intercensal period
+for each age. Ages are set as names of each migration estimate.
In the full 13 parameter model, the migration rate at age x, \(m(x)\) is defined as
-$$m(x) = a1*exp(-1*alpha1*x) + a2*exp(-1*alpha2*(x - mu2) - exp(-1*lambda2*(x - mu2))) + a3*exp(-1*alpha3*(x - mu3) - exp(-1*lambda3*(x - mu3))) + a4*exp(lambda4*x) + c$$
The first, second, third and fourth pieces of the equation represent pre-working age, working age, retirement and post-retirement age patterns, respectively.
Models with less parameters gradually remove terms at the older ages. Parameters in each family are:
A numeric matrix with population counts. Rows should
+be ages and columns should be years. Only five year age groups are supported.
+See examples.
+
+
+
pop_f_mat
+
A numeric matrix with population counts. Rows should
+be ages and columns should be years. Only five year age groups are supported.
+See examples.
+
+
+
sr_m_mat
+
A numeric matrix with survival rates for males. Rows
+should be ages and columns should be years. ** This matrix should have
+one column less than pop_m_mat and pop_f_mat. For example,
+if the last year in these matrices is 2050, then the last year in
+sr_m_mat should be 2045. **
+
+
+
sr_f_mat
+
A numeric matrix with survival rates for females. Rows
+should be ages and columns should be years. ** This matrix should have
+one column less than pop_m_mat and pop_f_mat. For example,
+if the last year in these matrices is 2050, then the last year in
+sr_f_mat should be 2045. **.
+
+
+
asfr_mat
+
A numeric matrix with age specific fertility rates.
+Rows should be ages and columns should be years. ** This matrix should have
+one column less than pop_m_mat and pop_f_mat. For example,
+if the last year in these matrices is 2050, then the last year in
+asfr_mat should be 2045**. This row will usually have fewer age groups
+(rows) than in the population matrices or survival matrices, so the user
+needs to supply the specific ages in the ages_asfr argument.
+
+
+
srb_vec
+
A numeric vector of sex ratios at birth for every year.
+The years should be the same as the years in sr_m_mat,
+sr_f_mat, and asfr_mat.
+
+
+
ages
+
A numeric vector of ages used in the rows in
+pop_m_mat, pop_f_mat, sr_m_mat, sr_f_mat.
+
+
+
ages_asfr
+
A numeric vector of ages used in the rows in
+asfr_mat.
+
+
+
years_pop
+
Years used in the column names of population. If
+pop_m_mat or pop_f_mat doesn't have column names, these
+names are used. Otherwise ignored.
+
+
+
years_sr
+
Years used in the column names of survival rates. If
+sr_r_mat doesn't have column names, these names are used. Otherwise
+ignored.
+
+
+
years_asfr
+
Years used in the column names of age-specific fertility
+rate. If codeasfr_r_mat doesn't have column names, these names are used.
+Otherwise ignored.
+
+
+
years_srb
+
Years used in the column names of sex-ratio at birth. If
+srb_r_mat is not named, these names are used. Otherwise ignored.
+
+
+
verbose
+
Default set to TRUE. If TRUE, the function prints important
+operations carried out in the function like if years were trimmed from the
+data.
+
+
+
method
+
which residual migration method to use. This only works when
+using mig_resid and the possible options are 'stock', 'cohort' and
+'time', with 'stock' being the default.
+
+
+
+
Value
+
+
A list with two matrices. One is for males (called mig_m) and the
+other for females (called mig_f). Both matrices contain net migration
+estimates by age/period using one of the three methods.
+
Details
+
+
+
+
The stock method (mig_resid_stock) is the difference in stocks that
+survive between t and t+5, and the first age group is based on the difference
+with the surviving births by sex. It provides net migrants by lexis cohort
+parallelograms, and basically such info gets used as end-period migration
+since the migrants don't get exposed to mortality within the period.
+
The time even flow (mig_resid_time) method uses the result from
+the first option, but splits it back into lexis period squares and assumes
+that half of the net migrants get exposed to the mortality risk during this
+period. Such info can get used as evenly distributed migration by period,
+but the assumptions lead to zig-zag age patterns that are highly implausible.
+
The cohort even flow (mig_resid_cohort) method provides the most
+meaningful pattern of net migration by age consistent by cohort and assumes
+an even distribution within the 5-year period, and half of the migrants
+get exposed to both fertility and mortality within this period.
+
+
+
mig_resid is a general function able to call the three methods only by
+specifying the method argument. By default it is set to the
+stock method. See the examples section.
+
+
Examples
+
+library(DemoTools)
+
+# The data is loaded with DemoTools
+
+################ Stock change method #####################
+
+# Generic mig_resid method which allows to choose either stock,
+# cohort or time method for five year ages groups
+
+mig_res<-
+ mig_resid(
+ pop_m_mat =pop_m_mat_five,
+ pop_f_mat =pop_f_mat_five,
+ sr_m_mat =sr_m_mat_five,
+ sr_f_mat =sr_f_mat_five,
+ asfr_mat =asfr_mat_five,
+ srb_vec =srb_vec_five,
+ ages =ages_five,
+ ages_asfr =ages_asfr_five,
+ # With the stock method
+ method ="stock"
+ )
+
+# Or directly the mig_resid_stock function
+# (works for both single and five year age groups)
+
+mig_res<-
+ mig_resid_stock(
+ pop_m_mat =pop_m_mat_five,
+ pop_f_mat =pop_f_mat_five,
+ sr_m_mat =sr_m_mat_five,
+ sr_f_mat =sr_f_mat_five,
+ asfr_mat =asfr_mat_five,
+ srb_vec =srb_vec_five,
+ ages =ages_five,
+ ages_asfr =ages_asfr_five
+ )
+
+# Net migration for males using stock change method
+mig_res$mig_m
+
+
+################ cohort even flow method #####################
+
+# We reuse the same data from before
+# Either use the generic mig_resid choosing 'cohort'
+
+# Five year age groups
+mig_res<-
+ mig_resid(
+ pop_m_mat =pop_m_mat_five,
+ pop_f_mat =pop_f_mat_five,
+ sr_m_mat =sr_m_mat_five,
+ sr_f_mat =sr_f_mat_five,
+ asfr_mat =asfr_mat_five,
+ srb_vec =srb_vec_five,
+ ages =ages_five,
+ ages_asfr =ages_asfr_five,
+ # With the cohort method
+ method ="cohort"
+ )
+
+################ time even flow method #####################
+
+# We reuse the same data from before
+# Either use the generic mig_resid with the 'time' method
+
+# For five year age groups
+mig_res<-
+ mig_resid(
+ pop_m_mat =pop_m_mat_five,
+ pop_f_mat =pop_f_mat_five,
+ sr_m_mat =sr_m_mat_five,
+ sr_f_mat =sr_f_mat_five,
+ asfr_mat =asfr_mat_five,
+ srb_vec =srb_vec_five,
+ ages =ages_five,
+ ages_asfr =ages_asfr_five,
+ # With the time method
+ method ="time"
+ )
+
+# Or directly the mig_resid_time function
+# (works for both five and single year ages)
+
+mig_res<-
+ mig_resid_time(
+ pop_m_mat =pop_m_mat_five,
+ pop_f_mat =pop_f_mat_five,
+ sr_m_mat =sr_m_mat_five,
+ sr_f_mat =sr_f_mat_five,
+ asfr_mat =asfr_mat_five,
+ srb_vec =srb_vec_five,
+ ages =ages_five,
+ ages_asfr =ages_asfr_five
+ )
+
+# Net migration for males using the time even flow method
+mig_res$mig_m
+
A numeric matrix with population counts. Rows should
+be ages and columns should be years. Only five year age groups are supported.
+See examples.
+
+
+
pop_f_mat
+
A numeric matrix with population counts. Rows should
+be ages and columns should be years. Only five year age groups are supported.
+See examples.
+
+
+
sr_m_mat
+
A numeric matrix with survival rates for males. Rows
+should be ages and columns should be years. ** This matrix should have
+one column less than pop_m_mat and pop_f_mat. For example,
+if the last year in these matrices is 2050, then the last year in
+sr_m_mat should be 2045. **
+
+
+
sr_f_mat
+
A numeric matrix with survival rates for females. Rows
+should be ages and columns should be years. ** This matrix should have
+one column less than pop_m_mat and pop_f_mat. For example,
+if the last year in these matrices is 2050, then the last year in
+sr_f_mat should be 2045. **.
+
+
+
asfr_mat
+
A numeric matrix with age specific fertility rates.
+Rows should be ages and columns should be years. ** This matrix should have
+one column less than pop_m_mat and pop_f_mat. For example,
+if the last year in these matrices is 2050, then the last year in
+asfr_mat should be 2045**. This matrix will usually have fewer age groups
+(rows) than the population matrices or survival matrices, so the user
+needs to supply the specific ages in the ages_fertility argument.
+
+
+
srb_vec
+
A numeric vector of sex ratios at birth for every year.
+The years should be the same as the years in sr_m_mat,
+sr_f_mat, and asfr_mat.
+
+
+
ages
+
A numeric vector of ages used in the rows in
+pop_m_mat, pop_f_mat, sr_m_mat, sr_f_mat.
+
+
+
ages_fertility
+
A numeric vector of ages used in the rows in
+asfr_mat.
+
+
+
+
Value
+
+
A list with two matrices. One is for males (called mig_m) and the
+other for females (called mig_f). Both matrices contain net migration
+estimates by age/period using one of the three methods.
+
Details
+
+
+
+
The stock method (mig_resid_stock) is the difference in stocks that
+survive between t and t+5, and the first age group is based on the difference
+with the surviving births by sex. It provides net migrants by lexis cohort
+parallelograms, and basically such info gets used as end-period migration
+since the migrants don't get exposed to mortality within the period.
+
The time even flow (mig_resid_time) method uses the result from
+the first option, but splits it back into lexis period squares and assumes
+that half of the net migrants get exposed to the mortality risk during this
+period. Such info can get used as evenly distributed migration by period,
+but the assumptions lead to zig-zag age patterns that are highly implausible.
+
The cohort even flow (mig_resid_cohort) method provides the most
+meaningful pattern of net migration by age consistent by cohort and assumes
+an even distribution within the 5-year period, and half of the migrants
+get exposed to both fertility and mortality within this period.
+
+################ cohort even flow method #####################
+
+# We reuse the same data from before
+
+mig_res<-
+ mig_resid_cohort(
+ pop_m_mat =pop_m_mat,
+ pop_f_mat =pop_f_mat,
+ sr_m_mat =sr_m_mat,
+ sr_f_mat =sr_f_mat,
+ asfr_mat =asfr_mat,
+ srb_vec =srb_vec,
+ ages =ages,
+ ages_fertility =ages_fertility
+ )
+
+# Net migration for males using the cohort even flow method
+mig_res$mig_m
+
+################ time even flow method #####################
+
+# We reuse the same data from before
+
+mig_res<-
+ mig_resid_time(
+ pop_m_mat =pop_m_mat,
+ pop_f_mat =pop_f_mat,
+ sr_m_mat =sr_m_mat,
+ sr_f_mat =sr_f_mat,
+ asfr_mat =asfr_mat,
+ srb_vec =srb_vec,
+ ages =ages,
+ ages_fertility =ages_fertility
+ )
+
+# Net migration for males using the time even flow method
+mig_res$mig_m
+
Given a total net migration,
+calculate the net migration age schedule based on the Rogers and Castro formula for UN families.
+
+
+
mig_un_fam(NM, family, Single =TRUE, OAnew =100)
+
+
Arguments
+
+
+
+
NM
+
numeric. Total net migration to distribute between ages and sex.
+
+
+
family
+
character. Could be "Family", "Female Labor", "Male Labor".
+
+
+
Single
+
logical. Results by single age. Default TRUE.
+Typically from the pre-working age and working age parts of the Rogers-Castro formula.
+
+
+
OAnew
+
The age from which to group all ages into an open ended age group.
+By default it is set to 100, so it groups all ages up to 120, which is the
+maximum age.
+
+
+
+
Value
+
+
List with
+
params_RC data.frame. Rogers-Castro parameters in a data.frame. Same as mig_un_params data.
+
net_migr data.frame. Net migrants by age and sex for the chosen family.
+
+
+
+
Examples
+
# 10000 net migrants, comparing two possible families
+nm1<-mig_un_fam(NM =10000, family ="Male Labor", OAnew =100)
+nm2<-mig_un_fam(NM =10000, family ="Family", OAnew =100)
+# See the female profile in for these models:
+if(FALSE){
+plot(nm1$net_migr$age[nm1$net_migr$sex=="Female"],
+ nm1$net_migr$nm[nm1$net_migr$sex=="Female"],
+ xlab="Age",ylab="nm",ylim=c(0,300))
+points(nm2$net_migr$age[nm2$net_migr$sex=="Female"],
+ nm2$net_migr$nm[nm2$net_migr$sex=="Female"], col=2)
+}
+
Matches the (single) ages of a census to single cohorts. For use in intercensal interpolations. Ages are potentially blended to match single cohort line assuming that the population in each age is uniformly distributed over the age group.
Smooth populations in 5-year age groups using various methods
Smooth population counts in 5-year age groups using the Carrier-Farrag,
-Karup-King-Newton, Arriaga, United Nations, Stong, or Zigzag methods. Allows for imputation
+Karup-King-Newton, Arriaga, United Nations, Strong, MAV or Zigzag methods. Allows for imputation
of values in the youngest and oldest age groups for the Carrier-Farrag, Karup-King-Newton,
and United Nations methods.
@@ -177,7 +177,7 @@
Smooth populations in 5-year age groups using various methods
character string. Options include "Carrier-Farrag","Arriaga","KKN","United Nations", "Strong", and "Zigzag". See details. Default "Carrier-Farrag".
+
character string. Options include "Carrier-Farrag","Arriaga","KKN","United Nations", "Strong", MAV and "Zigzag". See details. Default "Carrier-Farrag".
OAG
@@ -234,8 +234,8 @@
Details
operate based on 10-year age group totals, excluding the open age group.
The Carrier-Farrag, Karup-King-Newton, and United Nations methods do not produce estimates
for the first and final 10-year age groups. By default, these are imputed with the original 5-year age group totals, but
-you can also specify to impute with NA, or the results of the Arriaga or
-Strong methods. If the terminal digit of the open age group is 5, then the terminal 10-year
+you can also specify to impute with NA, or the results of the Arriaga,
+Strong and Cascade methods. If the terminal digit of the open age group is 5, then the terminal 10-year
age group shifts down, so imputations may affect more ages in this case. Imputation can follow
different methods for young and old ages.
Method names are simplified using simplify.text and checked against a set of plausible matches
diff --git a/docs/reference/smooth_age_5_arriaga.html b/docs/reference/smooth_age_5_arriaga.html
index 5628bd1f5..b8e892870 100644
--- a/docs/reference/smooth_age_5_arriaga.html
+++ b/docs/reference/smooth_age_5_arriaga.html
@@ -80,7 +80,7 @@
DemoTools
- 01.13.20
+ 01.13.76
@@ -88,7 +88,7 @@
Smooth in 5-year age groups using a moving average
Smooth data in 5-year age groups.
-
smooth_age_5_mav(Value, Age, OAG =TRUE, n =3)
+
smooth_age_5_mav(Value, Age, OAG =TRUE, n =3, tails =FALSE)
Arguments
@@ -183,6 +183,11 @@
Arg
n
integer. The width of the moving average. Default 3 intervals (x-5 to x+9).
+
+
tails
+
logical. If tails is FALSE, both tails are left untouched.
+Otherwise, the tails are filled out using a cascade method.
+
Value
@@ -191,7 +196,9 @@
Value
Details
This function calls smooth_age_5_zigzag_inner(), but prepares data in a way consistent with other methods called by smooth_age_5(). It is probably preferable to call zigzag() from the top level, or else call this method from agesmth() for more control over tail imputations.
-
This function calls mav(), which itself relies on the more general ma(). We lose the lowest and highest ages with this method, unless n=1, in which case data is returned in the original 5-year age groups. The total population count is not constrained to sum to the orignal total.
+
If tails is set to FALSE, this function calls mav(), which itself relies on the more general ma(). We lose the lowest and highest ages with this method, unless n=1, in which case data is returned in the original 5-year age groups. The total population count is not constrained to sum to the original total.
+
If tails is TRUE, the same results are expected but the tails are
+filled in using a cascading method.
-
+
github
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
index a924cd1b1..7e8fb85a5 100644
--- a/docs/sitemap.xml
+++ b/docs/sitemap.xml
@@ -123,6 +123,9 @@
https://timriffe.github.io/DemoTools//reference/dec.date.html
+
+ https://timriffe.github.io/DemoTools//reference/downloadAsfr.html
+ https://timriffe.github.io/DemoTools//reference/downloadSRB.html
@@ -135,6 +138,9 @@
https://timriffe.github.io/DemoTools//reference/e0_swe.html
+
+ https://timriffe.github.io/DemoTools//reference/fetch_wpp_births.html
+ https://timriffe.github.io/DemoTools//reference/find_my_case.html
@@ -360,6 +366,9 @@
https://timriffe.github.io/DemoTools//reference/maxA2abridged.html
+
+ https://timriffe.github.io/DemoTools//reference/mig_beta.html
+ https://timriffe.github.io/DemoTools//reference/mig_calculate_rc.html
diff --git a/man/HMD_old_logquad.Rd b/man/HMD_old_logquad.Rd
new file mode 100644
index 000000000..dd6f75f87
--- /dev/null
+++ b/man/HMD_old_logquad.Rd
@@ -0,0 +1,83 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/log_quad_augm.R
+\name{HMD_old_logquad}
+\alias{HMD_old_logquad}
+\title{HMD pattern for adult ages.}
+\usage{
+HMD_old_logquad(
+ nMx,
+ Age = NULL,
+ Sex = "b",
+ q0_5 = NULL,
+ q15_45 = NULL,
+ q60_15 = NULL,
+ Age_transition = 80,
+ window_transition = 3,
+ plot_comparison = FALSE,
+ fitted_logquad = NULL,
+ ...
+)
+}
+\arguments{
+\item{nMx}{numeric. Vector of mortality rates in abridged age classes.}
+
+\item{Age}{integer. Single ages (abridged not allowed).}
+
+\item{Sex}{character. Either male \code{"m"}, female \code{"f"}, or both \code{"b"}.}
+
+\item{q0_5}{numeric. Probability of death from birth to age 5. By default implicit values in \code{nMx} should be entered.}
+
+\item{q15_45}{numeric. Probability of death from age 15 to age 60. By default implicit values in \code{nMx} should be entered.}
+
+\item{q60_15}{numeric. Probability of death from age 60 to age 75. When external information on the mortality level at those ages is available,
+it can be included to increase the parameter \code{ax} from the log-quad model at the oldest ages (Li, 2003).}
+
+\item{Age_transition}{integer. From which age the transition to the HMD pattern should start.}
+
+\item{window_transition}{integer. Number of ages to the left and to the right of \code{Age_transition} to do a log-linear transition in rates.}
+
+\item{plot_comparison}{Show or not a plot with the result.}
+
+\item{fitted_logquad}{Optional, defaults to \code{NULL}. An object of class
+\code{wilmoth}. If full HMD is not enough, one
+can fit a Log-Quadratic (\url{https://github.com/mpascariu/MortalityEstimate}) model
+based on any other collection of life tables;}
+
+\item{...}{Other arguments to be passed on to the \code{lt_single} function.}
+}
+\value{
+life table as in \code{lt_single} function.
+}
+\description{
+Adjust rates in oldest ages using HMD pattern, based on log-quad method.
+}
+\details{
+One possible scenario when mortality data on last ages is not reliable, is to use a mortality pattern with some known index in previous ages.
+This function gives a HMD pattern based on 5q0 and 45q15, using log-quad model. Additionally, a value on mortality between 60 and 75 can be included to make a better adjustment in level.
+}
+\examples{
+# Mortality rates from UN Chilean with e0=70. What would be the rates based on HMD pattern?
+# In this case making a transition of 10 years at age 80, and returning an OAG=100.
+\dontrun{
+lt <- DemoToolsData::modelLTx1
+lt <- lt[lt$family == "Chilean" & lt$sex == "female" & lt$e0 == 70,]
+chilean70_adjHMD <- HMD_old_logquad(nMx = lt$mx1,
+ Age = lt$age,
+ Sex = "f",
+ q0_5 = 1 - lt$lx1[lt$age==5]/lt$lx1[lt$age==0],
+ q15_45 = 1 - lt$lx1[lt$age==60]/lt$lx1[lt$age==15],
+ Age_transition = 80,
+ window_transition = 10,
+ plot_comparison = TRUE,
+ OAnew = 100)
+# We know (as an example) that q60_15 is .5 higher than what HMD pattern would be.
+chilean70_adjHMD_augm <- HMD_old_logquad(nMx = lt$mx1,
+ Age = lt$age,
+ Sex = "f",
+ q0_5 = 1 - lt$lx1[lt$age==5]/lt$lx1[lt$age==0],
+ q15_45 = 1 - lt$lx1[lt$age==60]/lt$lx1[lt$age==15],
+ q60_15 = (1 - lt$lx1[lt$age==75]/lt$lx1[lt$age==60]) * 1.5,
+ Age_transition = 80, window_transition = 10,
+ OAnew = 100, plot_comparison = TRUE)
+}
+}
diff --git a/man/OPAG.Rd b/man/OPAG.Rd
index e447cc4d5..7aaa27e9a 100644
--- a/man/OPAG.Rd
+++ b/man/OPAG.Rd
@@ -7,16 +7,13 @@
OPAG(
Pop,
Age_Pop,
- AgeInt_Pop,
nLx,
Age_nLx,
- AgeInt_nLx = NULL,
Age_fit = NULL,
AgeInt_fit = NULL,
Redistribute_from = max(Age_Pop),
OAnew = max(Age_nLx),
- method = "uniform",
- continuous = TRUE
+ method = "mono"
)
}
\arguments{
@@ -24,13 +21,9 @@ OPAG(
\item{Age_Pop}{integer vector of the lower bounds of the population age groups}
-\item{AgeInt_Pop}{integer vector of the population age group interval widths, using \code{Inf} for the open age group.}
+\item{nLx}{numeric vector of stationary population age structure in arbitrary integer age groups}
-\item{nLx}{numeric vector of stable population standard}
-
-\item{Age_nLx}{integer vector of lower bounds for age groups of \code{nLx}}
-
-\item{AgeInt_nLx}{optional integer vector of widths of age groups of \code{nLx}, inferred if not given.}
+\item{Age_nLx}{integer vector of lower bounds of age groups of \code{nLx}}
\item{Age_fit}{integer vector of lower bounds for age groups of \code{Pop_fit}}
@@ -40,9 +33,7 @@ OPAG(
\item{OAnew}{integer. Desired open age group in the output (must being element of \code{Age_nLx})}
-\item{method}{character. Graduation method, default \code{"uniform"}. \code{"mono"} or \code{"pclm"} would also be good choices.}
-
-\item{continuous}{logical. If \code{TRUE} we use the growth adjustment. \code{e^(-age*r)}. If \code{FALSE} we assume \code{r} is geometric growth, and we use \code{(1+r)^age} for the growth adjustment.}
+\item{method}{character, graduation method used for intermediate graduation. Default \code{"mono"}. Other reasonable choices include \code{"pclm"} or \code{"uniform"}.}
}
\description{
This can be used as an external check of population counts
@@ -58,37 +49,29 @@ constant growth, \code{r}.
It may be helpful to try more than one fitting possibility,
and more than one \code{Redistribute_from} cut point, as results may vary.
-The argument \code{"method"} refers to which graduation method (see \code{?graduate})
-is only relevant if input data are in grouped ages. This is innocuous if
-ages are single to begin with. The choice of whether to assume
-\code{continuous = TRUE} constant growth versus geometric (\code{FALSE}) growth
-has little leverage.
-
\code{Redistribute_from} can be lower than your current open age group,
and \code{OAnew} can be higher, as long as it is within the range of \code{Age_nLx}.
If \code{Age_nLx} doesn't go high enough for your needs, you can extrapolate
-it ahead of time. For this, you'd want the \code{nMx} the underly it, and you
+it ahead of time. For this, you'd want the \code{nMx} the underlie it, and you
can use \code{lt_abridged()}, specifying a higher open age, and then
extracting \code{nLx} again from it.
}
\examples{
-# India Males, 1991
+# India Males, 1971
Pop <- smooth_age_5(pop1m_ind,
Age = 0:100,
method = "Arriaga")
Age_Pop <- names2age(Pop)
AgeInt_Pop <- age2int(Age_Pop, OAvalue = 1)
-nLx <- downloadnLx(NULL, "India","male",1991)
+nLx <- downloadnLx(NULL, "India","male",1971)
Age_nLx <- names2age(nLx)
AgeInt_nLx <- age2int(Age_nLx, OAvalue = 1)
Pop_fit <- OPAG(Pop,
Age_Pop = Age_Pop,
- AgeInt_Pop = AgeInt_Pop,
nLx = nLx,
Age_nLx = Age_nLx,
- AgeInt_nLx,
Age_fit = c(60,70),
AgeInt_fit = c(10,10),
Redistribute_from = 80)
diff --git a/man/OPAG_fit_stable_standard.Rd b/man/OPAG_fit_stable_standard.Rd
index c3b2a4e07..4f4954dcc 100644
--- a/man/OPAG_fit_stable_standard.Rd
+++ b/man/OPAG_fit_stable_standard.Rd
@@ -4,16 +4,7 @@
\alias{OPAG_fit_stable_standard}
\title{creates stable standard based on optimizing the growth rate}
\usage{
-OPAG_fit_stable_standard(
- Pop_fit,
- Age_fit,
- AgeInt_fit,
- nLx,
- Age_nLx,
- AgeInt_nLx,
- method = "uniform",
- continuous = TRUE
-)
+OPAG_fit_stable_standard(Pop_fit, Age_fit, AgeInt_fit, Lx1, Age_Lx1)
}
\arguments{
\item{Pop_fit}{numeric vector of at least two population counts to use for fitting}
@@ -22,15 +13,9 @@ OPAG_fit_stable_standard(
\item{AgeInt_fit}{integer vector of widths of age groups of \code{Pop_fit}}
-\item{nLx}{numeric vector of stable population standard}
-
-\item{Age_nLx}{integer vector of lower bounds for age groups of \code{nLx}}
-
-\item{AgeInt_nLx}{optional integer vector of widths of age groups of \code{nLx}, inferred if not given.}
-
-\item{method}{character. Graduation method, default \code{"uniform"}. \code{"mono"} or \code{"pclm"} would also be good choices.}
+\item{Lx1}{numeric vector of stable population standard by single ages}
-\item{continuous}{logical. If \code{TRUE} we use the growth adjustment. \code{e^(-age*r)}. If \code{FALSE} we assume \code{r} is geometric growth, and we use \code{(1+r)^age} for the growth adjustment.}
+\item{Age_Lx1}{integer vector of lower bounds for age groups of \code{Lx1}}
}
\value{
list constaining
@@ -44,47 +29,44 @@ redistribution in \code{OPAG()}
The stationary standard, \code{nLx} is transformed into a stable standard by optimizing a growth rate, \code{r} such that the stable standard matches observed population counts in selected age groups. Usually the ages used for fitting are wide age groups in older ages preceding the open age group. The standard output by this function is used by \code{OPAG} to create the standard used to redistribute counts over older age groups up to a specified open age group, such as 100.
}
\details{
-The arguments \code{method} and \code{continous} don't have much leverage on the result. In short, the stable population transformation is done by ungrouping \code{nLx} to single ages (if it isn't already), and \code{method} controls which graduation method is used for this, where \code{"uniform"}, \code{"mono"}, \code{"pclm"} are the reasonable choices at this writing. In single ages, the difference between using a geometric \code{r} versus continuous \code{r} are quite small for this task.
+The argument \code{method} doesn't have much leverage on the result. In short, the stable population transformation is done by ungrouping \code{nLx} to single ages (if it isn't already), and \code{method} controls which graduation method is used for this, where \code{"uniform"}, \code{"mono"}, \code{"pclm"} are the reasonable choices at this writing.
}
\examples{
Pop_fit <- c(85000,37000)
Age_fit <- c(70,80)
-AgeInt_fit <- c(10,10)
nLx <- downloadnLx(NULL, "Spain","female",1971)
Age_nLx <- names2age(nLx)
-
-# India Males, 1991
+Lx1 <- graduate(nLx,Age=Age_nLx,method = "mono")
+Age_Lx1 <- 0:100
+# India Males, 1971
Pop <- smooth_age_5(pop1m_ind,
Age = 0:100,
method = "Arriaga")
Pop80 <- groupOAG(Pop, names2age(Pop), 80)
Age <- names2age(Pop80)
-AgeInt <- age2int(Age, OAvalue = 1)
-nLx <- downloadnLx(NULL, "India","male",1991)
+nLx <- downloadnLx(NULL, "India","male",1971)
Age_nLx <- names2age(nLx)
-AgeInt_nLx <- age2int(Age_nLx,OAvalue = 1)
+
Pop_fit <- groupAges(Pop80, Age, N = 10)[c("60","70")]
Age_fit <- c(60,70)
AgeInt_fit <- c(10,10)
Standard <- OPAG_fit_stable_standard(
- Pop_fit,
- Age_fit,
- AgeInt_fit,
- nLx = nLx,
- Age_nLx = Age_nLx,
- AgeInt_nLx = AgeInt_nLx,
- method = "uniform",
- continuous = TRUE)
+ Pop_fit = Pop_fit,
+ Age_fit = Age_fit,
+ AgeInt_fit = AgeInt_fit,
+ Lx1=Lx1,
+ Age_Lx1 = Age_Lx1
+)
# A visual comparison:
nL60 <- rescale_vector(nLx[Age_nLx >= 60])
-St60p <- rescale_vector( Standard$Standard[Age_nLx >= 60] )
+St60p <- rescale_vector( Standard$Standard[c(0:100) >= 60] )
ages_plot <- seq(60,100,by=5)
\dontrun{
-plot(ages_plot,nL60, type = 'l')
-lines(ages_plot, St60p, col = "blue")
+ plot(ages_plot,nL60, type = 'l')
+ lines(60:100, St60p, col = "blue")
}
}
diff --git a/man/OPAG_nLx_warp_r.Rd b/man/OPAG_nLx_warp_r.Rd
index f12537911..d884a4171 100644
--- a/man/OPAG_nLx_warp_r.Rd
+++ b/man/OPAG_nLx_warp_r.Rd
@@ -4,27 +4,14 @@
\alias{OPAG_nLx_warp_r}
\title{Warps a given stationary population into a stable population}
\usage{
-OPAG_nLx_warp_r(
- nLx,
- Age,
- r,
- AgeInt = NULL,
- continuous = TRUE,
- method = "uniform"
-)
+OPAG_nLx_warp_r(Lx1, Age_Lx1, r)
}
\arguments{
-\item{nLx}{numeric vector of stationary population age structure in arbitrary integer age groups}
+\item{Lx1}{numeric vector of stationary population age structure in arbitrary integer age groups}
-\item{Age}{interger vector of lower bounds of age groups of \code{nLx}}
+\item{Age_Lx1}{integer vector of lower bounds of age groups of \code{Lx1}}
\item{r}{stable growth rate}
-
-\item{AgeInt}{optional integer vector of widths of age groups, inferred if not given.}
-
-\item{continuous}{logical. If \code{TRUE} we use the growth adjustment. \code{e^(-age*r)}. If \code{FALSE} we assume \code{r} is geometric growth, and we use \code{(1+r)^age} for the growth adjustment.}
-
-\item{method}{character, graduation method used for intermediate graduation. Default \code{"uniform"}. Other reasonable choices include \code{"mono"} or \code{"pclm"}.}
}
\value{
numeric vector of the transformed \code{nLx}. Note, this vector sums to \code{1}.
@@ -34,7 +21,7 @@ We take \code{nLx} as indicative of a stationary population age structure,
then subject the population structure to long-term growth by a constant rate, \code{r}.
}
\details{
-\code{nLx} could be any population structure of any scale, as long as you're comfortable
+\code{Lx1} could be any population structure of any scale, as long as you're comfortable
assuming it's stationary and can be warped into stable. For the oldest ages, this is probably
quite often an acceptable and useful approximation. The transformation is applied at the single-age scale, even if the input \code{nLx} is in wider (e.g. abridged) age groups. When needed, we reduce to single ages using (default) \code{graduate_uniform()}, then apply the transformation, then group back. This is innocuous if \code{nLx} is given in single ages. You may want to change \code{method} to \code{"mono"} or \code{"pclm"}.
}
diff --git a/man/OPAG_r_min.Rd b/man/OPAG_r_min.Rd
index c071d52ce..f77b5307c 100644
--- a/man/OPAG_r_min.Rd
+++ b/man/OPAG_r_min.Rd
@@ -4,36 +4,20 @@
\alias{OPAG_r_min}
\title{calculates residual for optimizing growth rate r for OPAG family}
\usage{
-OPAG_r_min(
- r,
- Pop_fit,
- Age_fit,
- AgeInt_fit,
- nLx,
- Age_nLx,
- AgeInt_nLx = NULL,
- continuous = TRUE,
- method = "uniform"
-)
+OPAG_r_min(r, Age_fit, Pop_fit, AgeInt_fit, Lx1, Age_Lx1)
}
\arguments{
\item{r}{given stable growth rate}
-\item{Pop_fit}{numeric vector of at least two population counts to use for fitting}
-
\item{Age_fit}{integer vector of lower bounds for age groups of \code{Pop_fit}}
-\item{AgeInt_fit}{integer vector of widths of age groups of \code{Pop_fit}}
-
-\item{nLx}{numeric vector of stable population standard}
-
-\item{Age_nLx}{integer vector of lower bounds for age groups of \code{nLx}}
+\item{Pop_fit}{numeric vector of at least two population counts to use for fitting}
-\item{AgeInt_nLx}{optional integer vector of widths of age groups of \code{nLx}, inferred if not given.}
+\item{AgeInt_fit}{integer vector of widths of age groups of \code{Pop_fit}}
-\item{continuous}{logical. If \code{TRUE} we use the growth adjustment. \code{e^(-age*r)}. If \code{FALSE} we assume \code{r} is geometric growth, and we use \code{(1+r)^age} for the growth adjustment.}
+\item{Lx1}{numeric vector of stable population standard by single ages}
-\item{method}{character. Graduation method, default \code{"uniform"}. \code{"mono"} or \code{"pclm"} would also be good choices.}
+\item{Age_Lx1}{integer vector of lower bounds for age groups of \code{Lx1}}
}
\value{
numeric. A residual that you're presumably trying to minimize.
@@ -43,7 +27,7 @@ For a given set of age groups to fit against, and a given stable growth rate, $r
what is the error implied given the current $r$ and stationary standard?
}
\details{
-This is a utiltiy function for \code{OPAG()}, which needs to optimize $r$ for a
+This is a utility function for \code{OPAG()}, which needs to optimize $r$ for a
given population vector and stationary standard.
}
\examples{
@@ -52,32 +36,25 @@ Pop_fit <- c(85000,37000)
Age_fit <- c(70,80)
AgeInt_fit <- c(10,10)
nLx <- downloadnLx(NULL, "Spain","female",1971)
-Age_nLx <- names2age(nLx)
+# graduate(nLx, Age_nLx, method = method, constrain = TRUE)
+Ageab <- names2age(nLx)
+Lx1 <- graduate(c(nLx), Ageab, method = "mono", constrain = TRUE)
+Age_Lx1 <- 0:100
r <- .01
OPAG_r_min(r,
- Pop_fit,
- Age_fit,
- AgeInt_fit,
- nLx,
- Age_nLx)
+ Pop_fit = Pop_fit,
+ Age_fit = Age_fit,
+ AgeInt_fit = AgeInt_fit,
+ Lx1 = Lx1,
+ Age_Lx1 = Age_Lx1)
(r_opt <- optimize(OPAG_r_min,
Pop_fit = Pop_fit,
Age_fit = Age_fit,
AgeInt_fit = AgeInt_fit,
- nLx = nLx,
- Age_nLx = Age_nLx,
+ Lx1 = Lx1,
+ Age_Lx1 = Age_Lx1,
interval = c(-0.05,.05))$min)
-ai <- age2int(Age_nLx)
-
-# Note the whole age range is being scaled to 1 here, but in practice
-# you'd only be doing this in the highest ages. If only two fitting
-# ages are given, then we can get an r that matches them perfectly,
-# at least within reasonable bounds.
-\dontrun{
-plot(Age, nLx / sum(nLx) / ai, type = 's')
-lines(Age,OPAG_nLx_warp_r(Lx,Age,r=r_opt)/ai,type='s',col = "red")
-}
}
diff --git a/man/OPAG_simple.Rd b/man/OPAG_simple.Rd
index ece6c6a27..405bed0ad 100644
--- a/man/OPAG_simple.Rd
+++ b/man/OPAG_simple.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/OPAG.R
\name{OPAG_simple}
\alias{OPAG_simple}
-\title{redistripute an open age group count over higher ages proportional to an arbitrary standard}
+\title{redistribute an open age group count over higher ages proportional to an arbitrary standard}
\usage{
OPAG_simple(Pop, Age, OAnow = max(Age), StPop, StAge, OAnew = max(StAge))
}
diff --git a/man/ageRatioScore.Rd b/man/ageRatioScore.Rd
index 71b6507b3..8970c5ea9 100644
--- a/man/ageRatioScore.Rd
+++ b/man/ageRatioScore.Rd
@@ -39,7 +39,7 @@ Age groups must be of equal intervals. Five year age groups are assumed.
It is also assumed that the final age group is open, unless \code{ageMax < max(Age)}.
Setting \code{OAG = FALSE} will override this and potentially include \code{max(Age)} in calculations.
The method argument determines the weighting of numerators and denominators, where the UN method puts
-twice the numerator over the sum of the adjacent ages classes, Zelnich does thrice the
+twice the numerator over the sum of the adjacent ages classes, Zelnik does thrice the
numerator over the sum of the whole range from the next lowest to the next highest age, and
Ramachandran does four times the numerator over the same sum, but with the central age
double-counted in the numerator.
diff --git a/man/age_abridge_force.Rd b/man/age_abridge_force.Rd
index a6052a31d..06c6ec526 100644
--- a/man/age_abridge_force.Rd
+++ b/man/age_abridge_force.Rd
@@ -4,17 +4,15 @@
\alias{age_abridge_force}
\title{force a (count) vector to abridged ages}
\usage{
-age_abridge_force(Value, AgeInt, Age)
+age_abridge_force(Value, Age)
}
\arguments{
\item{Value}{numeric vector, presumably counts in grouped ages}
-\item{AgeInt}{integer vector, age interval widths}
-
\item{Age}{integer vector, lower bounds of age groups}
}
\description{
-This is a robustness utility, in place to avoid annoying hang-ups in \code{LTAbr()}. If data are given in non-standard ages, they are forced to standard abrdiged ages on the fly. Really this should happen prior to calling \code{lt_abridged()}
+This is a robustness utility, in place to avoid annoying hang-ups in \code{LTAbr()}. If data are given in non-standard ages, they are forced to standard abridged ages on the fly. Really this should happen prior to calling \code{lt_abridged()}
}
\details{
This should be able to group up and group down as needed. \code{graduate_mono()} is used below the hood. \code{pclm()} or \code{graduate_uniform()} out to be flexible enough to do the same.
@@ -26,7 +24,7 @@ AgeInt <- c(1,2,2,rep(5,19),1)
Value <- tapply(V1,rep(Age,times=AgeInt), sum)
is_abridged(Age)
-age_abridge_force(Value, AgeInt, Age)
+age_abridge_force(Value, Age)
}
\seealso{
graduate_mono_closeout, lt_abridged
diff --git a/man/ages_asfr_five.Rd b/man/ages_asfr_five.Rd
index 0ece98bd7..cf44c2765 100644
--- a/man/ages_asfr_five.Rd
+++ b/man/ages_asfr_five.Rd
@@ -8,7 +8,7 @@
A vector of length 7
}
\source{
-Migration residual PAS spreadhseet
+Migration residual PAS spreadsheet
}
\usage{
ages_asfr_five
diff --git a/man/ages_asfr_single.Rd b/man/ages_asfr_single.Rd
index 8e1eab99e..8b397587b 100644
--- a/man/ages_asfr_single.Rd
+++ b/man/ages_asfr_single.Rd
@@ -8,7 +8,7 @@
A vector of length 36
}
\source{
-Migration residual PAS spreadhseet
+Migration residual PAS spreadsheet
}
\usage{
ages_asfr_single
diff --git a/man/ages_five.Rd b/man/ages_five.Rd
index 4bb4a2642..40606061a 100644
--- a/man/ages_five.Rd
+++ b/man/ages_five.Rd
@@ -8,7 +8,7 @@
A vector of length 21
}
\source{
-Migration residual PAS spreadhseet
+Migration residual PAS spreadsheet
}
\usage{
ages_five
diff --git a/man/ages_single.Rd b/man/ages_single.Rd
index 773c95c27..4532d9626 100644
--- a/man/ages_single.Rd
+++ b/man/ages_single.Rd
@@ -8,7 +8,7 @@
A vector of length 101
}
\source{
-Migration residual PAS spreadhseet
+Migration residual PAS spreadsheet
}
\usage{
ages_single
diff --git a/man/agesmth1.Rd b/man/agesmth1.Rd
index ddaaac244..2df2e1e70 100644
--- a/man/agesmth1.Rd
+++ b/man/agesmth1.Rd
@@ -42,7 +42,7 @@ constrained to the original total in all cases.
LOESS (locally weighted smoothing) helps to smooth data over age, preserving the open age group if necessary.
It is a popular tool to create a smooth line through a timeplot or scatter plot.
Loess smoothness may be tweaked by specifying an optional \code{"span"} argument.
-Polynomial fitting is used to mooth data over age or time fitting linear models.
+Polynomial fitting is used to smooth data over age or time fitting linear models.
It can be tweaked by changing the degree and by either log or power transforming.
The open age group can be kept as-is if desired by specifying \code{OAG = TRUE}.
May be used on any age groups, including irregularly spaced, single age, or 5-year age groups.
diff --git a/man/asfr_mat_five.Rd b/man/asfr_mat_five.Rd
index 7e60ea2f1..e5f945a95 100644
--- a/man/asfr_mat_five.Rd
+++ b/man/asfr_mat_five.Rd
@@ -8,7 +8,7 @@
A matrix of dimensions 7 x 20
}
\source{
-Migration residual PAS spreadhseet
+Migration residual PAS spreadsheet
}
\usage{
asfr_mat_five
diff --git a/man/asfr_mat_single.Rd b/man/asfr_mat_single.Rd
index 7940b4dfc..43395ac3c 100644
--- a/man/asfr_mat_single.Rd
+++ b/man/asfr_mat_single.Rd
@@ -8,7 +8,7 @@
A matrix of dimensions 35 x 20
}
\source{
-Migration residual PAS spreadhseet
+Migration residual PAS spreadsheet
}
\usage{
asfr_mat_single
diff --git a/man/basepop_five.Rd b/man/basepop_five.Rd
index 0a37bc585..501f54d4a 100644
--- a/man/basepop_five.Rd
+++ b/man/basepop_five.Rd
@@ -111,8 +111,8 @@ should be aware of.}
\item \code{nLxm} numeric matrix of male \code{nLx}, abridged ages in rows and (potentially interpolated) time in columns. Potentially downloaded.
\item \code{Asfr} numeric matrix of age specific fertility in 5-year age groups ages 15-19 until 45-49 in rows, and (potentially interpolated) time in columns. Potentially downloaded.
\item \code{Exposure_female} numeric matrix of approximated age-specific exposure in 5-year age groups ages 15-19 until 45-49 in rows, and (potentially interpolated) time in columns.
-\item \code{Bt} births at three time points prior to census corrsponding to the midpoints of the cohorts entering ages 0, 1-4, and 5-9.
-\item \code{SRB} sex ratio at birth at three time points prior to census corrsponding to the midpoints of the cohorts entering ages 0, 1-4, and 5-9. Potentially downloaded.
+\item \code{Bt} births at three time points prior to census corresponding to the midpoints of the cohorts entering ages 0, 1-4, and 5-9.
+\item \code{SRB} sex ratio at birth at three time points prior to census corresponding to the midpoints of the cohorts entering ages 0, 1-4, and 5-9. Potentially downloaded.
\item \code{Age} age groups of the input population counts.
}
}
diff --git a/man/calcAgeN.Rd b/man/calcAgeN.Rd
index ff3e1702d..fd2ac7306 100644
--- a/man/calcAgeN.Rd
+++ b/man/calcAgeN.Rd
@@ -21,7 +21,7 @@ Assign single ages to age groups of equal and arbitrary width, and also optional
}
\details{
If you shift the groupings, then the first age groups may have a negative lower bound
-(for example of -5). These counts would be discarded for the osculatory version of Sprague smoothing,
+(for example of -5). These counts would be discarded for the oscillatory version of Sprague smoothing,
for example, but they are preserved in this function. The important thing to know is that if you shift
the groups, the first and last groups won't be N years wide. For example if \code{shiftdown} is 1,
the first age group is 4-ages wide.
diff --git a/man/check_heaping_kannisto.Rd b/man/check_heaping_kannisto.Rd
index 7e1840b0c..f2d69a00e 100644
--- a/man/check_heaping_kannisto.Rd
+++ b/man/check_heaping_kannisto.Rd
@@ -22,7 +22,7 @@ The value of the index.
This age heaping index is used for particular old ages, such as 90, 95, 100, 105, and so forth.
}
\details{
-The index looks down two ages and up two ages, so the data must accommodate that range. The denominator is a mean of the counts in the sourrounding 5 single ages. The kind of mean can be controlled with the \code{pow} argument. By default, this takes the antilog of the arithmetic mean of the natural log of the five denominator counts. That will fail if one of the counts is equal to 0. In such cases, another power, such as 2 or 10 or 100 may be used, which is more robust to 0s. The higher the power, the closer the result will resemble the default output. If \code{pow=="exp"} but a 0 is detected among the denominator ages, then \code{pow} is assigned a value of 1000. \code{pow=1} would imply an arithmetic mean in the denominator.
+The index looks down two ages and up two ages, so the data must accommodate that range. The denominator is a mean of the counts in the surrounding 5 single ages. The kind of mean can be controlled with the \code{pow} argument. By default, this takes the antilog of the arithmetic mean of the natural log of the five denominator counts. That will fail if one of the counts is equal to 0. In such cases, another power, such as 2 or 10 or 100 may be used, which is more robust to 0s. The higher the power, the closer the result will resemble the default output. If \code{pow=="exp"} but a 0 is detected among the denominator ages, then \code{pow} is assigned a value of 1000. \code{pow=1} would imply an arithmetic mean in the denominator.
}
\examples{
Age <- 0:99
diff --git a/man/check_heaping_roughness.Rd b/man/check_heaping_roughness.Rd
index 258b2bef6..10c36f2cb 100644
--- a/man/check_heaping_roughness.Rd
+++ b/man/check_heaping_roughness.Rd
@@ -18,10 +18,10 @@ check_heaping_roughness(
\item{ageMin}{integer evenly divisible by 5. Lower bound of evaluated age range, default 20.}
-\item{ageMax}{integer evently divisibly by 5. Upper bound of evaluated age range, defaults to highest age evenly divisible by 10.}
+\item{ageMax}{integer evenly divisible by 5. Upper bound of evaluated age range, defaults to highest age evenly divisible by 10.}
}
\description{
-For a given age-structured vector of counts, how rough is data after grouping to 5-year age bins? Data may require smoothing even if there is no detectable sawtooth pattern. It is best to use the value in this method together with visual evidence to gauage whether use of \code{smooth_age_5()} is recommended.
+For a given age-structured vector of counts, how rough is data after grouping to 5-year age bins? Data may require smoothing even if there is no detectable sawtooth pattern. It is best to use the value in this method together with visual evidence to gauge whether use of \code{smooth_age_5()} is recommended.
}
\details{
First we group data to 5-year age bins. Then we take first differences (d1) of these within the evaluated age range. Then we smooth first differences (d1s) using a generic smoother (\code{ogive()}). Roughness is defined as the mean of the absolute differences between \code{mean(abs(d1 - d1s) / abs(d1s))}. Higher values indicate rougher data, and may suggest more aggressive smoothing. Just eyeballing, one could consider smoothing if the returned value is greater than ca 0.2, and values greater than 0.5 already highly recommend it (pending visual verification).
diff --git a/man/check_heaping_sawtooth.Rd b/man/check_heaping_sawtooth.Rd
index 563343ebc..b02257a68 100644
--- a/man/check_heaping_sawtooth.Rd
+++ b/man/check_heaping_sawtooth.Rd
@@ -18,13 +18,13 @@ check_heaping_sawtooth(
\item{ageMin}{integer evenly divisible by 10. Lower bound of evaluated age range, default 40.}
-\item{ageMax}{integer evently divisibly by 5. Upper bound of evaluated age range, defaults to highest age evenly divisible by 10.}
+\item{ageMax}{integer evenly divisible by 5. Upper bound of evaluated age range, defaults to highest age evenly divisible by 10.}
}
\value{
numeric, ratio of 0s to 5s. If > 1 then the pattern is present.
}
\description{
-Ages ending in 0 often have higher apparent heaping than ages ending in 5. In this case, data in 5-year age bins might show a sawtooth pattern. If heaping ocurrs in roughly the same amount on 0s and 5s, then it may be sufficient to group data into 5-year age groups and then graduate back to single ages. However, if heaping is worse on 0s, then this procedure tends to produce a wavy pattern in count data, with 10-year periodicity. In this case it is recommended to use one of the methods of \code{smooth_age_5()} as an intermediate step before graduation.
+Ages ending in 0 often have higher apparent heaping than ages ending in 5. In this case, data in 5-year age bins might show a sawtooth pattern. If heaping occurs in roughly the same amount on 0s and 5s, then it may be sufficient to group data into 5-year age groups and then graduate back to single ages. However, if heaping is worse on 0s, then this procedure tends to produce a wavy pattern in count data, with 10-year periodicity. In this case it is recommended to use one of the methods of \code{smooth_age_5()} as an intermediate step before graduation.
}
\details{
Data is grouped to 5-year age bins. The ratio of each value to the average of its neighboring values is calculated. If 0s have stronger attraction than 5s then we expect these ratios to be >1 for 0s and <1 for 5s. Ratios are compared within each 10-year age group in the evaluated age range. If in the evaluated range there are at most two exceptions to this rule (0s>5s), then the ratio of the mean of these ratios is returned, and it is recommended to use a smoother method. Higher values suggest use of a more aggressive method. This approach is only slightly different from that of Feeney, as implemented in the \code{smooth_age_5_zigzag_inner()} functions. This is not a general measure of roughness, but rather an indicator of this particular pattern of age attraction.
diff --git a/man/downloadAsfr.Rd b/man/downloadAsfr.Rd
index 609808be2..91b544745 100644
--- a/man/downloadAsfr.Rd
+++ b/man/downloadAsfr.Rd
@@ -19,7 +19,7 @@ downloadAsfr(Asfrmat, location = NULL, AsfrDatesIn, method = "linear")
numeric matrix interpolated asfr
}
\description{
-We extract \code{ASFRx} from \code{wpp2019}, interpolated to exact dates. Different methods availables.
+We extract \code{ASFRx} from \code{wpp2019}, interpolated to exact dates. Different methods available.
A vector of countries can handle, but with an unique sex. Row names are not indicative of countries.
}
\examples{
diff --git a/man/downloadnLx.Rd b/man/downloadnLx.Rd
index 0b5f18d67..ba1dfed2a 100644
--- a/man/downloadnLx.Rd
+++ b/man/downloadnLx.Rd
@@ -18,10 +18,10 @@ downloadnLx(nLx, location, gender, nLxDatesIn, method = "linear")
\item{method}{character. Could be \code{"linear"}, \code{"exponential"}, or \code{"power"}}
}
\value{
-numeric matrix of \code{nLx} with \code{length(nLxDatesIn)} and abrdiged ages in rows.
+numeric matrix of \code{nLx} with \code{length(nLxDatesIn)} and abridged ages in rows.
}
\description{
-We extract \code{Lx} from \code{wpp2019}, interpolated to exact dates. Different methods availables.
+We extract \code{Lx} from \code{wpp2019}, interpolated to exact dates. Different methods available.
A vector of countries can handle, but with an unique sex. Row names are not indicative of countries.
}
\examples{
diff --git a/man/graduate.Rd b/man/graduate.Rd
index 9b57bfdd0..8753033ae 100644
--- a/man/graduate.Rd
+++ b/man/graduate.Rd
@@ -10,8 +10,7 @@ graduate(
AgeInt = age2int(Age),
OAG = TRUE,
OAnew = max(Age),
- method = c("sprague", "beers(ord)", "beers(mod)", "grabill", "pclm", "mono",
- "uniform"),
+ method = c("sprague", "beers(ord)", "beers(mod)", "grabill", "pclm", "mono", "uniform"),
keep0 = FALSE,
constrain = FALSE,
...
@@ -50,7 +49,7 @@ This wrapper standardizes some inconsistencies in how open ages are dealt with.
\code{OAnew} cannot be higher than \code{max(Age)+4} for \code{"sprague"} or \code{"beers"} methods. For \code{"uniform","mono","pclm"} it can be higher than this, and in each case the open age group is completely redistributed within this range, meaning it's not really open anymore.
-For all methods, negative values are detected in output. If present, we deal with these in the following way: we take the geometric mean between the given output (with negative imputed with 0s) and the output of \code{graduate_mono()}, which is guaranteed non-negative. This only affects age groups where negatives were produced in the first pass. In our experience this only arises when using sprague, beers, or grabill methods, whereas all others are guarateed non-negative.
+For all methods, negative values are detected in output. If present, we deal with these in the following way: we take the geometric mean between the given output (with negative imputed with 0s) and the output of \code{graduate_mono()}, which is guaranteed non-negative. This only affects age groups where negatives were produced in the first pass. In our experience this only arises when using Sprague, Beers, or Grabill methods, whereas all others are guaranteed non-negative.
For any case where input data are in single ages, constraining results to sum to values in the original age groups will simply return the original input data, which is clearly not your intent. This might arise when using graduation as an implicit two-step smoother (group + graduate). In this case, separate the steps, first group using \code{groupAges()} then use \code{graduate(..., constrain = TRUE)}.
}
diff --git a/man/graduate_beers.Rd b/man/graduate_beers.Rd
index 2d4618773..5744d038b 100644
--- a/man/graduate_beers.Rd
+++ b/man/graduate_beers.Rd
@@ -26,7 +26,7 @@ A numeric vector of single age data.
This method offers both ordinary and modified Beers splitting, with an optional \href{https://www.census.gov/data/software/dapps.html}{Demographic Analysis & Population Projection System Software} adjustment \code{johnson} for ages under 10.
}
\details{
-Ages should refer to lower age bounds. \code{Value} must be labelled with ages unless \code{Age} is given separately. There must be at least six 5-year age groups (including the open group, 5 otherwise). If you want the \code{johnson} adjustment then \code{Value} must contain a single-year estimate of the population count in age 0. That means \code{Value} must come either as standard abridged or single age data.
+Ages should refer to lower age bounds. \code{Value} must be labeled with ages unless \code{Age} is given separately. There must be at least six 5-year age groups (including the open group, 5 otherwise). If you want the \code{johnson} adjustment then \code{Value} must contain a single-year estimate of the population count in age 0. That means \code{Value} must come either as standard abridged or single age data.
\code{method} option \code{"ord"} conserves sums in 5-year age groups, whereas \code{"mod"} does some smoothing between 5-year age groups too, and is not constrained.
diff --git a/man/graduate_grabill.Rd b/man/graduate_grabill.Rd
index beff7cb91..bdd223f93 100644
--- a/man/graduate_grabill.Rd
+++ b/man/graduate_grabill.Rd
@@ -23,7 +23,7 @@ This method uses Grabill's redistribution of middle ages and blends into
Sprague estimated single-age population counts for the first and final ten ages. Open age groups are preserved, as are annual totals.
}
\details{
-Dimension labelling is necessary. There must be at least six age groups (including the open group). One year of data will work as well, as long as it's given as a single-column matrix. Data may be given in either single or grouped ages. If the highest age does not end in a 0 or 5, and \code{OAG == TRUE}, then the open age will be grouped down to the next highest age ending in 0 or 5. If the highest age does not end in a 0 or 5, and \code{OAG == FALSE}, then results extend to single ages covering the entire 5-year age group.
+Dimension labeling is necessary. There must be at least six age groups (including the open group). One year of data will work as well, as long as it's given as a single-column matrix. Data may be given in either single or grouped ages. If the highest age does not end in a 0 or 5, and \code{OAG == TRUE}, then the open age will be grouped down to the next highest age ending in 0 or 5. If the highest age does not end in a 0 or 5, and \code{OAG == FALSE}, then results extend to single ages covering the entire 5-year age group.
}
\examples{
a5 <- as.integer(rownames(pop5_mat))
diff --git a/man/graduate_mono.Rd b/man/graduate_mono.Rd
index ace844684..36637b510 100644
--- a/man/graduate_mono.Rd
+++ b/man/graduate_mono.Rd
@@ -22,7 +22,7 @@ Numeric. vector of single smoothed age counts.
Take the cumulative sum of \code{Value} and then run a monotonic spline through it. The first differences split back single-age estimates of \code{Value}. Optionally keep the open age group untouched.
}
\details{
-The \code{"hyman"} method of \code{stats::splinefun()} is used to fit the spline because 1) it passes exactly through the points, 2) it is monotonic and therefore guarantees positive counts, and 3) it seems to be a bit less wiggly (lower average first differences of split counts). Single-age data is returned as-is. If you want to use this function as a smoother you first need to group to non-single ages.
+The \code{"hyman"} method of \code{stats::splinefun()} is used to fit the spline because 1) it passes exactly through the points, 2) it is monotonic and therefore guarantees positive counts, and 3) it seems to be a bit less wiggly (lower average first differences of split counts). Single-age data is returned as-is. If you want to use this function as a smoother you first need to group to non-single ages. \code{Age} can be any age range; it does not need to start at 0.
}
\examples{
Value <- structure(c(88623, 90842, 93439, 96325, 99281, 102051, 104351,
diff --git a/man/graduate_mono_closeout.Rd b/man/graduate_mono_closeout.Rd
index 90829643a..a841b3cdc 100644
--- a/man/graduate_mono_closeout.Rd
+++ b/man/graduate_mono_closeout.Rd
@@ -55,7 +55,7 @@ grabill.closed.out <- graduate_mono_closeout(Value = popvec, Age = a5, pops =
# one may wish to instead rescale results colSums() of
# popg at age pivotAge and higher.
sum(grabill.closed.out) - sum(popvec)
-# also works on an age-labelled vector of data
+# also works on an age-labeled vector of data
closed.vec <- graduate_mono_closeout(popvec, Age = a5, OAG = TRUE)
# let's compare this one with sprague()
simple.vec <- graduate_sprague(popvec, Age = a5, OAG = TRUE)
diff --git a/man/graduate_pclm.Rd b/man/graduate_pclm.Rd
index 7a26f364e..5e0f8951e 100644
--- a/man/graduate_pclm.Rd
+++ b/man/graduate_pclm.Rd
@@ -26,6 +26,8 @@ This is exactly the function \code{pclm()} from the \code{ungroup} package, exce
The PCLM method can also be used to graduate rates using an offset if both numerators and denominators are available. In this case \code{Value} is the event count and \code{offset} is person years of exposure. The denominator must match the length of \code{Value} or else the length of the final single age result \code{length(min(Age):OAnew)}. This method can be used to redistribute counts in the open age group if \code{OAnew} gives sufficient space. Likewise, it can give a rate extrapolation beyond the open age.
If there are 0s in \code{Value}, these are replaced with a small value prior to fitting. If negatives result from the pclm fit, we retry after multiplying \code{Value} by 10, 100, or 1000, as sometimes a temporary rescale for fitting can help performance.
+
+\code{Age} can be any age range; it does not need to start at 0.
}
\examples{
a5 <- seq(0,100,by=5)
diff --git a/man/graduate_sprague.Rd b/man/graduate_sprague.Rd
index 45173f286..8162a5d6d 100644
--- a/man/graduate_sprague.Rd
+++ b/man/graduate_sprague.Rd
@@ -22,7 +22,7 @@ Numeric vector of counts split into single ages.
This method is used to interpolate counts based on the Sprague formula. It is based on the first stage of the Sprague R script prepared by Thomas Buettner and Patrick Gerland, itself based on the description in Siegel and Swanson, 2004, p. 727.
}
\details{
-Ages should refer to lower age bounds, ending in the open age group in the last row (not a closed terminal age). Dimension labelling is necessary. There must be at least six age groups (including the open group). One year of data will work as well, as long as it's given as or coercible to a single-column matrix. This method may produce negative values, most likely in the youngest or oldest ages. This case is dealt with in the \code{graduate()} wrapper function but not in this function.
+Ages should refer to lower age bounds, ending in the open age group in the last row (not a closed terminal age). Dimension labeling is necessary. There must be at least six age groups (including the open group). One year of data will work as well, as long as it's given as or coercible to a single-column matrix. This method may produce negative values, most likely in the youngest or oldest ages. This case is dealt with in the \code{graduate()} wrapper function but not in this function.
If the highest age does not end in a 0 or 5, and \code{OAG == TRUE}, then the open age will be grouped down to the next highest age ending in 0 or 5. If the highest age does not end in a 0 or 5, and \code{OAG == FALSE}, then results extend to single ages covering the entire 5-year age group.
}
diff --git a/man/graduate_uniform.Rd b/man/graduate_uniform.Rd
index 7edc5c588..48c7aacc0 100644
--- a/man/graduate_uniform.Rd
+++ b/man/graduate_uniform.Rd
@@ -24,7 +24,7 @@ Numeric vector of counts for single year age groups.
Uniformly splits aggregate counts in age groups into single year age groups.
}
\details{
-Assumes that the population is uniformly distributed across each age interval, and that initial age intervals are integers greater than or equal to 1. If \code{AgeInt} is given, its final value is used as the interval for the final age group. If \code{AgeInt} is missing, then \code{Age} must be given, and the open age group is by default preserved \code{OAvalue} rather than split. To instead split the final age group into, e.g., a 5-year age class, either give \code{AgeInt}, \emph{or} give \code{Age}, \code{OAG = TRUE}, and \code{OAvalue = 5}.
+Assumes that the population is uniformly distributed across each age interval, and that initial age intervals are integers greater than or equal to 1. If \code{AgeInt} is given, its final value is used as the interval for the final age group. If \code{AgeInt} is missing, then \code{Age} must be given, and the open age group is by default preserved \code{OAvalue} rather than split. To instead split the final age group into, e.g., a 5-year age class, either give \code{AgeInt}, \emph{or} give \code{Age}, \code{OAG = TRUE}, and \code{OAvalue = 5}. \code{Age} can be any age range; it does not need to start at 0.
}
\examples{
MalePop <- c(9544406,7471790,11590109,11881844,11872503,12968350,
diff --git a/man/groupAges.Rd b/man/groupAges.Rd
index 04345a9a0..9ed9c5e63 100644
--- a/man/groupAges.Rd
+++ b/man/groupAges.Rd
@@ -34,7 +34,7 @@ This can be useful to check constrained sums, or as an intermediate step for smo
}
\details{
If you shift the groupings, then the first age groups may have a negative lower bound
-(for example of -5). These counts would be discarded for the osculatory version of Sprague smoothing,
+(for example of -5). These counts would be discarded for the oscillatory version of Sprague smoothing,
for example, but they are preserved in this function. The important thing to know is that if you shift
the groups, the first and last groups will not be N years wide. For example if \code{shiftdown} is 1, the first age group is 4-ages wide. The ages themselves are not returned,
but they are the name attribute of the output count vector. Note this will also correctly group abridged ages
diff --git a/man/interp.Rd b/man/interp.Rd
index 727df227b..f059301b5 100644
--- a/man/interp.Rd
+++ b/man/interp.Rd
@@ -11,6 +11,7 @@ interp(
method = c("linear", "exponential", "power"),
power = 2,
extrap = FALSE,
+ negatives = FALSE,
...
)
}
@@ -25,7 +26,9 @@ interp(
\item{power}{numeric power to interpolate by, if \code{method = "power"}. Default 2.}
-\item{extrap}{logical. In case \code{datesOut} is out of range of datesIn, do extrapolation using slope in extreme pairwise. Deafult \code{FALSE}.}
+\item{extrap}{logical. In case \code{datesOut} is out of range of \code{datesIn}, do extrapolation using slope in extreme pairwise. Default \code{FALSE}.}
+
+\item{negatives}{logical. In case negative outputs are accepted, set to \code{TRUE}. Default \code{FALSE}.}
\item{...}{arguments passed to \code{stats::approx}. For example, \code{rule}, which controls extrapolation behavior.}
}
diff --git a/man/interp_coh.Rd b/man/interp_coh.Rd
index ee79198e8..5c2aa5e2e 100644
--- a/man/interp_coh.Rd
+++ b/man/interp_coh.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/interp_coh.R
\name{interp_coh}
\alias{interp_coh}
-\title{component-free intercensal cohort interpolation}
+\title{Cohort component intercensal interpolation}
\usage{
interp_coh(
c1,
@@ -60,7 +60,10 @@ interp_coh(
\item{...}{optional arguments passed to}
}
\description{
-Cohorts between two censuses are interpolated flexibly using linear, exponential, or power rules. The lower and upper intercensal triangles are filled using within-age interpolation. This function is experimental and still in development.
+Cohorts between two censuses are interpolated using a cohort component approach.
+}
+\details{
+The basic approach is to i) align the censuses to single-year cohorts by blending adjacent ages assuming that the birthdays in each age group are uniformly distributed through the year, ii) decrement the first census forward within cohorts using period-cohort survival probabilities calculated from (supplied or downloaded) \code{l(x)} values, iii) redistribute the residual at the time of the second census uniformly over time within cohorts. These steps are always done on Jan 1 reference dates. If \code{midyear = TRUE}, then we do within-age band arithmetic interpolation to July 1 reference dates.
}
\examples{
diff --git a/man/interp_lc_lim.Rd b/man/interp_lc_lim.Rd
index 682133e8e..a1728aa04 100644
--- a/man/interp_lc_lim.Rd
+++ b/man/interp_lc_lim.Rd
@@ -23,7 +23,7 @@ interp_lc_lim(
\item{dates_out}{numeric. Vector of decimal years to interpolate or extrapolate.}
-\item{Single}{logical. Wheter or not the lifetable output is by single ages.}
+\item{Single}{logical. Whether or not the lifetable output is by single ages.}
\item{dates_e0}{numeric. Vector of decimal years where \code{"e_0"} should be fitted when apply method.}
@@ -67,12 +67,12 @@ List with:
}
}
\description{
-Given a data frame with dates, sex and mortality data by age (rates, conditionated probabilities of death
+Given a data frame with dates, sex and mortality data by age (rates, conditioned probabilities of death
or survival function), this function interpolate/extrapolate life tables
using the method for limited data suggested by Li et. al (2004) (at least three observed years).
}
\details{
-Based on spreedsheet "Li_2018_Limited_Lee-Carter-v4.xlsm" from UN.
+Based on spreadsheet "Li_2018_Limited_Lee-Carter-v4.xlsm" from UN.
Useful for abridged or single ages, and allows output in both formats also.
One option is the use of non-divergent method for sex coherency (Li & Lee, 2005).
The other is the possibility of fitting \code{"k"} to replicate \code{"e_0"} at some given dates.
diff --git a/man/interp_lc_lim_estimate.Rd b/man/interp_lc_lim_estimate.Rd
index d05a2ec12..a6ade6ffc 100644
--- a/man/interp_lc_lim_estimate.Rd
+++ b/man/interp_lc_lim_estimate.Rd
@@ -2,24 +2,24 @@
% Please edit documentation in R/interp_lc_lim.R
\name{interp_lc_lim_estimate}
\alias{interp_lc_lim_estimate}
-\title{Estimate LC with limited data params}
+\title{Estimate LC with limited data parameters}
\usage{
interp_lc_lim_estimate(M, dates_in, dates_out, SVD = F)
}
\arguments{
-\item{M}{numeric. Matrix with many rows as ages and columns as dates_in.}
+\item{M}{numeric. Matrix with many rows as ages and columns as \code{dates_in}.}
\item{dates_in}{numeric. Vector of dates with input rates.}
\item{dates_out}{numeric. Vector of dates for estimate a set of rates.}
-\item{SVD}{logical. Use Singular Value Decomposition for estimate b and k or Maximum Likelihood Estimation. Default \code{FALSE} for Maximum Likelihood Estimation.}
+\item{SVD}{logical. Use Singular Value Decomposition for estimate \code{b} and \code{k} or Maximum Likelihood Estimation. Default \code{FALSE} for Maximum Likelihood Estimation.}
}
\description{
Estimate LC with limited data from a matrix of rates (age by dates).
}
\details{
-SVD for ax and bx. Fit a simmple linear model for k and interp/extrapolate for objective dates.
+SVD for \code{ax} and \code{bx}. Fit a simple linear model for \code{k} and interpolate/extrapolate for objective dates.
}
\references{
\insertRef{Li2004}{DemoTools}
diff --git a/man/interp_lc_lim_group.Rd b/man/interp_lc_lim_group.Rd
index 26c72120a..a7b774f83 100644
--- a/man/interp_lc_lim_group.Rd
+++ b/man/interp_lc_lim_group.Rd
@@ -19,11 +19,11 @@ interp_lc_lim_group(
}
\arguments{
\item{input}{data.frame. Columns: id, Date, Sex, Age, nMx (opt), nqx (opt), lx (opt).
-The first column (id) cn be a numeric index or charcter vector identifying each group.}
+The first column (id) can be a numeric index or character vector identifying each group.}
\item{dates_out}{numeric. Vector of decimal years to interpolate or extrapolate.}
-\item{Single}{logical. Wheter or not the lifetable output is by single ages.}
+\item{Single}{logical. Whether or not the lifetable output is by single ages.}
\item{input_e0}{data.frame with cols: id, Date, Sex and \code{"e_0"}. This should be fitted when apply method.}
@@ -65,12 +65,12 @@ List with:
}
}
\description{
-Given a data frame with groups (country, region, sex), dates, sex and mortality data by age (rates, conditionated probabilities of death
+Given a data frame with groups (country, region, sex), dates, sex and mortality data by age (rates, conditioned probabilities of death
or survival function), this function interpolate/extrapolate life tables
using the method for limited data suggested by Li et. al (2004) (at least three observed years).
}
\details{
-Based on spreedsheet "Li_2018_Limited_Lee-Carter-v4.xlsm" from UN.
+Based on spreadsheet "Li_2018_Limited_Lee-Carter-v4.xlsm" from UN.
Useful for abridged or single ages, and allows output in both formats also.
One option is the use of non-divergent method for sex coherency (Li & Lee, 2005).
The other is the possibility of fitting \code{"k"} to replicate \code{"e_0"} at some given dates.
diff --git a/man/is_age_coherent.Rd b/man/is_age_coherent.Rd
index 9a12912fc..04525278f 100644
--- a/man/is_age_coherent.Rd
+++ b/man/is_age_coherent.Rd
@@ -20,7 +20,7 @@ logical. \code{TRUE} if the arguments are considered consistent.
A few checks are carried out to test if \code{Age} is internally consistent, that \code{OAG} is consistent with \code{AgeInt}, and that \code{Age} and \code{AgeInt} are consistent with one another. For \code{Age} to be internally consistent, we cannot have redundant values, and values must be sequential.
}
\details{
-If \code{OAG} is \code{TRUE} then \code{AgeInt} must be coded as \code{NA}. If \code{Age} is not sorted then we sort both \code{Age} and \code{AgeInt}, assuming that they are in matched order. This isn't incoherence per se, but a message is returned to the console.
+If \code{OAG} is \code{TRUE} then \code{AgeInt} must be coded as \code{NA}. If \code{Age} is not sorted then we sort both \code{Age} and \code{AgeInt}, assuming that they are in matched order. This isn't incoherence in itself, but a message is returned to the console.
}
\examples{
Age <- 0:99
diff --git a/man/logquad_augmented.Rd b/man/logquad_augmented.Rd
new file mode 100644
index 000000000..af9923a42
--- /dev/null
+++ b/man/logquad_augmented.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/log_quad_augm.R
+\name{logquad_augmented}
+\alias{logquad_augmented}
+\title{Augmented logquad}
+\usage{
+logquad_augmented(coeffs, k, q0_5, q60_15, Sex = "b", Age, ...)
+}
+\arguments{
+\item{coeffs}{data.frame. Columns \code{a(x)}, \code{b(x)}, \code{c(x)} and \code{v(x)} from fitted logquad model. See \code{fitted_logquad_b}.}
+
+\item{k}{numeric. Adult mortality related value from log-quad estimation based on one or two input parameters. See \code{lt_model_lq}.}
+
+\item{q0_5}{numeric. Probability of death from born to age 5.}
+
+\item{q60_15}{numeric. Probability of death from age 60 to age 75.}
+
+\item{Sex}{character. Either male \code{"m"}, female \code{"f"}, or both \code{"b"}.}
+
+\item{Age}{integer. Abridged lower bound ages. Same length as rows in \code{coeffs}.}
+
+\item{...}{Other arguments to be passed on to the \code{lt_abridged} function.}
+}
+\value{
+life table as in \code{lt_abridged} function.
+}
+\description{
+Adjust rates in oldest ages that come from an HMD model, using an external estimate of 15q60 (Li, 2014). As an example see \code{\link[DemoTools]{HMD_old_logquad}}.
+}
+\details{
+Parameter \code{a(x)} is augmented based on an external estimate of 15q60.
+}
+\references{
+See \href{https://www.un.org/development/desa/pd/content/estimating-life-tables-developing-countries}{Li (2014)}.
+}
diff --git a/man/lt_abridged.Rd b/man/lt_abridged.Rd
index 99801cc57..13c8e76c5 100644
--- a/man/lt_abridged.Rd
+++ b/man/lt_abridged.Rd
@@ -57,7 +57,7 @@ lt_abridged(
\item{mod}{logical. If \code{"un"} specified for \code{axmethod}, whether or not to use Nan Li's modification for ages 5-14. Default \code{TRUE}.}
-\item{SRB}{the sex ratio at birth (boys / girls), detault 1.05}
+\item{SRB}{the sex ratio at birth (boys / girls), default 1.05}
\item{OAG}{logical. Whether or not the last element of \code{nMx} (or \code{nqx} or \code{lx}) is an open age group. Default \code{TRUE}.}
@@ -109,7 +109,7 @@ starting with the input open age, but you can lower this age using the
is aligned with the other columns in all 5-year age groups, but note the
first two values have a slightly different age-interval interpretation:
In Age 0, the interpretation is survival from birth until interval 0-4.
-In Age 1, it is survival from 0-4 into 5-9. Therafter the age groups align.
+In Age 1, it is survival from 0-4 into 5-9. Thereafter the age groups align.
This column is required for population projections.
}
\examples{
diff --git a/man/lt_abridged2single.Rd b/man/lt_abridged2single.Rd
index d9cbfb378..4c68540fc 100644
--- a/man/lt_abridged2single.Rd
+++ b/man/lt_abridged2single.Rd
@@ -54,7 +54,7 @@ lt_abridged2single(
\item{mod}{logical. If \code{"un"} specified for \code{axmethod}, whether or not to use Nan Li's modification for ages 5-14. Default \code{TRUE}.}
-\item{SRB}{the sex ratio at birth (boys / girls), detault 1.05}
+\item{SRB}{the sex ratio at birth (boys / girls), default 1.05}
\item{OAG}{logical. Whether or not the last element of \code{nMx} (or \code{nqx} or \code{lx}) is an open age group. Default \code{TRUE}.}
@@ -67,7 +67,7 @@ lt_abridged2single(
\item{extrapFit}{integer vector. Ages to include in model fitting. Defaults to all ages \code{> =60}.}
-\item{...}{optional arguments passed to \code{pclm()}. For example, if you pass an expicit \code{lambda} parameter via the \code{control} argument, you can speed up estimation}
+\item{...}{optional arguments passed to \code{pclm()}. For example, if you pass an explicit \code{lambda} parameter via the \code{control} argument, you can speed up estimation}
}
\value{
Single-year lifetable in data.frame with columns
diff --git a/man/lt_ambiguous.Rd b/man/lt_ambiguous.Rd
index 532075238..1b60868de 100644
--- a/man/lt_ambiguous.Rd
+++ b/man/lt_ambiguous.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/lt_regroup_age.R
\name{lt_ambiguous}
\alias{lt_ambiguous}
-\title{calculate an abidged or single age lifetable from abridged or sinlge age data}
+\title{calculate an abridged or single age lifetable from abridged or single age data}
\usage{
lt_ambiguous(
nMx_or_nqx_or_lx = NULL,
diff --git a/man/lt_id_Ll_S.Rd b/man/lt_id_Ll_S.Rd
index 6baabc54e..5f2ee3b55 100644
--- a/man/lt_id_Ll_S.Rd
+++ b/man/lt_id_Ll_S.Rd
@@ -4,19 +4,21 @@
\alias{lt_id_Ll_S}
\title{Calculate survivor ratios}
\usage{
-lt_id_Ll_S(nLx, lx, AgeInt, N = c(5, 1))
+lt_id_Ll_S(nLx, lx = NULL, Age, AgeInt = NULL, N = 5)
}
\arguments{
\item{nLx}{numeric vector of lifetable exposure.}
-\item{lx}{numeric. Vector of lifetable survivorship at abridged ages.}
+\item{lx}{numeric vector of lifetable survivors from the same lifetable as \code{nLx}. The radix is inferred from \code{nLx} in case \code{lx} is \code{NULL}.}
-\item{AgeInt}{integer. Vector of age class widths. Default \code{inferAgeIntAbr(Age = Age)}.}
+\item{Age}{integer vector of starting ages.}
+
+\item{AgeInt}{integer vector of age intervals.}
\item{N}{integer, the age width for survivor ratios, either 5 or 1. Default 5.}
}
\description{
-An extra lifetable column for use in projections, which require uniform time steps both both age and period. Intervals are either single age (\code{N=1}) or five-year ages (\code{N=5}). Input vectors are assumed to come from either single or standard abridged ages.
+An extra lifetable column for use in projections, which require uniform time steps in both age and period. Intervals are either single age or five-year ages. Input vectors are assumed to come from either single or standard abridged ages. Note that the ages of the output Sx are the ages the population would be after the N-year projection.
}
\details{
This function does not account for \code{nLx} having been pre-binned into uniform 5-year age widths, which will throw an error. Just leave them in abridged ages instead. Note that in the case of abridged ages, the interpretation for the first and second value don't follow the original abridged age intervals: the first value in the probability of surviving from birth into ages 0-4 in the first five years, and the second value is the probability of surviving from 0-4 to 5-9. This represents a slight misalignment with the rest of the lifetable, user beware.
diff --git a/man/lt_id_l_q.Rd b/man/lt_id_l_q.Rd
index 93d6f8e3c..94fbba82e 100644
--- a/man/lt_id_l_q.Rd
+++ b/man/lt_id_l_q.Rd
@@ -10,15 +10,14 @@ lt_id_l_q(lx)
\item{lx}{numeric. Vector of age-specific lifetable survivorship.}
}
\value{
-ndx vector of lifetable deaths.
+\code{qx} values of age-specific probabilities of death. The last value is always 1.0
}
\description{
This lifetable identity is the same no matter what kind of lifetable is required.
You can find it in any demography textbook.
}
\details{
-The vector returned is the same length as \code{lx} and it sums to the lifetable radix.
-If the radix is one then this is the discrete deaths distribution.
+The vector returned is the same length as \code{lx}.
}
\references{
\insertRef{preston2000demography}{DemoTools}
diff --git a/man/lt_model_lq.Rd b/man/lt_model_lq.Rd
index 23919935d..8b541644f 100644
--- a/man/lt_model_lq.Rd
+++ b/man/lt_model_lq.Rd
@@ -70,7 +70,7 @@ for case 7 and 8 (e0 and 45q15 or 35q15 are known);}
\item{mod}{logical. If \code{"un"} specified for \code{axmethod}, whether or not to use Nan Li's modification for ages 5-14. Default \code{TRUE}.}
-\item{SRB}{the sex ratio at birth (boys / girls), detault 1.05}
+\item{SRB}{the sex ratio at birth (boys / girls), default 1.05}
}
\value{
The output is of class \code{lt_model_lq} with the components:
@@ -79,7 +79,7 @@ The output is of class \code{lt_model_lq} with the components:
and \code{e0}.}
}
\description{
-Construct model life tables based on the Log-Quadratic (wilmoth) estimates
+Construct model life tables based on the Log-Quadratic (Wilmoth) estimates
with various choices of 2 input parameters:
\code{q0_5, q0_1, q15_45, q15_35} and \code{e0}. There are 8 possible
combinations (see examples below).
@@ -90,7 +90,7 @@ of dying \code{nqx} is written \code{qx_n}, where \code{x} and \code{n} are
integers. For example \code{45q15} is represented as \code{q45_15}.
}
\note{
-This function is ported from \code{MortalityEstimate::wilmothLT} experimental package by Marius Pascariu. The package is no longe maintained. The latest version can be found here: \url{https://github.com/mpascariu/MortalityEstimate}
+This function is ported from \code{MortalityEstimate::wilmothLT} experimental package by Marius Pascariu. The package is no longer maintained. The latest version can be found here: \url{https://github.com/mpascariu/MortalityEstimate}
}
\examples{
diff --git a/man/lt_rule_1a0.Rd b/man/lt_rule_1a0.Rd
index a5c2e749a..059574ba1 100644
--- a/man/lt_rule_1a0.Rd
+++ b/man/lt_rule_1a0.Rd
@@ -27,13 +27,13 @@ lt_rule_1a0(
\item{region}{character. \code{"n"}, \code{"e"}, \code{"s"} or \code{"w"} for North, East, South, or West.}
-\item{SRB}{the sex ratio at birth (boys / girls), detault 1.05}
+\item{SRB}{the sex ratio at birth (boys / girls), default 1.05}
}
\value{
a0, the estimated average age at death of those dying in the first year of life, either a single value or a vector of values.
}
\description{
-This function wraps the Coale-Demeny and Andreev-Kingkade approximations for a0, which can come from M0, qo, or IMR.
+This function wraps the Coale-Demeny and Andreev-Kingkade approximations for \code{a0}, which can come from \code{M0}, \code{q0}, or \code{IMR}.
}
\details{
If sex is given as both, \code{"b"}, then we calculate the male and female results separately, then weight them together using SRB. This is bad in theory, but the leverage is trivial, and it's better than using male or female coefs for the total population.
diff --git a/man/lt_rule_1a0_ak.Rd b/man/lt_rule_1a0_ak.Rd
index c036c0d5c..9f7539b4b 100644
--- a/man/lt_rule_1a0_ak.Rd
+++ b/man/lt_rule_1a0_ak.Rd
@@ -7,11 +7,11 @@
lt_rule_1a0_ak(M0 = NULL, q0 = NULL, Sex)
}
\arguments{
-\item{M0}{a value or vector of values of m0, the death probability in the first year of life.}
+\item{M0}{a value or vector of values of \code{1m0}, the death risk in the first year of life.}
-\item{q0}{a value or vector of values of m0, the death risk in the first year of life.}
+\item{q0}{a value or vector of values of \code{1q0}, the death probability in the first year of life, sometimes approximated with IMR.}
-\item{Sex}{either "m" or "f"}
+\item{Sex}{either \code{"m"} or \code{"f"}}
}
\value{
a0, the estimated average age at death of those dying in the first year of life, either a single value or a vector of values.
diff --git a/man/lt_rule_4m0_D0.Rd b/man/lt_rule_4m0_D0.Rd
index 347ca5834..6d7bea4dd 100644
--- a/man/lt_rule_4m0_D0.Rd
+++ b/man/lt_rule_4m0_D0.Rd
@@ -4,7 +4,7 @@
\alias{lt_rule_4m0_D0}
\title{rule of thumb for splitting infants from deaths under 5}
\usage{
-lt_rule_4m0_D0(D04, M04, P04, Sex = c("m", "f"))
+lt_rule_4m0_D0(D04, M04, P04, Sex = "m")
}
\arguments{
\item{D04}{numeric. Deaths under age 5.}
diff --git a/man/lt_rule_4m0_m0.Rd b/man/lt_rule_4m0_m0.Rd
index 40269ae43..89fc85768 100644
--- a/man/lt_rule_4m0_m0.Rd
+++ b/man/lt_rule_4m0_m0.Rd
@@ -4,7 +4,7 @@
\alias{lt_rule_4m0_m0}
\title{rule of thumb for estimating infant mortality rate from under 5 mortality}
\usage{
-lt_rule_4m0_m0(M04, D04, P04, Sex = c("m", "f"))
+lt_rule_4m0_m0(M04, D04, P04, Sex = "m")
}
\arguments{
\item{M04}{numeric. Death rate under age 5.}
diff --git a/man/lt_rule_ak_q0_a0.Rd b/man/lt_rule_ak_q0_a0.Rd
index d1611408f..683130cbf 100644
--- a/man/lt_rule_ak_q0_a0.Rd
+++ b/man/lt_rule_ak_q0_a0.Rd
@@ -15,5 +15,5 @@ lt_rule_ak_q0_a0(q0, Sex)
a0, the estimated average age at death of those dying in the first year of life, either a single value or a vector of a_0 values.
}
\description{
-\code{AKq02a0} Andreev Kingkade a0 method. This version has a 3-part segemented linear model, based on cutpoints in q0. Code ported from HMDLifeTables.
+\code{AKq02a0} Andreev Kingkade a0 method. This version has a 3-part segmented linear model, based on cut points in q0. Code ported from HMDLifeTables.
}
diff --git a/man/lt_rule_m_extrapolate.Rd b/man/lt_rule_m_extrapolate.Rd
index 129763202..bcd4668f0 100644
--- a/man/lt_rule_m_extrapolate.Rd
+++ b/man/lt_rule_m_extrapolate.Rd
@@ -45,13 +45,16 @@ The following options are available: \itemize{
\value{
An object of class \code{lt_rule_m_extrapolate} with the following components:
\item{input}{List with arguments provided in input. Saved for convenience.}
-\item{call}{An unevaluated function call, that is, an unevaluated expressionwhich consists of the named function applied to the given arguments.}
+\item{call}{An unevaluated function call, that is, an unevaluated expression that consists of the named function applied to the given arguments.}
\item{fitted.model}{An object of class \code{\link[MortalityLaws]{MortalityLaw}}. Here one can find fitted values, residuals, goodness of fit measures etc.}
\item{values}{A vector or matrix containing the complete mortality data, that is the modified input data following the extrapolation procedure.}
}
\description{
Extrapolate old-age human mortality curve using mortality laws
}
+\details{
+If fitting fails to converge, then we refit assuming Gompertz mortality with explicit starting parameters of \code{parS = c(A = 0.005, B = 0.13)} and a warning is issued.
+}
\examples{
# Example 1 - abridged data
@@ -70,7 +73,8 @@ f1 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "kannisto")
f2 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "kannisto_makeham")
f3 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "gompertz")
f4 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "ggompertz")
-f5 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "makeham")
+ # makeham falls back to gompertz for this data
+suppressWarnings(f5 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "makeham"))
f6 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "beard")
f7 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "beard_makeham")
f8 <- lt_rule_m_extrapolate(mx, x, x_fit, x_extr, law = "quadratic")
@@ -142,17 +146,6 @@ legend("topleft", bty = "n",
lty = c(NA, NA, 1:8), pch = c(16, 16, rep(NA, 8)),
col = c(1, 4, 2:9), lwd = 2, pt.cex = 2)
}
-# ----------------------------------------------
-# Example 3 - Extrapolate mortality for multiple years at once
-
-# Create some data
-mx_matrix <- matrix(rep(mx1, 3), ncol = 3) \%*\% diag(c(1, 1.05, 1.1))
-dimnames(mx_matrix) <- list(age = x1, year = c("year1", "year2", "year3"))
-
-F1 <- lt_rule_m_extrapolate(mx_matrix, x = x1, x_fit, x_extr, law = "kannisto")
-F1
-ls(F1)
-coef(F1)
}
\seealso{
\code{\link[MortalityLaws]{MortalityLaw}}
diff --git a/man/lt_single_mx.Rd b/man/lt_single_mx.Rd
index 4f7ea4b7c..9a3232c72 100644
--- a/man/lt_single_mx.Rd
+++ b/man/lt_single_mx.Rd
@@ -39,7 +39,7 @@ lt_single_mx(
\item{mod}{logical. If \code{"un"} specified for \code{axmethod}, whether or not to use Nan Li's modification for ages 5-14. Default \code{TRUE}.}
-\item{SRB}{the sex ratio at birth (boys / girls), detault 1.05}
+\item{SRB}{the sex ratio at birth (boys / girls), default 1.05}
\item{OAG}{logical. Whether or not the last element of \code{nMx} (or \code{nqx} or \code{lx}) is an open age group. Default \code{TRUE}.}
diff --git a/man/lt_single_qx.Rd b/man/lt_single_qx.Rd
index 54d90efe5..121f99b01 100644
--- a/man/lt_single_qx.Rd
+++ b/man/lt_single_qx.Rd
@@ -39,7 +39,7 @@ lt_single_qx(
\item{mod}{logical. If \code{"un"} specified for \code{axmethod}, whether or not to use Nan Li's modification for ages 5-14. Default \code{TRUE}.}
-\item{SRB}{the sex ratio at birth (boys / girls), detault 1.05}
+\item{SRB}{the sex ratio at birth (boys / girls), default 1.05}
\item{OAG}{logical. Whether or not the last element of \code{nMx} (or \code{nqx} or \code{lx}) is an open age group. Default \code{TRUE}.}
@@ -66,7 +66,7 @@ Lifetable in data.frame with columns
\item{lx}{numeric. Lifetable survivorship}
\item{ndx}{numeric. Lifetable deaths distribution.}
\item{nLx}{numeric. Lifetable exposure.}
-\item{Sx}{numeric. Survivor ratios in uniform 5-year age groups.}
+\item{Sx}{numeric. Survivor ratios in uniform single-year age groups.}
\item{Tx}{numeric. Lifetable total years left to live above age x.}
\item{ex}{numeric. Age-specific remaining life expectancy.}
}
diff --git a/man/lt_smooth_ambiguous.Rd b/man/lt_smooth_ambiguous.Rd
index beff389bc..c1df4397e 100644
--- a/man/lt_smooth_ambiguous.Rd
+++ b/man/lt_smooth_ambiguous.Rd
@@ -13,7 +13,7 @@ lt_smooth_ambiguous(input, ...)
}
\description{
Considering different mortality input for each sex/year data,
-smooth olders with makeham or kannisto in case no law was specified,
+smooth older ages with makeham or kannisto in case no law was specified,
and return a data.frame with standard LT.
}
\details{
diff --git a/man/lthat.logquad.Rd b/man/lthat.logquad.Rd
index 180a06ca2..36810d5ac 100644
--- a/man/lthat.logquad.Rd
+++ b/man/lthat.logquad.Rd
@@ -47,7 +47,7 @@ The following options are available: \itemize{
\item{mod}{logical. If \code{"un"} specified for \code{axmethod}, whether or not to use Nan Li's modification for ages 5-14. Default \code{TRUE}.}
-\item{SRB}{the sex ratio at birth (boys / girls), detault 1.05}
+\item{SRB}{the sex ratio at birth (boys / girls), default 1.05}
}
\description{
Estimated life table using the log-quadratic model
diff --git a/man/mA_swe.Rd b/man/mA_swe.Rd
index 56b227515..f47c3582a 100644
--- a/man/mA_swe.Rd
+++ b/man/mA_swe.Rd
@@ -8,7 +8,7 @@
A data frame with:
\describe{
\item{Date}{Reference time for the rates estimate.}
-\item{Age}{Inferior age for abridged groups. Carefull: last age 100 is not an OAG}
+\item{Age}{Inferior age for abridged groups. Careful: last age 100 is not an OAG}
\item{Sex}{Male \code{m} and female \code{m}.}
\item{nMx}{Mortality rates.}
}
diff --git a/man/mav.Rd b/man/mav.Rd
index 10f785be1..78106f0c0 100644
--- a/man/mav.Rd
+++ b/man/mav.Rd
@@ -4,7 +4,7 @@
\alias{mav}
\title{Calculate the moving average (mav) over 3 or 5 years.}
\usage{
-mav(Value, Age, n = 3, OAG = TRUE)
+mav(Value, Age, n = 3, OAG = TRUE, tails = FALSE)
}
\arguments{
\item{Value}{numeric. A vector of demographic counts in single age groups.}
@@ -14,6 +14,10 @@ mav(Value, Age, n = 3, OAG = TRUE)
\item{n}{integer. A single number, (often 3 or 5), indicating the number of years taken to smooth the population distribution by single ages.}
\item{OAG}{logical. Whether or not the top age group is open. Default \code{TRUE}.}
+
+\item{tails}{logical. If set to \code{TRUE}, smaller-n moving averages are applied on both tails
+such that all values are non-NA. If \code{FALSE} (default), tails are set to NA
+due to the lag of moving averages.}
}
\value{
Vector with the smoothed demographic counts.
@@ -22,9 +26,9 @@ Vector with the smoothed demographic counts.
This arithmetic smoothing technique aims to eliminate irregularities of the population pyramid by averaging values in a moving window of user-defined width.
}
\details{
-The moving window is applied symmetrically. Data endpoints are imputed with \code{NA}s in output: the is nothing under 0 or over the highest closed age group to average with. The open age group is imputed with \code{NA} prior to calculations, since it cannot be averaged into the next lowest group. For example, for \code{n=3}, age 0 will be \code{NA}, as will the open age group and next lowest age. Age intervals are assumed uniform. This function could be used with either single or 5-year age groups.
+The moving window is applied symmetrically. By default (\code{tails = FALSE}) data endpoints are imputed with \code{NA}s in output: there is nothing under 0 or over the highest closed age group to average with. The open age group is not used in averaging, and it is returned as-is. Age intervals are assumed uniform. This function could be used with either single or 5-year age groups.
-Ages may be single or grouped, but all age intervals are assumed equal.
+If \code{tails} is set to \code{TRUE}, then the tails are imputed using moving averages with successively smaller values of \code{n}, the cascade method.
}
\examples{
Pop <-c(303583,390782,523903,458546,517996,400630,485606,325423,471481,189710,
@@ -37,8 +41,15 @@ Pop <-c(303583,390782,523903,458546,517996,400630,485606,325423,471481,189710,
Age <- 0:70
# final age group assumed open
mav(Pop, n = 3, Age = Age)
+
\dontrun{
-nwindows <- sapply(seq(3, 11, by = 2),mav, Value = Pop, Age = Age)
+odds <- seq(3, 11, by = 2)
+nwindows <- sapply(odds,
+ mav,
+ Value = Pop,
+ Age = Age,
+ OAG = TRUE,
+ tails = FALSE)
cols <- gray(seq(.8, 0, length = 5))
lwds <- seq(3, 1, length = 5)
plot(Age,Pop, col = "red", xlab = "Age", ylab = "The counts", pch=16,
@@ -50,10 +61,31 @@ legend("topright",
lwd = lwds,
legend = paste0("n=",seq(3,11,by=2)))
}
+
+# For cascading smoothing on the tails:
+mav(Pop, Age, tails = TRUE)
+
+\dontrun{
+# Compare
+nwindows_tails <- sapply(odds,
+ mav,
+ Value = Pop,
+ Age = Age,
+ OAG = TRUE,
+ tails = TRUE)
+
+colnames(nwindows) <- odds
+colnames(nwindows_tails) <- odds
+
+# NA triangles are completed with
+# successively smaller ns.
+head(nwindows)
+head(nwindows_tails)
+
+tail(nwindows)
+tail(nwindows_tails)
+}
}
\references{
\insertRef{GDA1981IREDA}{DemoTools}
}
-\author{
-Juan Galeano
-}
diff --git a/man/mig_beta.Rd b/man/mig_beta.Rd
index 31fdf791f..e7b8e7d91 100644
--- a/man/mig_beta.Rd
+++ b/man/mig_beta.Rd
@@ -22,6 +22,11 @@ mig_beta(
sex = "both",
midyear = FALSE,
verbose = TRUE,
+ child_adjust = c("none", "cwr", "constant"),
+ childage_max = NULL,
+ cwr_factor = 0.3,
+ oldage_adjust = c("none", "beers", "mav"),
+ oldage_min = 65,
...
)
}
@@ -58,6 +63,32 @@ mig_beta(
\item{verbose}{logical. Shall we send informative messages to the console?}
+\item{child_adjust}{The method with which to adjust the youngest age groups.
+If \code{"none"}, no adjustment is applied (default). If
+child-woman ratio (\code{"cwr"}) is chosen, the first cohorts reflecting the
+difference between \code{date2 - date1} are adjusted (plus age 0). If
+child constant ratio (\code{"constant"}) is chosen, the first 15 age groups
+are adjusted.}
+
+\item{childage_max}{The maximum age from which to apply \code{child_adjust}.
+By default, set to \code{NULL}, which gets translated into all the cohorts
+between \code{date2} and \code{date1}. If \code{date2} is 2010 and
+\code{date1} is 2002, the first 8 cohorts are adjusted. Otherwise, the user
+can supply an integer.}
+
+\item{cwr_factor}{A numeric between 0 and 1 by which to adjust the CWR method
+for the young ages from \code{child_adjust}. \strong{This is only used
+when \code{child_adjust} is \code{"cwr"}}.}
+
+\item{oldage_adjust}{The type of adjustment to apply to ages at and above
+\code{oldage_min}. \code{'beers'} applies a beers graduation method
+while \code{'mav'} applies a moving average with cascading on the tails.
+For more information see \code{?mav} and \code{?graduation_beers}.}
+
+\item{oldage_min}{The minimum age from which to apply \code{oldage_adjust}.
+By default, set to 65, so any adjustment from \code{oldage_adjust} will be
+applied for 65+.}
+
\item{...}{optional arguments passed to \code{lt_single_qx}}
}
\value{
@@ -71,11 +102,13 @@ Census - Estimate by age from projection. It then distributes the NCE over
the cohort parallelogram assuming uniform distribution assuming it is all
migration. It finalizes by summing the estimate by age groups across the entire
intercensal period to have a total migration during the entire period.
+Alternatively, a child adjustment and an old age adjustment can be applied.
}
\examples{
\dontrun{
- mig_beta(
+
+mig_beta(
location = "Russian Federation",
sex = "male",
c1 = pop1m_rus2002,
diff --git a/man/mig_calculate_rc.Rd b/man/mig_calculate_rc.Rd
index 1c60f91da..c56805242 100644
--- a/man/mig_calculate_rc.Rd
+++ b/man/mig_calculate_rc.Rd
@@ -17,7 +17,7 @@ Choose between a 7,9,11 or 13 parameter model.
}
\details{
In the full 13 parameter model, the migration rate at age x, \eqn{m(x)} is defined as
-\deqn{m(x) = a1*exp(-1*alpha1*x) + a2*exp(-1*alpha2*(x - mu2) - exp(-1*lambda2*(x - mu2))) + a3*exp(-1*alpha3*(x - 3) - exp(-1*lambda3*(x - mu3))) + a4*exp(lambda4*x) + c}
+\deqn{m(x) = a1*exp(-1*alpha1*x) + a2*exp(-1*alpha2*(x - mu2) - exp(-1*lambda2*(x - mu2))) + a3*exp(-1*alpha3*(x - mu3) - exp(-1*lambda3*(x - mu3))) + a4*exp(lambda4*x) + c}
The first, second, third and fourth pieces of the equation represent pre-working age, working age, retirement and post-retirement age patterns, respectively.
Models with less parameters gradually remove terms at the older ages. Parameters in each family are:
diff --git a/man/mig_un_fam.Rd b/man/mig_un_fam.Rd
index 72b2665d9..e23dad504 100644
--- a/man/mig_un_fam.Rd
+++ b/man/mig_un_fam.Rd
@@ -7,11 +7,11 @@
mig_un_fam(NM, family, Single = TRUE, OAnew = 100)
}
\arguments{
-\item{NM}{numeric. Total net migration to distribuite between ages and sex.}
+\item{NM}{numeric. Total net migration to distribute between ages and sex.}
\item{family}{character. Could be "Family", "Female Labor", "Male Labor".}
-\item{Single}{logical. Results by simple age. Default \code{FALSE}.
+\item{Single}{logical. Results by simple age. Default \code{TRUE}.
Typically from pre-working age and working age parts of in Roger-Castro formula.}
\item{OAnew}{The age from which to group all ages into an open ended age group.
diff --git a/man/pop1m_ind.Rd b/man/pop1m_ind.Rd
index 86aa9242e..0c57fbae2 100644
--- a/man/pop1m_ind.Rd
+++ b/man/pop1m_ind.Rd
@@ -3,7 +3,7 @@
\docType{data}
\name{pop1m_ind}
\alias{pop1m_ind}
-\title{Indian male population 1991}
+\title{Indian male population 1971}
\format{
A numeric vector of length 101
}
@@ -16,6 +16,6 @@ Part II-C(ii), New Delhi
pop1m_ind
}
\description{
-Indian male population 1991
+Indian male population 1971
}
\keyword{datasets}
diff --git a/man/pop_f_mat_five.Rd b/man/pop_f_mat_five.Rd
index 3780519ee..223efc7cf 100644
--- a/man/pop_f_mat_five.Rd
+++ b/man/pop_f_mat_five.Rd
@@ -8,7 +8,7 @@
A matrix of dimensions 21 x 21
}
\source{
-Migration residual PAS spreadhseet
+Migration residual PAS spreadsheet
}
\usage{
pop_f_mat_five
diff --git a/man/pop_f_mat_single.Rd b/man/pop_f_mat_single.Rd
index 406f57146..685881d14 100644
--- a/man/pop_f_mat_single.Rd
+++ b/man/pop_f_mat_single.Rd
@@ -8,7 +8,7 @@
A matrix of dimensions 101 x 21
}
\source{
-Migration residual PAS spreadhseet
+Migration residual PAS spreadsheet
}
\usage{
pop_f_mat_single
diff --git a/man/pop_m_mat_five.Rd b/man/pop_m_mat_five.Rd
index 01b693491..408873743 100644
--- a/man/pop_m_mat_five.Rd
+++ b/man/pop_m_mat_five.Rd
@@ -8,7 +8,7 @@
A matrix of dimensions 21 x 21
}
\source{
-Migration residual PAS spreadhseet
+Migration residual PAS spreadsheet
}
\usage{
pop_m_mat_five
diff --git a/man/pop_m_mat_single.Rd b/man/pop_m_mat_single.Rd
index f9d0d9e4e..8f5781e5f 100644
--- a/man/pop_m_mat_single.Rd
+++ b/man/pop_m_mat_single.Rd
@@ -8,7 +8,7 @@
A matrix of dimensions 101 x 21
}
\source{
-Migration residual PAS spreadhseet
+Migration residual PAS spreadsheet
}
\usage{
pop_m_mat_single
diff --git a/man/smooth_age_5.Rd b/man/smooth_age_5.Rd
index 59e401326..a249b1e02 100644
--- a/man/smooth_age_5.Rd
+++ b/man/smooth_age_5.Rd
@@ -13,7 +13,7 @@ smooth_age_5(
ageMin = 10,
ageMax = 65,
n = 3,
- young.tail = c("Original", "Arriaga", "Strong", NA),
+ young.tail = c("Original", "Arriaga", "Strong", "Cascade", NA),
old.tail = young.tail
)
}
@@ -22,7 +22,7 @@ smooth_age_5(
\item{Age}{integer vector of ages corresponding to the lower integer bound of the counts.}
-\item{method}{character string. Options include \code{"Carrier-Farrag"},\code{"Arriaga"},\code{"KKN"},\code{"United Nations"}, \code{"Strong"}, and \code{"Zigzag"}. See details. Default \code{"Carrier-Farrag"}.}
+\item{method}{character string. Options include \code{"Carrier-Farrag"},\code{"Arriaga"},\code{"KKN"},\code{"United Nations"}, \code{"Strong"}, \code{MAV} and \code{"Zigzag"}. See details. Default \code{"Carrier-Farrag"}.}
\item{OAG}{logical. Whether or not the top age group is open. Default \code{TRUE}.}
@@ -41,7 +41,7 @@ numeric vector of smoothed counts in 5-year age groups.
}
\description{
Smooth population counts in 5-year age groups using the Carrier-Farrag,
-Karup-King-Newton, Arriaga, United Nations, Stong, or Zigzag methods. Allows for imputation
+Karup-King-Newton, Arriaga, United Nations, Strong, MAV or Zigzag methods. Allows for imputation
of values in the youngest and oldest age groups for the Carrier-Farrag, Karup-King-Newton,
and United Nations methods.
}
@@ -54,8 +54,8 @@ operate based on 10-year age group totals, excluding the open age group.
The Carrier-Farrag, Karup-King-Newton, and United Nations methods do not produce estimates
for the first and final 10-year age groups. By default, these are imputed with the original 5-year age group totals, but
-you can also specify to impute with \code{NA}, or the results of the Arriaga or
-Strong methods. If the terminal digit of the open age group is 5, then the terminal 10-year
+you can also specify to impute with \code{NA}, or the results of the Arriaga,
+Strong and Cascade methods. If the terminal digit of the open age group is 5, then the terminal 10-year
age group shifts down, so imputations may affect more ages in this case. Imputation can follow
different methods for young and old ages.
diff --git a/man/smooth_age_5_mav.Rd b/man/smooth_age_5_mav.Rd
index 42f1094f1..f5871706b 100644
--- a/man/smooth_age_5_mav.Rd
+++ b/man/smooth_age_5_mav.Rd
@@ -4,7 +4,7 @@
\alias{smooth_age_5_mav}
\title{Smooth in 5-year age groups using a moving average}
\usage{
-smooth_age_5_mav(Value, Age, OAG = TRUE, n = 3)
+smooth_age_5_mav(Value, Age, OAG = TRUE, n = 3, tails = FALSE)
}
\arguments{
\item{Value}{numeric vector of (presumably) counts in 5-year age groups.}
@@ -14,6 +14,9 @@ smooth_age_5_mav(Value, Age, OAG = TRUE, n = 3)
\item{OAG}{logical. Whether or not the top age group is open. Default \code{TRUE}.}
\item{n}{integer. The width of the moving average. Default 3 intervals (x-5 to x+9).}
+
+\item{tails}{logical. If tails is \code{FALSE}, both tails are left untouched.
+Otherwise, the tails are filled out using a cascade method.}
}
\value{
numeric vector of smoothed counts in 5-year age groups.
@@ -24,7 +27,10 @@ Smooth data in 5-year age groups.
\details{
This function calls \code{smooth_age_5_zigzag_inner()}, but prepares data in a way consistent with other methods called by \code{smooth_age_5()}. It is probably preferable to call \code{zigzag()} from the top level, or else call this method from \code{agesmth()} for more control over tail imputations.
-This function calls \code{mav()}, which itself relies on the more general \code{ma()}. We lose the lowest and highest ages with this method, unless \code{n=1}, in which case data is returned in the original 5-year age groups. The total population count is not constrained to sum to the orignal total.
+If tails is set to \code{FALSE}, this function calls \code{mav()}, which itself relies on the more general \code{ma()}. We lose the lowest and highest ages with this method, unless \code{n=1}, in which case data is returned in the original 5-year age groups. The total population count is not constrained to sum to the original total.
+
+If tails is \code{TRUE}, the same results are expected but the tails are
+filled in using a cascading method.
}
\examples{
Age <- c(0,1,seq(5,90,by=5))
diff --git a/man/sr_f_mat_five.Rd b/man/sr_f_mat_five.Rd
index 1fe523133..20fee5116 100644
--- a/man/sr_f_mat_five.Rd
+++ b/man/sr_f_mat_five.Rd
@@ -8,7 +8,7 @@
A matrix of dimensions 21 x 20
}
\source{
-Migration residual PAS spreadhseet
+Migration residual PAS spreadsheet
}
\usage{
sr_f_mat_five
diff --git a/man/sr_f_mat_single.Rd b/man/sr_f_mat_single.Rd
index 2b11cb431..1f8ca2604 100644
--- a/man/sr_f_mat_single.Rd
+++ b/man/sr_f_mat_single.Rd
@@ -8,7 +8,7 @@
A matrix of dimensions 101 x 20
}
\source{
-Migration residual PAS spreadhseet
+Migration residual PAS spreadsheet
}
\usage{
sr_f_mat_single
diff --git a/man/sr_m_mat_five.Rd b/man/sr_m_mat_five.Rd
index 1d70cae7f..ce8f57874 100644
--- a/man/sr_m_mat_five.Rd
+++ b/man/sr_m_mat_five.Rd
@@ -8,7 +8,7 @@
A matrix of dimensions 21 x 20
}
\source{
-Migration residual PAS spreadhseet
+Migration residual PAS spreadsheet
}
\usage{
sr_m_mat_five
diff --git a/man/sr_m_mat_single.Rd b/man/sr_m_mat_single.Rd
index 7ae3b16e7..5eaa95a6d 100644
--- a/man/sr_m_mat_single.Rd
+++ b/man/sr_m_mat_single.Rd
@@ -8,7 +8,7 @@
A matrix of dimensions 101 x 20
}
\source{
-Migration residual PAS spreadhseet
+Migration residual PAS spreadsheet
}
\usage{
sr_m_mat_single
diff --git a/man/srb_vec_five.Rd b/man/srb_vec_five.Rd
index fb24db727..127635dca 100644
--- a/man/srb_vec_five.Rd
+++ b/man/srb_vec_five.Rd
@@ -8,7 +8,7 @@
A vector of length 20
}
\source{
-Migration residual PAS spreadhseet
+Migration residual PAS spreadsheet
}
\usage{
srb_vec_five
diff --git a/man/srb_vec_single.Rd b/man/srb_vec_single.Rd
index 43d246020..9728816f7 100644
--- a/man/srb_vec_single.Rd
+++ b/man/srb_vec_single.Rd
@@ -8,7 +8,7 @@
A vector of length 20
}
\source{
-Migration residual PAS spreadhseet
+Migration residual PAS spreadsheet
}
\usage{
srb_vec_single
diff --git a/tests/testthat/test-OPAG.R b/tests/testthat/test-OPAG.R
index c41a5ed47..025d771e1 100644
--- a/tests/testthat/test-OPAG.R
+++ b/tests/testthat/test-OPAG.R
@@ -153,21 +153,22 @@ r <-0
age_Lx <- c(0,1,seq(5,100,by=5))
names(Lx) <- age_Lx
-test_that("OPAG_nLx_warp_r works", {
- res_stationary <- OPAG_nLx_warp_r(
- nLx = c(Lx),
- Age = age_Lx,
- r = r,
- continuous = TRUE,
- method = "uniform"
- )
- c_St <- res_stationary/sum(res_stationary)
- c_Lx <- c(Lx/sum(Lx))
-
- res <- (c_St - c_Lx) %>% abs() %>% max()
- expect_true(res < 0.0001)
- }
-)
+# TR: this test on hold: now OPAG_nLx_warp_r uses single age only
+# test_that("OPAG_nLx_warp_r works", {
+# res_stationary <- OPAG_nLx_warp_r(
+# nLx = c(Lx),
+# Age = age_Lx,
+# r = r,
+# continuous = TRUE,
+# method = "uniform"
+# )
+# c_St <- res_stationary/sum(res_stationary)
+# c_Lx <- c(Lx/sum(Lx))
+#
+# res <- (c_St - c_Lx) %>% abs() %>% max()
+# expect_true(res < 0.0001)
+# }
+# )
# Pop_fit was generated this way
@@ -259,15 +260,16 @@ test_that("Age intervals of standard population and population still works even
expect_output(OPAG(Pop,
- Age_Pop = Age_Pop,
- AgeInt_Pop = AgeInt_Pop,
- nLx = nLx,
- Age_nLx = Age_nLx,
- AgeInt_nLx,
- Age_fit = c(60,70),
- AgeInt_fit = c(10,10),
- Redistribute_from = 80,
- OAnew = max(Age_nLx)), regexp = "\nAge_Pop and Age_nLx age intervals are different!")
+ Age_Pop = Age_Pop,
+ #AgeInt_Pop = AgeInt_Pop,
+ nLx = nLx,
+ Age_nLx = Age_nLx,
+ # AgeInt_nLx,
+ Age_fit = c(60,70),
+ AgeInt_fit = c(10,10),
+ Redistribute_from = 80,
+ OAnew = max(Age_nLx),
+ method = "mono"), regexp = "\nAge_Pop and Age_nLx age intervals are different!")
})
@@ -276,10 +278,10 @@ test_that("Check if r returned is between -0.5 and 0.5", {
output <-
OPAG(Pop,
Age_Pop = Age_Pop,
- AgeInt_Pop = AgeInt_Pop,
+ # AgeInt_Pop = AgeInt_Pop,
nLx = nLx,
Age_nLx = Age_nLx,
- AgeInt_nLx,
+ # AgeInt_nLx,
Age_fit = c(60,70),
AgeInt_fit = c(10,10),
Redistribute_from = 80,
diff --git a/tests/testthat/test-graduate.R b/tests/testthat/test-graduate.R
index 99c941a73..8e018edc6 100644
--- a/tests/testthat/test-graduate.R
+++ b/tests/testthat/test-graduate.R
@@ -54,5 +54,36 @@ test_that("beers works", {
)
})
+test_that("age ranges flexible for uniform, mono, and pclm", {
+ Pop5 <- c(88962, 73756, 51990, 55395, 48562)
+ Age5 <- seq(0,20,5)
+ Age5alt <- seq(45,65,5)
+
+ m0 <- graduate_mono( Pop5, Age5 )
+ m45 <- graduate_mono( Pop5, Age5alt )
+ p0 <- graduate_pclm( Pop5, Age5 )
+ p45 <- graduate_pclm( Pop5, Age5alt )
+ u0 <- graduate_uniform( Pop5, Age5 )
+ u45 <- graduate_uniform( Pop5, Age5alt )
+
+ expect_true(all(abs(m0 - m45) < 1e-9))
+ expect_true(all(abs(p0 - p45) < 1e-4))
+ expect_true(all(abs(u0 - u45) < 1e-9))
+
+ ma0 <- names2age(m0)
+ ma45 <- names2age(m45)
+ pa0 <- names2age(p0)
+ pa45 <- names2age(p45)
+ ua0 <- names2age(u0)
+ ua45 <- names2age(u45)
+
+ expect_equal(ma0, 0:20, tolerance = 0)
+ expect_equal(pa0, 0:20, tolerance = 0)
+ expect_equal(ua0, 0:20, tolerance = 0)
+ expect_equal(ma45, 45:65, tolerance = 0)
+ expect_equal(pa45, 45:65, tolerance = 0)
+ expect_equal(ua45, 45:65, tolerance = 0)
+
+})
diff --git a/tests/testthat/test-lt_abridged.R b/tests/testthat/test-lt_abridged.R
index 2b2c9e939..93275576f 100644
--- a/tests/testthat/test-lt_abridged.R
+++ b/tests/testthat/test-lt_abridged.R
@@ -9,9 +9,19 @@ context("test-lt_abridged")
# testing function --------------------------------------------------------
-lt_test_all_positive_plus_qx_lt_1 <- function(LT) {
+lt_consistent <- function(LT) {
# check positive values
+ nAx <- LT$nAx
+ qx <- LT$qx
+ lx <- LT$lx
+ ndx <- LT$ndx
+ Lx <- LT$Lx
+ Sx <- LT$Sx
+ ex <- LT$ex
+ Age <- LT$Age
+ AgeInt <- LT$AgeInt
+  # no negatives allowed
expect_equal(
LT %>%
# TR: open age AgeInt is NA, not handled well with logicals
@@ -23,13 +33,12 @@ lt_test_all_positive_plus_qx_lt_1 <- function(LT) {
)
# check qx less than 1
- expect_equal(
- LT %>%
- '['("nqx") %>%
- '>'(1) %>%
- sum(),
- 0
- )
+ expect_equal( sum(qx > 1), 0 )
+
+ # check monotonicity of lx, Lx, Tx, x + ex
+ # check sum(dx) = lx[1]
+
+ # ds <- diff(Age + ex) %>% sign()
}
@@ -80,7 +89,7 @@ test_that("lt_abridged works on PAS example", {
)
# positive, qx =< 1
- PASLT %>% lt_test_all_positive_plus_qx_lt_1()
+ PASLT %>% lt_consistent()
})
@@ -126,8 +135,8 @@ test_that("lt_abridged works on UN 1982 (p. 34) example", {
)
# positive, qx =< 1
- UNLT1 %>% lt_test_all_positive_plus_qx_lt_1()
- UNLT2 %>% lt_test_all_positive_plus_qx_lt_1()
+ UNLT1 %>% lt_consistent()
+ UNLT2 %>% lt_consistent()
})
@@ -225,8 +234,32 @@ test_that("lt_abridged works on Mortpak example (United Nations 1988, p. 82)", {
)
# positive, qx =< 1
- MP_UNLT100 %>% lt_test_all_positive_plus_qx_lt_1()
- MP_UNLT80 %>% lt_test_all_positive_plus_qx_lt_1()
- MP_UNLT60 %>% lt_test_all_positive_plus_qx_lt_1()
+ MP_UNLT100 %>% lt_consistent()
+ MP_UNLT80 %>% lt_consistent()
+ MP_UNLT60 %>% lt_consistent()
})
+
+test_that("lt_abridged does not fail when extrapFit is > 2", {
+ lx <- c(1.0000000, 0.8352893, 0.6128018, 0.5733119, 0.5723181, 0.5568574, 0.5477342, 0.5244361, 0.5120798, 0.4926618, 0.4628227, 0.4390118, 0.4100229, 0.3840418)
+
+ Age <- c(0, 1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60)
+
+ Sex <- "m"
+
+
+ res <- lt_abridged(
+ lx = lx,
+ Age = Age,
+ radix = 1e+05,
+ axmethod = "un",
+ a0rule = "cd",
+ Sex = Sex,
+ region = "w",
+ mod = TRUE,
+ OAG = TRUE,
+ extrapFit = seq(20, 60, by = 5)
+ )
+
+ expect_s3_class(res, "data.frame")
+})
diff --git a/tests/testthat/test-mig_beta.R b/tests/testthat/test-mig_beta.R
index fd1751373..2a14abd45 100644
--- a/tests/testthat/test-mig_beta.R
+++ b/tests/testthat/test-mig_beta.R
@@ -1,6 +1,6 @@
check_form <- function(x) {
expect_is(x, "numeric")
- expect_true(length(x) == 103)
+ expect_true(length(x) == 102)
expect_true(all(!is.na(x)))
expect_named(x)
}
@@ -87,13 +87,13 @@ test_that("Births are pulled from post-processed WPP2019", {
date2 = "2010-10-25",
age1 = 0:100
))
-
+
expect_true(any(outp == "births not provided. Downloading births for Russian Federation (LocID = 643), gender: `male`, years: 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010" #nolintr
))
})
test_that("mig_beta works well with different time points", {
- # 3) mortality (abridged, 2 and 3 time points) and fertility given:
+ # 3) mortality (abridged, 2 and 3 time points) and fertility given:
mortdate1 <- 2003
mortdate2 <- 2006
mortdate3 <- 2010
@@ -138,7 +138,7 @@ test_that("mig_beta works well with different time points", {
lxMat = lxmat3,
dates_lx = c(mortdate1,mortdate2,mortdate3),
age_lx = age_lx,
- births = c(719511L, 760934L, 772973L, 749554L,
+ births = c(719511L, 760934L, 772973L, 749554L,
760831L, 828772L, 880543L, 905380L, 919639L),
years_births = 2002:2010)
@@ -153,7 +153,7 @@ test_that("mig_beta works well with different time points", {
lxMat = lxmat3,
dates_lx = c(mortdate1,mortdate2,mortdate3),
age_lx = age_lx,
- births = c(719511L, 760934L, 772973L, 749554L,
+ births = c(719511L, 760934L, 772973L, 749554L,
760831L, 828772L, 880543L, 905380L, 919639L,1e6),
years_births = 2002:2011)
@@ -228,7 +228,7 @@ test_that("mig_beta fails when lxmat is not correct", {
lxMat = lxmat[, 1, drop = FALSE],
dates_lx = c(mortdate1,mortdate2,mortdate3),
age_lx = age_lx,
- births = c(719511L, 760934L, 772973L, 749554L,
+ births = c(719511L, 760934L, 772973L, 749554L,
760831L, 828772L, 880543L, 905380L, 919639L),
years_births = 2002:2010),
regexp = "lxMat should have at least two or more dates as columns. lxMat contains only one column" #nolintr
@@ -594,7 +594,7 @@ test_that("mig_beta throws download messages when verbose = TRUE", {
test_that("mig_beta throws download messages when verbose = TRUE and LocID used", {
-
+
# 1) lx is downloaded
outp <- capture_output_lines(
mig_beta(
@@ -611,7 +611,7 @@ test_that("mig_beta throws download messages when verbose = TRUE and LocID used"
verbose = TRUE
))
expect_true(any(outp == "lxMat not provided. Downloading lxMat for Russian Federation (LocID = 643), gender: `both`, for years between 2002.8 and 2010.8"))
-
+
# 2) births are downloaded
outp <- capture_output_lines(
mig_beta(
@@ -627,7 +627,7 @@ test_that("mig_beta throws download messages when verbose = TRUE and LocID used"
verbose = TRUE
))
expect_true(any(outp == "births not provided. Downloading births for Russian Federation (LocID = 643), gender: `both`, years: 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010"))
-
+
# 3) dates_lx or years_births are being assumed anything
expect_output(
mig_beta(
@@ -648,3 +648,184 @@ test_that("mig_beta throws download messages when verbose = TRUE and LocID used"
fixed = TRUE
)
})
+
+
+test_that("mig_beta applies child_adjustment correctly", {
+ res_none <-
+ mig_beta(
+ location = "Russian Federation",
+ sex = "male",
+ c1 = pop1m_rus2002,
+ c2 = pop1m_rus2010,
+ date1 = "2002-10-16",
+ date2 = "2010-10-25",
+ age1 = 0:100,
+ births = births,
+ child_adjust = "none"
+ )
+
+
+ res_cwr <-
+ mig_beta(
+ location = "Russian Federation",
+ sex = "male",
+ c1 = pop1m_rus2002,
+ c2 = pop1m_rus2010,
+ date1 = "2002-10-16",
+ date2 = "2010-10-25",
+ age1 = 0:100,
+ births = births,
+ child_adjust = "cwr"
+ )
+
+ res_cwr_high <-
+ mig_beta(
+ location = "Russian Federation",
+ sex = "male",
+ c1 = pop1m_rus2002,
+ c2 = pop1m_rus2010,
+ date1 = "2002-10-16",
+ date2 = "2010-10-25",
+ age1 = 0:100,
+ births = births,
+ child_adjust = "cwr",
+ cwr_factor = 0.9
+ )
+
+ res_constant <-
+ mig_beta(
+ location = "Russian Federation",
+ sex = "male",
+ c1 = pop1m_rus2002,
+ c2 = pop1m_rus2010,
+ date1 = "2002-10-16",
+ date2 = "2010-10-25",
+ age1 = 0:100,
+ births = births,
+ child_adjust = "constant"
+ )
+
+ # Check we don't mess up the format of migs (length, type, etc..)
+ for (i in list(res_none, res_cwr, res_constant)) check_form(i)
+
+ # Since cwr and constant can change, we only test that they adjust a certain
+ # number of ages rather than test the exact equality of results.
+
+ # CWR:
+ # Why 9? Because date1 and date2 differ by 9 years, so only the first 9
+ # cohorts are adjusted.
+ # Test that the first 9 are adjusted:
+ expect_true(all((res_none - res_cwr)[1:9] != 0))
+
+
+ # Constant:
+  # Test that the first 9 are adjusted.
+ expect_true(all((res_none - res_constant)[1:9] != 0))
+
+
+ # Test that CWR with high cwr_factor returns higher younger ages than with 0.3,
+ # the default:
+ expect_true(all(res_cwr_high[1:9] > res_cwr[1:9]))
+
+})
+
+
+
+test_that("mig_beta applies oldage_adjustment correctly", {
+ res_none <-
+ mig_beta(
+ location = "Russian Federation",
+ sex = "male",
+ c1 = pop1m_rus2002,
+ c2 = pop1m_rus2010,
+ date1 = "2002-10-16",
+ date2 = "2010-10-25",
+ age1 = 0:100,
+ births = births,
+ oldage_adjust = "none"
+ )
+
+ res_beers <-
+ mig_beta(
+ location = "Russian Federation",
+ sex = "male",
+ c1 = pop1m_rus2002,
+ c2 = pop1m_rus2010,
+ date1 = "2002-10-16",
+ date2 = "2010-10-25",
+ age1 = 0:100,
+ births = births,
+ oldage_adjust = "beers"
+ )
+
+
+ res_mav <-
+ mig_beta(
+ location = "Russian Federation",
+ sex = "male",
+ c1 = pop1m_rus2002,
+ c2 = pop1m_rus2010,
+ date1 = "2002-10-16",
+ date2 = "2010-10-25",
+ age1 = 0:100,
+ births = births,
+ oldage_adjust = "mav"
+ )
+
+ # Check we don't mess up the format of migs (length, type, etc..)
+ for (i in list(res_none, res_beers, res_mav)) check_form(i)
+
+ # beers:
+ # expect that the 65+ are different because they're adjusted
+ # Why 66? Because first age is 0 and total length is 101
+ expect_true(all((res_none - res_beers)[66:100] != 0))
+
+
+ # mav:
+ # expect that the 65+ are different because they're adjusted
+ # Why 66? Because first age is 0 and total length is 101
+ expect_true(all((res_none - res_mav)[66:100] != 0))
+
+
+ # Test that oldage_min controls the age from which to adjust
+ # old ages
+ res_beers <-
+ mig_beta(
+ location = "Russian Federation",
+ sex = "male",
+ c1 = pop1m_rus2002,
+ c2 = pop1m_rus2010,
+ date1 = "2002-10-16",
+ date2 = "2010-10-25",
+ age1 = 0:100,
+ births = births,
+ oldage_adjust = "beers",
+ oldage_min = 70
+ )
+
+
+ res_mav <-
+ mig_beta(
+ location = "Russian Federation",
+ sex = "male",
+ c1 = pop1m_rus2002,
+ c2 = pop1m_rus2010,
+ date1 = "2002-10-16",
+ date2 = "2010-10-25",
+ age1 = 0:100,
+ births = births,
+ oldage_adjust = "mav",
+ oldage_min = 70
+ )
+
+ # beers:
+ # expect that the 65+ are different because they're adjusted
+ # Why 71? Because first age is 0 and total length is 101
+ expect_true(all((res_none - res_beers)[71:100] != 0))
+
+
+ # mav:
+ # expect that the 65+ are different because they're adjusted
+ # Why 71? Because first age is 0 and total length is 101
+ expect_true(all((res_none - res_mav)[71:100] != 0))
+})
diff --git a/tests/testthat/test-smooth_age_5.R b/tests/testthat/test-smooth_age_5.R
index 3984ae87b..4a72c39c4 100644
--- a/tests/testthat/test-smooth_age_5.R
+++ b/tests/testthat/test-smooth_age_5.R
@@ -5,108 +5,151 @@ context("test-agesmth")
# smooth_age_5_cf -----------------------------------------------------
-test_that("smooth_age_5_cf works",{
-
- Ages <- seq(0, 80, by = 5)
-
- CFmales <- smooth_age_5_cf(pop5m_pasex, Ages, OAG = TRUE)
-
- CFtest <- c(NA, NA, 346290, 287083, 285855, 261082, 237937,
- 202809, 162973, 125720, 88730, 67352, 55187, 40657, NA, NA, NA)
-
- # test
- expect_equivalent(CFmales, CFtest, tolerance = 1)
- expect_equal(CFmales %>% sum, CFtest %>% sum, tolerance = 1)
- expect_true(all(CFmales > 0, na.rm = T))
+test_that("smooth_age_5_cf works", {
+ Ages <- seq(0, 80, by = 5)
+ CFmales <- smooth_age_5_cf(pop5m_pasex, Ages, OAG = TRUE)
+
+ CFtest <- c(
+ NA, NA, 346290, 287083, 285855, 261082, 237937,
+ 202809, 162973, 125720, 88730, 67352, 55187, 40657, NA, NA, NA
+ )
+
+ # test
+ expect_equivalent(CFmales, CFtest, tolerance = 1)
+ expect_equal(CFmales %>% sum(), CFtest %>% sum(), tolerance = 1)
+ expect_true(all(CFmales > 0, na.rm = T))
})
# smooth_age_5_kkn ----------------------------------------------------------------
-test_that("smooth_age_5_kkn works",{
-
- Ages <- seq(0, 80, by = 5)
-
- KKNtest <- c(NA, NA, 354871, 278502, 285508, 261429, 236513 ,
- 204233, 162138, 126555, 90094, 65988, 54803, 41041, NA, NA, NA)
-
- KKNmales <- smooth_age_5_kkn(pop5m_pasex, Ages, OAG = TRUE)
-
- # test
- expect_equivalent(KKNmales, KKNtest, tolerance = 1)
- expect_equal(KKNmales %>% sum, KKNtest %>% sum, tolerance = 1)
- expect_true(all(KKNmales > 0, na.rm = T))
-
+test_that("smooth_age_5_kkn works", {
+ Ages <- seq(0, 80, by = 5)
+
+ KKNtest <- c(
+ NA, NA, 354871, 278502, 285508, 261429, 236513,
+ 204233, 162138, 126555, 90094, 65988, 54803, 41041, NA, NA, NA
+ )
+
+ KKNmales <- smooth_age_5_kkn(pop5m_pasex, Ages, OAG = TRUE)
+
+ # test
+ expect_equivalent(KKNmales, KKNtest, tolerance = 1)
+ expect_equal(KKNmales %>% sum(), KKNtest %>% sum(), tolerance = 1)
+ expect_true(all(KKNmales > 0, na.rm = T))
})
# smooth_age_5_arriaga ----------------------------------------------------------------
-test_that("smooth_age_5_arriaga works",{
-
- Ages <- seq(0, 80, by = 5)
-
- Amales <- smooth_age_5_arriaga(Value = pop5m_pasex, Age = Ages, OAG = TRUE)
-
- #' # PAS spreadsheet result:
- Atest <- c(662761, 495126, 345744, 287629, 285919, 261018, 237469, 203277,
- 161733, 126960, 88586, 67496, 54587, 41257, 28790, 17189, 34729)
-
- # test
- expect_equivalent(Amales, Atest, tolerance = 1)
- expect_equal(Amales %>% sum, Atest %>% sum, tolerance = 1)
- expect_true(all(Amales > 0, na.rm = T))
-
+test_that("smooth_age_5_arriaga works", {
+ Ages <- seq(0, 80, by = 5)
+
+ Amales <- smooth_age_5_arriaga(Value = pop5m_pasex, Age = Ages, OAG = TRUE)
+
+ #' # PAS spreadsheet result:
+ Atest <- c(
+ 662761, 495126, 345744, 287629, 285919, 261018, 237469, 203277,
+ 161733, 126960, 88586, 67496, 54587, 41257, 28790, 17189, 34729
+ )
+
+ # test
+ expect_equivalent(Amales, Atest, tolerance = 1)
+ expect_equal(Amales %>% sum(), Atest %>% sum(), tolerance = 1)
+ expect_true(all(Amales > 0, na.rm = T))
})
# smooth_age_5_strong ----------------------------------------------------------------
-test_that("smooth_age_5_strong works",{
-
- Ages <- seq(0, 80, by = 5)
-
- Stest <- c(646617, 511270, 386889, 317345, 273736, 240058, 218645, 188297,
- 153931, 124347, 93254, 71858, 53594, 39721, 27887, 18092, 34729)
-
- Smales <- smooth_age_5_strong(pop5m_pasex, Ages, OAG = TRUE)
-
- # test
- expect_equivalent(Smales, Stest, tolerance = 1)
- expect_equal(Smales %>% sum, Stest %>% sum, tolerance = 1)
- expect_true(all(Smales > 0, na.rm = T))
-
+test_that("smooth_age_5_strong works", {
+ Ages <- seq(0, 80, by = 5)
+
+ Stest <- c(
+ 646617, 511270, 386889, 317345, 273736, 240058, 218645, 188297,
+ 153931, 124347, 93254, 71858, 53594, 39721, 27887, 18092, 34729
+ )
+
+ Smales <- smooth_age_5_strong(pop5m_pasex, Ages, OAG = TRUE)
+
+ # test
+ expect_equivalent(Smales, Stest, tolerance = 1)
+ expect_equal(Smales %>% sum(), Stest %>% sum(), tolerance = 1)
+ expect_true(all(Smales > 0, na.rm = T))
})
+test_that("smooth_age_5_mav works", {
+
+ # Without tails
+ Ages <- seq(0, 80, by = 5)
+
+ Mtest <- c(
+ NA, 505239, 382964, 300570, 274160, 263151, 239782, 202228,
+ 162308, 128489, 92946, 73183, 51717, 41880, 26119, NA, 34729
+ )
+
+  Mmales <- smooth_age_5_mav(pop5m_pasex, Ages, OAG = TRUE, tails = FALSE)
+
+ # test
+ expect_equivalent(Mmales, Mtest, tolerance = 1)
+ expect_equal(Mmales %>% sum(), Mtest %>% sum(), tolerance = 1)
+ expect_true(all(Mmales > 0, na.rm = T))
+
+ # With tails
+ Mtest_tail <- c(
+ 642367, 507810, 382964, 300570, 274160, 263151, 239782, 202228,
+ 162308, 128489, 92946, 73183, 51717, 41880, 27038, 29796, 34729
+ )
+
+  Mmales_tail <- smooth_age_5_mav(pop5m_pasex, Ages, OAG = TRUE, tails = TRUE)
+
+ # test
+ expect_equivalent(Mmales_tail, Mtest_tail, tolerance = 1)
+ expect_equal(Mmales_tail %>% sum(), Mtest_tail %>% sum(), tolerance = 1)
+ expect_true(all(Mmales_tail > 0, na.rm = T))
+
+ # Check tail/no_tail are the same except
+ # for the tails:
+ expect_equivalent(Mmales[3:15], Mmales_tail[3:15], tolerance = 1)
+  # Exclude first/last pair of values because cascading alters both
+ # numbers.
+
+})
+
+
test_that("smooth_age_5_feeney works", {
-
- Pop <- c(2337, 3873, 3882, 3952, 4056, 3685, 3687, 3683, 3611, 3175,
- 3457, 2379, 3023, 2375, 2316, 2586, 2014, 2123, 2584, 1475,
- 3006, 1299, 1236, 1052, 992, 3550, 1334, 1314, 1337, 942,
- 3951, 1128, 1108, 727, 610, 3919, 1221, 868, 979, 637,
- 3409, 887, 687, 533, 313, 2488, 677, 426, 524, 333,
- 2259, 551, 363, 290, 226, 1153, 379, 217, 223, 152,
- 1500, 319, 175, 143, 89, 670, 149, 96, 97, 69,
- 696, 170, 60, 38, 23, 745)
-
- Ages <- c(0:75)
-
- result <- smooth_age_5_feeney(Pop, Ages, maxit = 200, OAG = TRUE)
- # inlcude original unstated age (15)
- result <- rescale_vector(result,sum(result)+15)
- tab2_answer <- c(18004, 17351, 14018, 10927, 8837, 8145, 7823, 7029,
- 5748, 4326, 3289, 2415, 1794, 1197, 982, 741)
- names(tab2_answer) <- seq(0,75,5)
-
-
- testthat:::expect_equal(
- result,
- tab2_answer,
- tolerance = .001) # TR: this on relative scale??
+ Pop <- c(
+ 2337, 3873, 3882, 3952, 4056, 3685, 3687, 3683, 3611, 3175,
+ 3457, 2379, 3023, 2375, 2316, 2586, 2014, 2123, 2584, 1475,
+ 3006, 1299, 1236, 1052, 992, 3550, 1334, 1314, 1337, 942,
+ 3951, 1128, 1108, 727, 610, 3919, 1221, 868, 979, 637,
+ 3409, 887, 687, 533, 313, 2488, 677, 426, 524, 333,
+ 2259, 551, 363, 290, 226, 1153, 379, 217, 223, 152,
+ 1500, 319, 175, 143, 89, 670, 149, 96, 97, 69,
+ 696, 170, 60, 38, 23, 745
+ )
+
+ Ages <- c(0:75)
+
+ result <- smooth_age_5_feeney(Pop, Ages, maxit = 200, OAG = TRUE)
+  # include original unstated age (15)
+ result <- rescale_vector(result, sum(result) + 15)
+ tab2_answer <- c(
+ 18004, 17351, 14018, 10927, 8837, 8145, 7823, 7029,
+ 5748, 4326, 3289, 2415, 1794, 1197, 982, 741
+ )
+ names(tab2_answer) <- seq(0, 75, 5)
+
+
+ testthat:::expect_equal(
+ result,
+ tab2_answer,
+ tolerance = .001
+ ) # TR: this on relative scale??
})
#' Age <- c(0,1,seq(5,90,by=5))
#' # defaults
-#' zz <- zigzag_smth(dth5_zigzag, Age, OAG = TRUE, ageMin = 40, ageMax = 90)
\ No newline at end of file
+#' zz <- zigzag_smth(dth5_zigzag, Age, OAG = TRUE, ageMin = 40, ageMax = 90)
diff --git a/tests/testthat/test_AGEINT.R b/tests/testthat/test_AGEINT.R
index a3b50e0af..9c2fbb0e2 100644
--- a/tests/testthat/test_AGEINT.R
+++ b/tests/testthat/test_AGEINT.R
@@ -62,5 +62,5 @@ test_that("basic interpolation function",{
datesIn = dates,
datesOut = dec.date(1900.5),
extrap = T),
- regexp = "Negative values were turned 0. No accepted in population counts, fertility rates or life table functions.")
+ regexp = "Negative values have been replaced with 0s")
})
diff --git a/version_lookup.R b/version_lookup.R
new file mode 100644
index 000000000..95e84959b
--- /dev/null
+++ b/version_lookup.R
@@ -0,0 +1,138 @@
+# remotes::install_github("lorenzwalthert/gitsum")
+if (system("whoami",intern=TRUE) == "tim"){
+ library(gitsum)
+ library(tidyverse)
+ library(readr)
+ library(lubridate)
+
+ get_versions <- function(hash){
+ D <- readLines(paste0("https://raw.githubusercontent.com/timriffe/DemoTools/",hash,"/DESCRIPTION") )
+ D[grepl(D,pattern = "Version: ")] %>%
+ gsub(pattern = "Version: ", replacement = "")
+ }
+
+ update_lookup <- function(){
+ DESC_changes <-
+ parse_log_detailed() %>%
+ unnest_log() %>%
+ dplyr::filter(changed_file == "DESCRIPTION") %>%
+ group_by(lubridate::as_date(date)) %>%
+ slice(n()) %>%
+ ungroup() %>%
+ mutate(date = as_date(date))
+ #date = paste(year(date),month(date),day(date),sep="-"))
+ # DESC_changes %>%
+ # mutate(get_versions(hash))
+ vers <- list()
+ for (i in 1:nrow(DESC_changes)){
+ # Sys.sleep(1)
+ vers[[i]] <- try(get_versions(DESC_changes$hash[i]))
+ }
+ closeAllConnections()
+ errors <- lapply(vers,class) %>% unlist()
+ errors <- errors == "try-error"
+ vers[errors]<-NA
+ DESC_changes <- cbind(DESC_changes,version=tibble(version=unlist(vers)))
+
+ version_lookup <-
+ DESC_changes %>%
+ filter(!is.na(version)) %>%
+ select(date,version, hash)
+
+ write_csv(version_lookup,"version_lookup.csv")
+
+ }
+}
+
+get_DemoTools_versions <- function(){
+ readr::read_csv("https://raw.githubusercontent.com/timriffe/DemoTools/master/version_lookup.csv")
+}
+
+install_DemoTools_version <- function(version = NULL, date = NULL, hash = NULL){
+
+
+
+ if (is.null(version) & is.null(date) & is.null(hash)){
+ cat("version identifiers include (one of):
+ 1. Version entries in the DESCRIPTION file
+ 2. Date (yyyy-mm-dd)
+ 3. the hash key of the commit\n\n")
+ cat("no identifier given, you can view options with
+        get_DemoTools_versions()\n\n")
+ cat("to install the current head, try:
+        remotes::install_github('timriffe/DemoTools')")
+ }
+
+ versions <- get_DemoTools_versions() %>%
+ mutate(date = lubridate::as_date(date))
+
+ out <- NULL
+ if (!is.null(hash)){
+    if (hash %in% versions$hash){
+ remotes::install_github("timriffe/DemoTools", ref = hash)
+ } else {
+ stop("hash not found, try using one from the (incomplete) list returned by:\n get_DemoTools_versions()")
+ }
+ out <- 1
+ }
+
+ # try date
+ if (is.null(out)){
+ if (!is.null(date)){
+ if (is.character(date)){
+ date = lubridate::ymd(date)
+ }
+ if (is.na(date)){
+ cat("date didn't parse. It should be yyyy-mm-dd format if given as character\n")
+ stop()
+ }
+ dateK <- date
+ dateL <-
+ versions %>%
+ mutate(dist = abs(dateK - date)) %>%
+ dplyr::filter(dist == min(dist)) %>%
+ dplyr::pull(date) %>%
+ '['(1)
+ hash <-
+ versions %>%
+ dplyr::filter(date == dateL) %>%
+ dplyr::pull(hash)
+ remotes::install_github("timriffe/DemoTools", ref = hash)
+ out <- 1
+ if (date != dateL){
+ cat("date not in the (incomplete) set returned by get_DemoTools_versions()
+ using the closest date in that subset instead:", as.character(dateL),"\n")
+ }
+ }
+ }
+
+ # try version
+ if (is.null(out)){
+ if (!is.null(version)){
+ .version <- version
+ hash <-
+ versions %>%
+ dplyr::filter(version == .version) %>%
+ dplyr::pull(hash)
+ if (length(hash) == 1){
+ remotes::install_github("timriffe/DemoTools", ref = hash)
+ out <- 1
+ } else {
+ cat("version not in the set returned by get_DemoTools_versions()
+ we didn't try approximating. Have a look at the version snapshots available
+ in the lookup table.\n")
+ }
+ }
+ }
+ # catch-all
+ if (is.null(out)){
+ cat("Looks like no installation attempted. For the most recent version try:
+ remotes::install_github('timriffe/DemoTools')
+ otherwise easiest thing to roll back is to pick something
+ out from the lookup table.")
+ }
+
+
+}
+
+
diff --git a/version_lookup.csv b/version_lookup.csv
new file mode 100644
index 000000000..c66c23c36
--- /dev/null
+++ b/version_lookup.csv
@@ -0,0 +1,188 @@
+date,version,hash
+2017-07-28,0.4,5d89f22aa51b9defdc52825c079a1f6127b367c9
+2017-07-30,0.4,14d1aa9c3146c9bb8b181216695835a0ae673448
+2017-08-01,0.43,b685a36e71fb7c914f162504585a432071e66d5c
+2017-08-04,0.46,293e68ea2b279cd12352dbef6073449ab2600544
+2017-08-07,0.48,a92d32060afc56a9b7e10aa4fe433813f6de6026
+2017-08-09,0.481,f43763e8ec50b1576d7d94ab73df1ee83ece75a8
+2017-08-11,0.482,8117248adb4d021fea098ecd7db19ba42d7a30b0
+2017-08-16,0.483,81542b832fb2b2c1177cca928916b43b6cb3e7a5
+2017-08-18,0.5,d46f4f388f2efaa55ab918113165dac0b8cc589c
+2017-09-12,0.5,b63e7b3b425fd34585906416c6be42f50e89eeed
+2017-10-22,0.5.6,99ead2d0b5223f3a5754a7053af24ea0b23b131c
+2017-10-30,0.5.7,1390729b2be7dc555253bf360bcb524af11afd05
+2017-11-02,0.5.8,8b11bd7828f750acd1fe0276c01cec3d76f17f70
+2018-02-28,0.5.9,d9dffb4e9678bf721f847c318bb689daaf8555cb
+2018-03-09,0.5.91,89d75229b7f261f11a18c20b6dc32926ee9cfaf4
+2018-07-18,0.5.92,8932752f59a997d2cb7d5b84b5fc9c01ae55aadf
+2018-07-19,0.5.922,322a3046ae45f0c5b8ce108d385d11b1e70ed4c7
+2018-07-20,0.5.942,760e4992a94b7ddf97201600f94c886a95e3996a
+2018-07-22,0.5.944,7d573defa0912b84349239850d00a69925f0ee44
+2018-07-23,0.5.945,527da8184888cbbfd87a3d24882d43038a875f28
+2018-07-26,0.5.95,79f6417aa7713397c3c2ada895f704cec5eb9fd4
+2018-07-27,0.5.963,d8728f4e409631d1db5e9726886999d2029a1c97
+2018-07-30,0.6.1,bb1b3feb6dadfd8b1942cdb9589179bf52133d39
+2018-07-31,0.6.102,6f2dcaad5f7ef03216b7dc3c38dca8ebe6ad7677
+2018-08-01,0.6.103,f408ac53dbd0304c5a66ca6d4bc4665573871af8
+2018-08-07,0.6.104,4020b0fe366a4572822dce4ca94994121a8135de
+2018-08-10,0.6.112,39ed9760c518aa90be93d58e132a726b4d11ea49
+2018-08-13,0.6.113,794829c4e90c9a9c7a3739fdb4cbeec3994b0788
+2018-08-15,0.6.115,717864ef055bbd6031496327ae0cc8fb48968f84
+2018-08-16,0.07.01,c10d3468cd81562f9b5ea4cfab441f0f29029cfa
+2018-08-17,0.07.02,257cf5d854c7618196aaa1b021895a6abe1c52cf
+2018-08-20,0.08.000,c317b44caf2ffd7c06c8e46ced435490fc64f18d
+2018-08-22,0.09.000,327563e8448bfec62d9e0080c7d4cc48600b5b5f
+2018-09-03,0.10.000,1ed6bbdd115f01480d7ab8740949c278b6623a2a
+2018-09-15,0.10.01,524dd9a6d4a20c73dd7b38cd990dd682a3e58138
+2018-10-04,0.10.03,e84732eece4acf340efac3458d4d74d8ce9ea347
+2018-10-08,0.11.000,6e8b9b5d267d0f79eab4d70ef8973cbaef5e9284
+2018-10-12,0.11.01,2b3559c98db007021e4a0873207446e453655135
+2018-10-13,0.12.000,0e78f4fa94067738993e2b23cd829312477d207c
+2018-11-08,0.12.01,ee1b32830d482aff2c5838f9670e4bdb698b2f8c
+2018-11-09,0.12.10,9660bef6b014af306da3f33b2eb002fe2ac2f920
+2018-11-27,0.12.12,56ea70f2a03bbb0a35540552cd60f08c72906a23
+2018-11-28,0.12.17,dedc05b22d76b5a12eb69127e2820af8f744cb2a
+2018-12-01,0.12.20,b80c8a629969a17519af7aaf620c16cfdd0bf6d0
+2018-12-02,0.12.23,131677090a2e0bff35fd604777a3997ca5126fde
+2018-12-03,0.12.24,bee022667e52e5cad2e988402106362129406b14
+2018-12-04,0.12.26,d01b05c319333992635fcf035aad62875acf1b55
+2018-12-11,0.12.27,913267e6b6c75e27778015c531fb0f7867be3d2d
+2018-12-12,0.12.29,49aa3e9c5700c7c5089dc664ceb5819995abd666
+2018-12-15,0.12.30,963e3f2006f828cca650375f7488e7503774992f
+2018-12-16,0.12.32,8793baac419394156bb5f3c8e80270eedfa125d9
+2018-12-19,0.12.33,271ff75c4018e1de29d58a222a114a7515a437f3
+2018-12-26,0.14.03,7442365b92538466bccef41dc4fb327704fd62fb
+2018-12-27,0.15.05,dcc21d3631838c14bc2233c8c88b3881db0e62f3
+2018-12-28,0.15.15,89d0d9be2fe2d5a3c504daab7627e2f2a2c83696
+2018-12-29,0.15.17,2969a8f4c5d59a4ff33cbe521c06a156a2f788d3
+2019-02-01,0.15.18,6b1325890ad79be7a9ff55b8afc4b28dfe3c6a0d
+2019-02-04,0.15.19,670608b85553b3ba5e1aa79229f52d0bf36a8bf0
+2019-04-24,01.00.05,ce46f5548d71dc2e4caa96ca76f2bd6f9a2b9fd8
+2019-08-14,01.00.05,7ed26d1e3ec3e2ed8741feb678cf2af329f06c9e
+2019-08-26,01.00.10,6eb8dde27879db8d5c9d34cae97abe721d189eda
+2019-08-28,01.00.11,58bb136b5608bab82e227682fcabcb93a7afdfed
+2019-08-29,01.00.13,0fea174320daa15b78b6fb7aabdbc4ff3c519f7d
+2019-10-03,01.00.18,5a9eaf6c1d7956d2f0c2a77e9f4a18fbb8b2c5bb
+2019-10-07,01.01.04,02c6a978ba3eccfcb0d7e86bab1b0dfa5643d02b
+2019-10-18,01.01.06,7e7da564504406520912b71e32a6c67bae54c82f
+2019-10-24,01.01.08,e4a801ad302249c9d99b2e87102c159a640cbb8b
+2019-10-25,01.02.000,f5f6b87f4f15ac08505d74933fafec3e022a6268
+2019-10-27,01.02.03,629ca5161b572704adcf16a989af631ffce26817
+2019-10-28,01.02.04,84b708ad843d568d6e4ac7b7d6729bde4b09dcc2
+2019-11-25,01.02.05,9f13254e1502b24648f02ba25dd3f5e0ed9408cb
+2019-11-27,01.02.05,abc6a98bdc8828f95e5f2979e91493cbffa89572
+2019-11-28,01.03.02,69e1afbad301a15a32777f82780efde6f406b992
+2019-12-02,01.03.03,8f89431ab6154b484a7e3664ed0be19433f82500
+2019-12-03,01.03.09,25b4661628397647937c8aa7991e8ae8390d7039
+2019-12-04,01.03.31,e4633ea699cb7da18dd935375bded84ed9e857df
+2019-12-05,01.03.31,496e19dee824668bfb6cc884ec67526e5950df00
+2019-12-06,01.04.01,f3f1c420437f090ed2a78217272c1c7246d30274
+2019-12-09,01.04.04,d74853d82b802608ae07f1773c04fd926e2e6cbf
+2019-12-12,01.04.05,a8fe4deb19502d31a5bdbd8f478d92ca672a0be8
+2019-12-15,01.04.05,11f5d5d9a257f218778f62832e6be4046ff90e24
+2019-12-18,01.04.05,2a01570fe3ee530e6c17cb3bb612badb495a29a6
+2019-12-19,01.05.000,0ef964956049844febb8edd0877bc1ab5121000e
+2019-12-20,01.05.01,6dc5dce6998fef4c1006b41a8d89151ffcb86ca2
+2019-12-29,01.05.01,f78659ed2a2828bb80dfda1c8a947f00a1f31313
+2019-12-30,01.05.04,24ec5b6348c20726631718b104cf792d0c23caab
+2019-12-31,01.05.07,a91c4ae3840abb591f88937938c1375fcf3765c3
+2020-01-01,01.05.12,d800a87b3f8af2c8fb740590cc95616f425948f3
+2020-01-02,01.05.18,7bd9921bf9d5f603abdec429fa18943014066ff7
+2020-01-05,01.05.20,27ad22a9284c2f484b5cc786b7ceeb24ded39a97
+2020-01-06,01.05.21,d70d29ad488c0e9988397bdecc2f885890315907
+2020-01-07,01.05.23,83f7fa6c767b2616fad3fdc2fde4b7563e94b034
+2020-01-20,01.05.24,f29d5d7b6cfd244d914abd032021a7552f5d699b
+2020-03-07,01.05.25,4c729ffdc77f5dcfc4f51bf6210d54906a9b5a4a
+2020-04-12,01.05.26,ec765efce7ab46602498297ee20863e3e605f65d
+2020-05-01,01.05.26,1f7b673810becd1eec77de4aba0679664df3e6df
+2020-05-04,01.06.01,7c1abfdba8ec9d5f425154f866b8cc4f79a11084
+2020-06-05,01.06.02,49e35004d0e23486d699f7fe9bb4a2acad8ca02c
+2020-07-26,01.07.000,ef91a86c3695d3ff0dbfd83b57193c44c8d962a1
+2020-08-10,01.07.000,6115ce4cdebe31a847de37fc7743ca7cd1a434a9
+2020-08-17,01.08.000,91ed9a82ee48156328f02a3a55867706f3d6c518
+2020-08-18,01.08.000,11ef59ee8b606758ac08482a8ef27b0a968a85c1
+2020-08-30,01.08.02,97793aee1a5bccf62a6ac8637e45c5323b024ce0
+2020-08-31,01.09.000,ee37d54e8114432f618b385444faa645b1f19a28
+2020-09-04,01.09.03,ee2ae544fad2cb9de1d7e1045670ec5bb795eee7
+2020-09-15,01.09.03,cd3b257862c891015713ccbe46700167d74c97ac
+2020-09-28,01.09.05,79e608d477faa6a4075e5803a2082cb2350ab5be
+2020-10-07,01.09.07,cf25113103d0c335640833dc7ba86ed26962babd
+2020-10-08,01.09.09,7fd6be143b694ebe856767f12ce86a9e23d1588a
+2020-10-12,01.09.16,c5d7317a248d04f2aff8bf047770ee123d2a4a1a
+2020-10-13,01.09.17,f97d2916fa3eb693820bfad15b19ec17236db4ee
+2020-10-14,01.09.23,654dc30a42aa436cab6a30df71622a4f9c571e33
+2020-10-21,01.09.23,9ae35cbc06a5205ae69e18399ea39df1aee5cde1
+2020-10-22,01.09.24,e1e5aff505667ab7f7131b30876cdc508c657104
+2020-10-27,01.09.26,1a4c87b38d7c463a3959a0c074dd271eaecf89fb
+2020-10-31,01.09.27,1ea5f1ea57cf4a08387484a780190642860a640a
+2020-11-02,01.09.28,5fd5de5d38e6eb3ccc952d0c92238d603d313480
+2020-11-04,01.09.30,bc12e404edd962d10f75c822acfe352fac2bdf42
+2020-12-22,01.10.01,cde0b39a933ee8fdae560899737aa8f1738f8671
+2020-12-28,01.10.03,44ce3505fa8062d8d6b2682dae54b32e55c9cb48
+2021-01-02,01.10.04,bd983a703e27e22e5c374dcea58630655c21d9aa
+2021-01-03,01.11.000,2cc7b9e58d61026c3f96842e3711354f178d9780
+2021-01-04,01.11.01,8774a55570e7094cc0d946d37acb19b96c193fc2
+2021-01-06,01.11.02,7e1ef09d68106f9302d9d34e3653ce9235502f33
+2021-01-12,01.11.03,efa73e4ed3ddc30d5207a4054ffa12bd567abcce
+2021-01-14,01.11.06,d2bdef1f7054f8175daa82426bf833df0316f635
+2021-01-15,01.12.000,9f7bf4d35e136ecb5fb7c914122acf62f8f8d6a1
+2021-01-16,01.12.02,8e122c1694d5df362cff31a48f8ef6091dff67e6
+2021-01-17,01.12.03,4b3f04bcf7915e976558b7ce15f1df2449cbbc4b
+2021-01-18,01.12.04,61323dfb75a2fb103736aa5c4b42cae2ce2695ca
+2021-01-20,01.12.05,1fdaf568f82c9c889bffb90c36191214600f2c3c
+2021-01-21,01.12.06,2a4b1dda77cbc31a71f95729a50fd1e80b4e6ae2
+2021-01-23,01.12.07,ba910ddc536f18d4686b5eb5f539aab3431ce771
+2021-01-24,01.12.08,cee9abe4c83cbee3e74022b2668664aa412db318
+2021-01-25,01.12.09,bb563c4fbe5e361503c2e85b1fbb2c796c2e0939
+2021-01-26,01.12.12,92599efefc18243ddc480bad6ab6a7a3c117ca14
+2021-01-29,01.13.01,b835680ee7cc022bd1470ac81035c8581a801171
+2021-01-30,01.13.02,492c02156554b3418e6beca5984e5c2d99bd448d
+2021-02-03,01.13.03,2e1035a22748629efe8de908fc196a23ce350fd9
+2021-02-04,01.13.04,521d841e24dec26e388ff27717894a13d6358add
+2021-02-08,01.13.06,2a26ac05e6d9d10d8c09b99e7f1992e46fa33957
+2021-02-09,01.13.09,12bfedaf27f582a1959534c8a64ac6f94587e5eb
+2021-02-10,01.13.11,ca6c5b1bda73385fb96e8f3e8f0a9d74ee507fa9
+2021-02-16,01.13.15,276d10218b5959dc44cfe4fcde4bf77ce1e40696
+2021-02-17,01.13.16,7b3bf9743febe78a103f102b615ad5fb749f43c2
+2021-02-19,01.13.17,ce277f48bc5aef705dfcb4d6694e65ed5d846f0e
+2021-02-22,01.13.19,22c4db04400a147a11622b0b0c8e68408f1a4508
+2021-02-24,01.13.22,ab0ba6c89474bd95104309283c55339a4a107642
+2021-02-26,01.13.25,6679e12ce1a8a749f60890e15ae301030df16a5d
+2021-02-27,01.13.23,45f2550f84b0a939d5557d61481216a19a0d6c97
+2021-03-01,01.13.31,3a57890ea3ab116a0c3071c019b5dcfc57a7823a
+2021-03-02,01.13.31,5d363df8b9d781d47af841d0b8e2a70f70080a1a
+2021-03-03,01.13.32,8efbc78c3131b3834f7bc305e5635e7e2d7f6ffb
+2021-03-04,01.13.34,0b708d5fc9e57a73e444996e07eed0528735c780
+2021-03-22,01.13.37,1d25b20b7e23a76cc437d2cc1659e92706443148
+2021-03-23,01.13.39,3ebc61c48b099e4c98c8cf514d7682950c72d08f
+2021-04-01,01.13.40,f31a317a6f739d5461ee93763cf983ce2fac3735
+2021-04-09,01.13.41,536528f45164b08e0ffd45904ffaa69d7a7cb236
+2021-04-12,01.13.43,02250a51e2b6b9710d873cfee6f065c17ebf42a3
+2021-04-13,01.13.44,574fbe53014dd7b254768f89da74c987654e4199
+2021-04-15,01.13.45,773a23ab43f1ed7bbf60eff8126ef6212aa3f2f4
+2021-04-18,01.13.46,0c1591249eff3e837a2d42f590dfce98491a4765
+2021-04-21,01.13.46,c4167fdbd563a3e777d77e95f078d8d3c1833011
+2021-04-24,01.13.47,0a208f5ff9a0676ea21ea6abdbe6899de5149614
+2021-05-05,01.13.48,c796dffecbd5ae6fd05644a0abbe3a62dd1a6640
+2021-05-07,01.13.49,051390f3c4144420ded0fd78e6683f2eb5b8534f
+2021-05-09,01.13.50,c14678d2bf3a17052e23573d18e8d6d91cba8e44
+2021-05-12,01.13.51,734948faf0c1913e1cd0d111d78b55e1f5d01271
+2021-05-20,01.13.52,164189c8c336845ab001031902485358326c230a
+2021-05-23,01.13.53,0c6090bc281401a1c9d0a61d08602bb932c8f060
+2021-05-27,01.13.54,76d0651fdf0a5c5ed95a51cc6b23ac48101daf09
+2021-05-31,01.13.55,59a0f4e50b7696c185a3c9d4e582426f88aac84f
+2021-06-16,01.13.57,39be92db9c217c29511e653b18ff97d921153605
+2021-06-17,01.13.58,cf2dda3ac1eee11cc32fa18bedfa429b5c7bafa8
+2021-06-23,01.13.59,0f8fff68ace12f07173156e979290db0e1578cea
+2021-07-13,01.13.60,0560660113fa5b79de87e982b755ccb734d3b6a6
+2021-07-14,01.13.61,9cc242cc4496c0798e367e9c77be1ed006f8ba18
+2021-08-18,01.13.62,a5977c76166914e6cc13f94a15c4412f4a735e10
+2021-08-20,01.13.63,2ba8f6aa4435388e400065afabba02992dd765ff
+2021-08-23,01.13.65,9d5b0e407aea81a90c34e400e1c7b755d0c361d5
+2021-08-25,01.13.66,3cfe9a80ec3728703f9009d9088ff9d94b1d8b26
+2021-09-01,01.13.67,a2ac0a223879fb67e0cade10284431262b218df2
+2021-09-03,01.13.68,d7262ee938798cf3317aaef8a51923a6669d849f
+2021-09-15,01.13.70,2f15be434ff7c6849bed9d4e4680c99e8145f71f
+2021-10-09,01.13.73,c5bd735e877cd6579fb1c0ff7d427051e8b6bc86
+2021-11-03,01.13.74,128959fcc11299787ed87d1f16898e741879b57f
+2021-11-23,01.13.75,5b016adde09351da36970a274090c63d809328db
diff --git a/vignettes/Age-heaping_quality_with_Demotools.Rmd b/vignettes/Age-heaping_quality_with_Demotools.Rmd
index c65fb80c4..db392d022 100644
--- a/vignettes/Age-heaping_quality_with_Demotools.Rmd
+++ b/vignettes/Age-heaping_quality_with_Demotools.Rmd
@@ -42,9 +42,9 @@ plot(Age, pop1m_ind, type = 'o')
The figure above has some clear age irregularities. For example, there is a consistent clustering around ages ending in zero and five. In principle, a post-enumeration survey or a sample interview should give more information on the reasons why these irregularities appear in the data [@siegel2004methods]. However, not every country has the resources to conduct a survey or post-enumeration process. Therefore, various arithmetic techniques have been developed for measuring heaping on individual ages, terminal digits, and age ranges. These estimate the degree of bias, but they do not correct it [@booth2015demographic]. The simplest way to analyze age-heaping is by assuming that the true numbers are *rectangularly* distributed over some age range that includes centered the age in question [@siegel2004methods].
-Several indices of age-heaping exist. `DemoTools` implements Whipple [@spoorenberg2007quality], Myers, Bachi [@bachi1951tendency], Coale-Li [@coale1991effect], Noumbissi [@noumbissi1992indice], Spoorenberg [@spoorenberg2007quality], Jdanov [@jdanov2008beyond], and Kannisto [@kannisto1999assessing] age-heaping indices. Although the literal interpretation of these indices may vary, they tend to covary strongly when applied to the same data and age-ranges, so for making quick judgements of the degree of heaping over a large collection of data it may not be necessary to apply more than one or two. We also offer two new measures designed to test for irregularities in data in 5-year age bins.
+Several indices of age-heaping exist. `DemoTools` implements Whipple [@spoorenberg2007quality], Myers, Bachi [@bachi1951tendency], Coale-Li [@coale1991effect], Noumbissi [@noumbissi1992indice], Spoorenberg [@spoorenberg2007quality], Jdanov [@jdanov2008beyond], and Kannisto [@kannisto1999assessing] age-heaping indices. Although the literal interpretation of these indices may vary, they tend to covary strongly when applied to the same data and age-ranges, so for making quick judgements of the degree of heaping over a large collection of data it may not be necessary to apply more than one or two. We also offer two new measures designed to test for irregularities in data in 5-year age bins.
-Often the degree of detected heaping suggests that some smoothing procedure is warranted, but the kind of smoothing procedure may be a function of the particular way in which heaping is manifested. For example, if heaping is light, or just a matter of rounding to the nearest digit divisible by 5, then there will be no major difference between heaping on 0s versus heaping on 5s. In this case, grouping to 5-year age bins (see `groupAges()`) and then graduating in a constrained way (see `sprague()`) may suffice to remove the distortion while maintaining the broad underlying age pattern. However, if heaping is much worse on 0s than on 5s, the age pattern may still be distorted in a regular and pernicious way even after binning in 5-year age groups. In that case, it is advised to select a smoothing procedure designed for 5-year age groups (see `smooth_age_5()` and the vignette on smoothing) before graduating, or else some other more agressive smoothing option (see `agesmth1()`). The present vignette does not offer specific guidelines for such adjustments, but we do offer two new age quality indices that might be useful for deciding whether to resort to agressive smoothing: `zero_pref_sawtooth()` checks for and rates the mentioned jagged pattern in 5-year age groups. `five_year_roughness()` gives a total measure of noise for data in 5-year age groups, but does not look for a particular pattern to it. This second measure should not be used in isolation, but together with visual assessment. More details about such adjustments and decisions can be found in a second vignette on smoothing.
+Often the degree of detected heaping suggests that some smoothing procedure is warranted, but the kind of smoothing procedure may be a function of the particular way in which heaping is manifested. For example, if heaping is light, or just a matter of rounding to the nearest digit divisible by 5, then there will be no major difference between heaping on 0s versus heaping on 5s. In this case, grouping to 5-year age bins (see `groupAges()`) and then graduating in a constrained way (see `sprague()`) may suffice to remove the distortion while maintaining the broad underlying age pattern. However, if heaping is much worse on 0s than on 5s, the age pattern may still be distorted in a regular and pernicious way even after binning in 5-year age groups. In that case, it is advised to select a smoothing procedure designed for 5-year age groups (see `smooth_age_5()` and the vignette on smoothing) before graduating, or else some other more aggressive smoothing option (see `agesmth1()`). The present vignette does not offer specific guidelines for such adjustments, but we do offer two new age quality indices that might be useful for deciding whether to resort to aggressive smoothing: `zero_pref_sawtooth()` checks for and rates the mentioned jagged pattern in 5-year age groups. `five_year_roughness()` gives a total measure of noise for data in 5-year age groups, but does not look for a particular pattern to it. This second measure should not be used in isolation, but together with visual assessment. More details about such adjustments and decisions can be found in a second vignette on smoothing.
### Whipple Index
@@ -105,7 +105,7 @@ Bi
This method, based on the underlying principles and assumptions of the original Whipple's index, improves by extending its basic principle to all ten digits. It compares single terminal digit numerators to denominators consisting in 5-year age groups centered on the terminal digit of age in question [@noumbissi1992indice]. It relies on a weaker assumption of linearity over an age range of five years rather than ten. It is based once more on the underlying principles and assumptions of the original Whipple's index and introduces the following formulas to measure age heaping:
\begin{align}
-\label{eq:Noumbussi}
+\label{eq:Noumbissi}
W_0 =5\frac{P_{30}+P_{40}+P_{50}+P_{60}}{_5P_{28}+\, _5P_{38}+\, _5P_{48}+\, _5P_{58}} \\
W_5 =5\frac{P_{25}+P_{35}+P_{45}+P_{55}}{_5P_{23}+\, _5P_{33}+\, _5P_{43}+\, _5P_{53}} \\
\end{align}
@@ -201,4 +201,4 @@ r5
Values greater than 0.1 coupled with a sawtooth value greater than 0 already suggest that some smoothing is warranted. If there is no detected sawtooth pattern, then five-year-roughness indices should probably need to be higher and visually confirmed before deciding to smooth. Further guidelines can be found in the vignette on smoothing.
-##References
+# References
diff --git a/vignettes/case_study_1.Rmd b/vignettes/case_study_1.Rmd
index a14ef53f3..48404245e 100644
--- a/vignettes/case_study_1.Rmd
+++ b/vignettes/case_study_1.Rmd
@@ -22,7 +22,7 @@ knitr::opts_chunk$set(
```
## Intro
-Here the idea is use API to read in death count and pop count data, evaluate them, adjust them, get to single ages, and make a single age lifetable. Or Abrdiged lifetable. Or both and compare. Show different closeout options too.
+Here the idea is to use the API to read in death count and pop count data, evaluate them, adjust them, get to single ages, and make a single age lifetable. Or Abridged lifetable. Or both and compare. Show different closeout options too.
##
diff --git a/vignettes/graduation_with_demotools.Rmd b/vignettes/graduation_with_demotools.Rmd
index 40061955c..672b64d01 100644
--- a/vignettes/graduation_with_demotools.Rmd
+++ b/vignettes/graduation_with_demotools.Rmd
@@ -28,7 +28,7 @@ knitr::opts_chunk$set(
## How to use `DemoTools` to graduate counts in grouped ages
### What is graduation?
-Graduation is a practice used to derive figures for _n_-year age groups, for example 5-year age groups from census data, that are corrected for net reporting error [@siegel2004methods]. The basic idea is to fit different curves to the original _n_-year and redistribute them into single-year values. These techniques are designed so that the sum of the interpolated single-year values is consistent with the total for the groups as a whole. Among the major graduation methods are the Sprague [@sprague1880explanation] and Beers [@beers1945modified] oscilatory methods, monotone spline and the uniform distribution. More recently, the penalized composite link model (pclm) was proposed to ungroup coarsely aggregated data [@rizzi2015efficient]. All of these methodologies are implemented in `DemoTools`.
+Graduation is a practice used to derive figures for _n_-year age groups, for example 5-year age groups from census data, that are corrected for net reporting error [@siegel2004methods]. The basic idea is to fit different curves to the original _n_-year and redistribute them into single-year values. These techniques are designed so that the sum of the interpolated single-year values is consistent with the total for the groups as a whole. Among the major graduation methods are the Sprague [@sprague1880explanation] and Beers [@beers1945modified] oscillatory methods, monotone spline and the uniform distribution. More recently, the penalized composite link model (pclm) was proposed to ungroup coarsely aggregated data [@rizzi2015efficient]. All of these methodologies are implemented in `DemoTools`.
### Why graduate?
One of the main purposes of graduation is to refine the detail of available data in published statistics or surveys. This allows to estimate numbers of persons in single years of age from data originally in 5-year age groups. In practice, graduation of mortality curves has been very important to estimate reliable life tables, but also in fertility studies graduation is useful for the analysis of data from populations where demographic records are defective [@brass1960graduation].
@@ -80,7 +80,7 @@ sum(Value)
```
### Beers
-Following a similar idea, Beers interpolated two overlapping curves minimizing the squares of the differences within the interpolation range [@beers1945modified]. Specifically, Beers did this by minimizing fifth differences for a six-term formula, refered to as the 'Ordinary' Beers method [@beers1945six]. Subsequently, the ordinay formula was modified to relax the requirement that the given value be reproduced and yield smoother interpolated results, refered to as 'Modified' Beers method [@beers1945modified].
+Following a similar idea, Beers interpolated two overlapping curves minimizing the squares of the differences within the interpolation range [@beers1945modified]. Specifically, Beers did this by minimizing fifth differences for a six-term formula, referred to as the 'Ordinary' Beers method [@beers1945six]. Subsequently, the ordinary formula was modified to relax the requirement that the given value be reproduced and yield smoother interpolated results, referred to as 'Modified' Beers method [@beers1945modified].
```{r,out.width='\\textwidth', fig.width= 6, fig.height=6, fig.align='center'}
beers.ordinary <- graduate(Value, Age, method = "beers(ord)")
@@ -101,7 +101,7 @@ sum(Value)
```
### Monotone spline
-Graduation by a monotone spline is another option available in `DemoTools`. This option offers an algorithm to produce interpolants that preserve properties such as monotonicity or convexity that are present in the data [@fritsch1980monotone]. These are desirable conditions to not introduce biased details that cannot be ascertained from the data [@hyman1983accurate]. To run this algorithm in `DemoTools` the option `mono` should be selected as follows:
+Graduation by a monotone spline is another option available in `DemoTools`. This option offers an algorithm to produce interpolations that preserve properties such as monotonicity or convexity that are present in the data [@fritsch1980monotone]. These are desirable conditions to not introduce biased details that cannot be ascertained from the data [@hyman1983accurate]. To run this algorithm in `DemoTools` the option `mono` should be selected as follows:
```{r,out.width='\\textwidth', fig.width= 6, fig.height=6, fig.align='center'}
mono.grad <- graduate(Value, Age, method = "mono")
diff --git a/vignettes/lifetables_with_demotools.Rmd b/vignettes/lifetables_with_demotools.Rmd
index 34dbf5c75..416b71329 100644
--- a/vignettes/lifetables_with_demotools.Rmd
+++ b/vignettes/lifetables_with_demotools.Rmd
@@ -32,14 +32,14 @@ The lifetable is one of the most used tools in demographic research. It is desig
### The column $_nA_x$
-The column $_nA_x$ refers to the average number of person-years lived in the interval $x$ to $x+n$ by those who died in the interval. This is a key column when calculating period lifetables because it allows to convert from age-specific death rates $_nM_x$ to the probabiliy of dying between ages $x$ and $x+n$ denoted as $_nq_x$. There are several ways to calculate this column. `DemoTools` provides two options to calculate this column from which the user can choose, `PAS` and `UN`. The `PAS` option relies on following the rule of thumb given by Coale and Demeny for ages 0, and 1-4, and assumes interval midpoints in higher ages. This choice relies on the fact that the lower the level of mortality, the more heavily will infant deaths be concentrated at the earliest stages of infancy [@preston2000demography], and therefore the values of $_1A_0$ and $_4A_1$ depend on the values of $_1q_0$ and $_4q_1$ [@Coale1983]. For age 0, there are two choices for the rule of thumb to apply: Andreev-Kingkade is the default, but the Coale-Demeny model pattern is also available. The `UN` option also relies on the Coale-Demeny rule of thumb for ages 1-4. For ages 5-9 onwards, it uses the formula developed by @greville1977short.
+The column $_nA_x$ refers to the average number of person-years lived in the interval $x$ to $x+n$ by those who died in the interval. This is a key column when calculating period lifetables because it allows to convert from age-specific death rates $_nM_x$ to the probability of dying between ages $x$ and $x+n$ denoted as $_nq_x$. There are several ways to calculate this column. `DemoTools` provides two options to calculate this column from which the user can choose, `PAS` and `UN`. The `PAS` option relies on following the rule of thumb given by Coale and Demeny for ages 0, and 1-4, and assumes interval midpoints in higher ages. This choice relies on the fact that the lower the level of mortality, the more heavily will infant deaths be concentrated at the earliest stages of infancy [@preston2000demography], and therefore the values of $_1A_0$ and $_4A_1$ depend on the values of $_1q_0$ and $_4q_1$ [@Coale1983]. For age 0, there are two choices for the rule of thumb to apply: Andreev-Kingkade is the default, but the Coale-Demeny model pattern is also available. The `UN` option also relies on the Coale-Demeny rule of thumb for ages 1-4. For ages 5-9 onwards, it uses the formula developed by @greville1977short.
## Graduation as a lifetable step
Abridged lifetables can be constructed with their own consequential $_nA_x$ assumptions, or they can be constructed by single ages and make a simpler lifetables after graduation. In the latter case the $_nA_x$ assumption is partly outsourced to the graduation and partly simplified by assuming midpoints (usually). The basic idea is that graduation followed by single age lifetable will produce an implied $nAx$, which can be backed out. The alternative is to do something sophisticated to get nAx well behaved at the level of abridged lifetables.
## Closing the lifetable
-An important step in the construction of lifetables is the choice to deal with the open-ended age interval. A standard way to deal with this final age group is by taking the inverse of the final $_nM_x$ as life expectancy for that age group. `DemoTools` instead offers variety of extrapolation methods implemented in the `MortalityLaws` package. For this, a desired open age must be specified, defaulting to the present open age group, but which can not exceed 110 in the present implementation. By default, the extrapolation model is fit to ages 60 and higher, but the user can control this using the `extrapFit` argument (give the vector of ages, which must be a subset of `Age`). By default extrapolated values are used starting with the input open age, but you can lower this age using the `extrapFrom` argument. The choices given in this implementation are: Kannisto, Kannisto-Makeham, Makeham, Gompertz, Gamma-Gompertz, Beard, Beard-Makeham and the Quadratic models. Documentation for these models can be foun in `MortalityLaws`.
+An important step in the construction of lifetables is the choice to deal with the open-ended age interval. A standard way to deal with this final age group is by taking the inverse of the final $_nM_x$ as life expectancy for that age group. `DemoTools` instead offers a variety of extrapolation methods implemented in the `MortalityLaws` package. For this, a desired open age must be specified, defaulting to the present open age group, but which cannot exceed 110 in the present implementation. By default, the extrapolation model is fit to ages 60 and higher, but the user can control this using the `extrapFit` argument (give the vector of ages, which must be a subset of `Age`). By default extrapolated values are used starting with the input open age, but you can lower this age using the `extrapFrom` argument. The choices given in this implementation are: Kannisto, Kannisto-Makeham, Makeham, Gompertz, Gamma-Gompertz, Beard, Beard-Makeham and the Quadratic models. Documentation for these models can be found in `MortalityLaws`.
## Example with `DemoTools`
@@ -66,9 +66,9 @@ library(DT)
```
-In this output, the `Age` column indicates the lower bound of each age group. The column `nMx` are the observed death rates, usually result from dividing the deaths observed in a calendar year by the mid-year population. The `nAX`, as described above, indicates the average number of person-years lived in the age-group by those who died. The column `lx` refers to the survival function. If divided by the initial value, i.e. 100,000, indicates the probability of surviving from birth to a given age. `ndx` is the age-at-death distribution and `Sx` are the survivorship ratios. Finally, `ex` is the life expectancy. For this example, males in Mexico had a life expectancty at birth of 52.58.
+In this output, the `Age` column indicates the lower bound of each age group. The column `nMx` are the observed death rates, usually result from dividing the deaths observed in a calendar year by the mid-year population. The `nAX`, as described above, indicates the average number of person-years lived in the age-group by those who died. The column `lx` refers to the survival function. If divided by the initial value, i.e. 100,000, indicates the probability of surviving from birth to a given age. `ndx` is the age-at-death distribution and `Sx` are the survivorship ratios. Finally, `ex` is the life expectancy. For this example, males in Mexico had a life expectancy at birth of 52.58.
-## References
+# References
diff --git a/vignettes/migration_with_demotools.Rmd b/vignettes/migration_with_demotools.Rmd
index 6ee13469f..db005db39 100644
--- a/vignettes/migration_with_demotools.Rmd
+++ b/vignettes/migration_with_demotools.Rmd
@@ -206,6 +206,6 @@ rc_res[["fit_df"]] %>%
#### Comments about warnings
-When running `mc_estimate_rc` it is common to see warnings from Stan, particularly when the retirement and post-retirement families are included in the model. Warnings about divergent transitions, low ESS and maximum treedepth can often be eliminated by changing the Stan settings to have a larger `adapt_delta`, `iter` and `maximum_treedepth`, respectively. If warnings persist it is not the end of the world; the results are probably still fine. The warnings are largely a consequence of the complexity of the function being fitted and the strong correlations that exist between estimates of different parameters. Work is still on going investigating different prior set-ups to improve the efficiency of fit.
+When running `mc_estimate_rc` it is common to see warnings from Stan, particularly when the retirement and post-retirement families are included in the model. Warnings about divergent transitions, low ESS and maximum tree depth can often be eliminated by changing the Stan settings to have a larger `adapt_delta`, `iter` and `maximum_treedepth`, respectively. If warnings persist it is not the end of the world; the results are probably still fine. The warnings are largely a consequence of the complexity of the function being fitted and the strong correlations that exist between estimates of different parameters. Work is still ongoing, investigating different prior set-ups to improve the efficiency of fit.
## References
diff --git a/vignettes/smoothing_with_demotools.Rmd b/vignettes/smoothing_with_demotools.Rmd
index fff36a8e6..8206937d9 100644
--- a/vignettes/smoothing_with_demotools.Rmd
+++ b/vignettes/smoothing_with_demotools.Rmd
@@ -23,7 +23,7 @@ knitr::opts_chunk$set(
## How to use `DemoTools` to smooth population counts
-Smoothing data over age is traditionally intended to have plausible/corrected estimates of population counts from census data. Smoothing procedures help to derive figures that are corrected primarly for net error by fitting different curves to the original 5 or 10-year totals, modifying the original counts [@siegel2004methods]. Several methods have been developed for this aim and the major smoothing methods are included in `DemoTools`. Including the Carrier-Farrag [@carrier1959reduction], Arriaga [@arriaga1994population], Karup-King-Newton, United Stations [@united1955manual], Spencer [@spencer1987improvements] and Zelnik methods. Below we briefly give an overview of the method and apply them to the male Indian population in 1991.
+Smoothing data over age is traditionally intended to have plausible/corrected estimates of population counts from census data. Smoothing procedures help to derive figures that are corrected primarily for net error by fitting different curves to the original 5 or 10-year totals, modifying the original counts [@siegel2004methods]. Several methods have been developed for this aim and the major smoothing methods are included in `DemoTools`. These include the Carrier-Farrag [@carrier1959reduction], Arriaga [@arriaga1994population], Karup-King-Newton, United Nations [@united1955manual], Spencer [@spencer1987improvements] and Zelnik methods. Below we briefly give an overview of the methods and apply them to the male Indian population in 1991.
```{r,out.width='\\textwidth', fig.width= 6, fig.height=6, fig.align='center'}
library(DemoTools)
@@ -42,7 +42,7 @@ plot(Age, Value/sum(Value), type = 'l',
### Carrier-Farrag
-This method considers the ratio, $K$, of the population in one quinary age-group to the next one [@carrier1959reduction]. If $v_0$ is a denary age-group, and $v_{-2}$ and $v_2$ are the preceding and succeeding age-groups, respectively, and if $K^4 = v_{-2}/v_2$. Then, the older quinary group $v_1$ can be estimated by $v_0/(1+K)$. This equation connects the population in two ten-year age groups separated by an interval of ten years. Therefore the value $K$ can be seen as the middle point between the two ten-year age groups. To run this method in `DemoTools` the function `afesmth` is used with the option 'Carrier-Farrag'. The figure below shows the smoothed population by five-year age groups.
+This method considers the ratio, $K$, of the population in one five-year age-group to the next one [@carrier1959reduction]. If $v_0$ is a ten-year age-group, and $v_{-2}$ and $v_2$ are the preceding and succeeding age-groups, respectively, with $K^4 = v_{-2}/v_2$, then the older five-year group $v_1$ can be estimated by $v_0/(1+K)$. This equation connects the population in two ten-year age groups separated by an interval of ten years. Therefore the value $K$ can be seen as the middle point between the two ten-year age groups. To run this method in `DemoTools` the function `agesmth` is used with the option 'Carrier-Farrag'. The figure below shows the smoothed population by five-year age groups.
```{r,out.width='\\textwidth', fig.width= 6, fig.height=6, fig.align='center'}
@@ -122,7 +122,7 @@ and
_{5}P_{x+5} = _{10}P_{x} - _{5}P_{x}.
\end{equation*}
-To implement this smoothing process select the `KKN` inthe `agesmth` function.
+To implement this smoothing process select the `KKN` in the `agesmth` function.
```{r,out.width='\\textwidth', fig.width= 6, fig.height=6, fig.align='center', eval = FALSE}
# TODO smooth_age_5 is throwing an internal error
@@ -168,7 +168,7 @@ The Strong formula adjusts proportionally the smoothed 10-year age groups to the
_{10}\hat{P}_x = \frac{_{10}P_{x-10} + 2\, _{10}P_{x} + _{10}P_{x+10}}{4}
\end{equation}
-where $_{10}\hat{P}_x$ represents the smoothed poopulation ages $x$ to $x+9$. It is implemented in `DemoTools` as follows
+where $_{10}\hat{P}_x$ represents the smoothed population ages $x$ to $x+9$. It is implemented in `DemoTools` as follows
```{r,out.width='\\textwidth', fig.width= 6, fig.height=6, fig.align='center'}
@@ -183,7 +183,7 @@ axis(1, labels = paste0(seq(0,100,5),'-',seq(4,104,5)), at =seq(0,100,5))
### Zigzag
-The implementatios of this methods assumes that persons incorrectly reported in peak age groups are evenly divided between the two adjacent age groups [@feeney2013]. It relies in minimizing a measure of "roughness":
+The implementation of this method assumes that persons incorrectly reported in peak age groups are evenly divided between the two adjacent age groups [@feeney2013]. It relies on minimizing a measure of "roughness":
consider the difference $R[i]$ between the number of persons in the $i$-th age group and the
average of the numbers in adjacent age groups, $R[i] = N[i] - (N[i -1] + N[i + 1])/2$.
If the distribution displays zigzag, the $R[i]$ will be relatively large. If the distribution is smooth,
@@ -203,7 +203,7 @@ axis(1, labels = paste0(seq(0,100,5),'-',seq(4,104,5)), at =seq(0,100,5))
### Polynomial
-A more general way is to smooth trhough linear models. Polynomial fitting is used to mooth data over age or time fitting linear models. It can be tweaked by changing the degree and by either log or power transforming and can be used on any age groups, including irregularly spaced, single age, or 5-year age groups. It can be implemented in `DemoTools` with the function `agesmth1` and the option `poly` as follows
+A more general way is to smooth through linear models. Polynomial fitting is used to smooth data over age or time by fitting linear models. It can be tweaked by changing the degree and by either log or power transforming and can be used on any age groups, including irregularly spaced, single age, or 5-year age groups. It can be implemented in `DemoTools` with the function `agesmth1` and the option `poly` as follows
```{r,out.width='\\textwidth', fig.width= 6, fig.height=6, fig.align='center'}