diff --git a/.RData b/.RData index 4f6cf766..bc879d38 100644 Binary files a/.RData and b/.RData differ diff --git a/NAMESPACE b/NAMESPACE index 4dce798f..4df356b1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -34,6 +34,7 @@ export(check_heaping_sawtooth) export(check_heaping_spoorenberg) export(check_heaping_whipple) export(dec.date) +export(downloadAsfr) export(downloadSRB) export(downloadnLx) export(fetch_wpp_births) diff --git a/R/utils_downloads.R b/R/utils_downloads.R index c2aac3e5..8f671ad7 100644 --- a/R/utils_downloads.R +++ b/R/utils_downloads.R @@ -3,19 +3,42 @@ # and potentially others. #' Extract Lx estimates from WPP2019 -#' @description We use the `FetchLifeTableWpp2019` function of the `fertestr` to extract `Lx` from `wpp2019`, interpolated to an exact date. -#' @param nLx either `NULL` or a numeric vector of lifetable exposure. If it's the second then we just pass it back. -#' @param location UN Pop Div `LocName` or `LocID` -#' @param gender `"male"`, `"female"`, or `"both"` -#' @param nLxDatesIn numeric vector of three decimal dates produced by (or passed through) `basepop_five()` +#' @description We extract `Lx` from `wpp2019`, interpolated to exact dates. Different methods availables. +#' A vector of countries can handle, but with an unique sex. Row names are not indicative of countries. +#' @param nLx numeric. either `NULL` or a numeric vector of lifetable exposure. If it's the second then we just pass it back. +#' @param location vector. UN Pop Div `LocName` or `LocID` +#' @param gender character. `"male"`, `"female"`, or `"both"` +#' @param nLxDatesIn numeric. Vector of three decimal dates produced by (or passed through) `basepop_five()` +#' @param method character. Could be `"linear"`, `"exponential"`, or `"power"` #' #' @return numeric matrix of `nLx` with `length(nLxDatesIn)` and abrdiged ages in rows. #' @export -#' -downloadnLx <- function(nLx, location, gender, nLxDatesIn) { +#' @examples +#' # life expectancy calculated from Lx downloaded from WPP19. Using names or codes. +#' Lxs_name <- downloadnLx(nLx=NULL, location = "Argentina", +#' gender = "both", nLxDatesIn = 1950:2030) +#' Lxs_code <- downloadnLx(nLx=NULL, location = "32", +#' gender = "both", nLxDatesIn = 1950:2030) +#' \dontrun{ +#' plot(1950:2030, as.numeric(colSums(Lxs_name)), xlab = "Year", ylab="e0") +#' lines(1950:2030, as.numeric(colSums(Lxs_code))) +#' } +#' # life expectancy for different countries +#' Lxs_countries <- downloadnLx(nLx=NULL, location = c("Argentina","Brazil","Uruguay"), +#' gender = "both", nLxDatesIn = 1950:2025) +#' \dontrun{ +#' plot(1950:2025, as.numeric(colSums(Lxs_countries[1:22,])), +#' t="l", xlab = "Year", ylab="e0", ylim = c(40,80)) +#' lines(1950:2025, as.numeric(colSums(Lxs_countries[23:44,])), col=2) +#' lines(1950:2025, as.numeric(colSums(Lxs_countries[45:64,])), col=3) +#' legend("bottomright",c("Argentina","Brazil","Uruguay"),lty=1,col=1:3) +#' } +downloadnLx <- function(nLx, location, gender, nLxDatesIn, method="linear") { + requireNamespace("fertestr", quietly = TRUE) requireNamespace("magrittr", quietly = TRUE) verbose <- getOption("basepop_verbose", TRUE) + if (!is.null(nLx)) { # TR: ensure colnames passed nLx <- as.matrix(nLx) @@ -28,55 +51,88 @@ downloadnLx <- function(nLx, location, gender, nLxDatesIn) { if (is.null(nLx)){ - if (is.null(location)) stop("You need to provide a location to download the data for nLx") - + # stop/warnings + if (is.null(location)){ + stop("You need to provide a location to download the data for nLx") + } + if (!any(is_LocID(location))) { + location_code <- get_location_code(location) + }else { + location_code <- as.integer(location) + } + if (verbose) { cat(paste0("Downloading nLx data for ", location, ", years ", paste(nLxDatesIn,collapse=", "), ", gender ", gender), sep = "\n") } + if(any(nLxDatesIn<1950,nLxDatesIn>2025)){ + cat("Careful, extrapolating beyond range 1950-2025") + } - . <- NULL + # handle sex + sex_code <- ifelse(tolower(gender) == "both", "b", + ifelse(tolower(gender) == "female", "f", + ifelse(tolower(gender) == "male", "m", NA))) + Sex_mortlaws <- ifelse(sex_code == "b", "total", tolower(gender)) + stopifnot(`Invalid sex name, please set it to 'both', 'male' or 'female'` = !is.na(sex_code)) - ind_invalidyear <- which(nLxDatesIn < 1955) - if (length(ind_invalidyear) != 0) { - invalid_yrs <- paste0(nLxDatesIn[ind_invalidyear], collapse = ", ") - cat("nLxDate(s)", invalid_yrs, "is/are below 1955. Capping at 1955\n") - nLxDatesIn[ind_invalidyear] <- 1955 - } + # initial data + lt_wpp19 <-DemoToolsData::WPP2019_lt - nLx <- - lapply(nLxDatesIn, function(x) { - fertestr::FetchLifeTableWpp2019(location, x, gender)$Lx - }) %>% - do.call("cbind", .) %>% - as.matrix() + # filter and matrix shape + lt_ctry <- lt_wpp19[lt_wpp19$LocID %in% location_code & + lt_wpp19$Sex %in% sex_code,] %>% as.data.frame() %>% + reshape(data = ., + direction = "wide", idvar = c("LocID","AgeStart","Sex"), + timevar = "Year", v.names = "mx", drop = c("AgeSpan","lx")) + + # intert/extrap rates and built life tables for each combination location/Sex/Year + .<-NULL + out <- cbind(lt_ctry[,c(1:3)], + interp(lt_ctry[,-c(1:3)], + seq(1953,2023,5), as.numeric(nLxDatesIn), + extrap = TRUE, method = method) %>% + as.data.frame() %>% + setNames(as.character(nLxDatesIn)) + ) %>% + split(., list(lt_ctry$LocID, lt_ctry$Sex)) %>% + lapply(function(X){ + Age <- X[["AgeStart"]] + apply(X[,-c(1:3)] %>% + as.data.frame()%>% setNames(as.character(nLxDatesIn)), 2, + function(S){ + MortalityLaws::LifeTable(x = Age, + mx = S, + lx0 = 1, + sex = Sex_mortlaws)$lt$Lx + }) + }) %>% + do.call("rbind", .) - colnames(nLx) <- nLxDatesIn - n <- nrow(nLx) - Age <- c(0,1,seq(5,(n-2)*5,by=5)) - rownames(nLx) <- Age - return(nLx) + # combination as rowname + rownames(out) <- lt_ctry$AgeStart + + return(out) } } #' Extract ASFR estimates from WPP2019 -#' @description We use the `FetchFertilityWpp2019` function of the `fertestr` to extract `asfr` from `wpp2019`, interpolated to an exact date. -#' @param location UN Pop Div `LocName` or `LocID` -#' @param AsfrDatesIn numeric vector of decimal dates. -#' @param extrap_50s logical, shall we extrapolate between 1950 and 1955? +#' @description We extract `ASFRx` from `wpp2019`, interpolated to exact dates. Different methods availables. +#' A vector of countries can handle, but with an unique sex. Row names are not indicative of countries. +#' @param Asfrmat numeric. +#' @param location vector. UN Pop Div `LocName` or `LocID` +#' @param AsfrDatesIn numeric. Vector of decimal dates. +#' @param method character. Could be `"linear"`, `"exponential"`, or `"power"` #' #' @return numeric matrix interpolated asfr #' @export #' @examples -#' # Kenya example -#' kenya_noext <- downloadAsfr(Asfrmat = NULL, location = "Kenya", AsfrDatesIn = 1950:2050) -#' kenya_ext <- downloadAsfr(Asfrmat = NULL, location = "Kenya", AsfrDatesIn = 1950:2050, T) +#' # Total fertility ratio calculated from ASFRx downloaded from WPP19. +#' # See `downloadnLx` for analogous examples on multiple countries or using codes instead of names. +#' ASFR_Arg <- downloadAsfr(Asfrmat = NULL, location = "Argentina", AsfrDatesIn = 1950:2025) #' \dontrun{ -#' plot(1950:2050, colSums(kenya_noext) * 5, t="l", main="TFR Kenya") -#' lines(1950:2050, colSums(kenya_ext) * 5, col=2) -#' legend("topright",c("NoExtr","Extr"),lty=1,col = 1:2) +#' plot(1950:2025, as.numeric(colSums(ASFR_Arg))*5, xlab = "Year", ylab="TFR", ylim=c(1.5,4), t="l") #' } - -downloadAsfr <- function(Asfrmat, location = NULL, AsfrDatesIn, extrap_50s = FALSE) { +downloadAsfr <- function(Asfrmat, location = NULL, AsfrDatesIn, method="linear") { requireNamespace("fertestr", quietly = TRUE) verbose <- getOption("basepop_verbose", TRUE) @@ -85,47 +141,43 @@ downloadAsfr <- function(Asfrmat, location = NULL, AsfrDatesIn, extrap_50s = FAL return(Asfrmat) } - if (is.null(location)) stop("You need to provide a location to download the data for Asfrmat") - - ind_invalidyear <- which(AsfrDatesIn < 1955) - if (length(ind_invalidyear) != 0) { - invalid_yrs <- paste0(AsfrDatesIn[ind_invalidyear], collapse = ", ") - if(!extrap_50s) cat("AsfrDate(s)", invalid_yrs, "is/are below 1955. Capping at 1955\n") - AsfrDatesIn[ind_invalidyear] <- 1955 + # stop/warnings + if (is.null(location)){ + stop("You need to provide a location to download the data for Asfrmat") + } + if (!any(is_LocID(location))) { + location_code <- get_location_code(location) + }else { + location_code <- as.integer(location) } - if (verbose) { - cat(paste0("Downloading Asfr data for ", - loc_message(location), - ", years ", - paste0(AsfrDatesIn),collapse=", "), sep = "\n") + cat(paste0("Downloading ASFR data for ", location, ", years ", paste(AsfrDatesIn,collapse=", ")), sep = "\n") } - - tmp <- - lapply(AsfrDatesIn, function(x) { - res <- fertestr::FetchFertilityWpp2019(location, x)["asfr"] - names(res) <- NULL - as.matrix(res)[2:nrow(res), , drop = FALSE] - }) - - Asfrmat <- do.call(cbind, tmp) - colnames(Asfrmat) <- AsfrDatesIn - - # IW: add extrap option extrap_50s - # IW: maybe no need to call fertestr::FetchFertilityWpp2019 and then fix first interval. - # just load all WPP2019_asfr, select location, interpolate with interp(...,extrap=TRUE), - # and return only requested years. What do you think? - if(extrap_50s){ - minor_years <- sort(unique(AsfrDatesIn))[1:2] - if(sum(1955:1960 %in% minor_years)==1) stop("Need two points at least between 1955 and 1960 in AsfrDatesIn to extrapolate.") - Asfrmat_1950 <- interp(Asfrmat[,as.character(minor_years)], - minor_years, - as.numeric(1950:1955), extrap = TRUE) - Asfrmat <- cbind(Asfrmat_1950, - Asfrmat[,as.numeric(colnames(Asfrmat))>1955]) + if(any(AsfrDatesIn<1950,AsfrDatesIn>2025)){ + cat("Careful, extrapolating beyond range 1950-2025") } - Asfrmat + # initial data + asfr_wpp19 <-DemoToolsData::WPP2019_asfr + + # spread format + asfr_ctry <- asfr_wpp19[asfr_wpp19$LocID %in% location_code,] %>% + as.data.frame() %>% + reshape(direction = "wide", idvar = c("LocID","AgeStart"), + timevar = "Year", v.names = "ASFR") + + # interp/extrap + out <- interp(asfr_ctry[,-c(1:3)], seq(1953,2023,5), + as.numeric(AsfrDatesIn), + extrap = TRUE, method = method) %>% + as.data.frame() %>% + setNames(as.character(AsfrDatesIn)) %>% + as.matrix() + + # combination as rowname + rownames(out) <- asfr_ctry$AgeStart + + return(out) } #' Extract SRB estimates from WPP2019 diff --git a/man/downloadAsfr.Rd b/man/downloadAsfr.Rd new file mode 100644 index 00000000..e48ab2ad --- /dev/null +++ b/man/downloadAsfr.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils_downloads.R +\name{downloadAsfr} +\alias{downloadAsfr} +\title{Extract ASFR estimates from WPP2019} +\usage{ +downloadAsfr(Asfrmat, location = NULL, AsfrDatesIn, method = "linear") +} +\arguments{ +\item{Asfrmat}{numeric.} + +\item{location}{vector. UN Pop Div \code{LocName} or \code{LocID}} + +\item{AsfrDatesIn}{numeric. Vector of decimal dates.} + +\item{method}{character. Could be \code{"linear"}, \code{"exponential"}, or \code{"power"}} +} +\value{ +numeric matrix interpolated asfr +} +\description{ +We extract \code{ASFRx} from \code{wpp2019}, interpolated to exact dates. Different methods availables. +A vector of countries can handle, but with an unique sex. Row names are not indicative of countries. +} +\examples{ +# Total fertility ratio calculated from ASFRx downloaded from WPP19. +# See `downloadnLx` for analogous examples on multiple countries or using codes instead of names. +ASFR_Arg <- downloadAsfr(Asfrmat = NULL, location = "Argentina", AsfrDatesIn = 1950:2025) +\dontrun{ +plot(1950:2025, as.numeric(colSums(ASFR_Arg))*5, xlab = "Year", ylab="TFR", ylim=c(1.5,4), t="l") +} +} diff --git a/man/downloadnLx.Rd b/man/downloadnLx.Rd index cf7895ad..c95a7575 100644 --- a/man/downloadnLx.Rd +++ b/man/downloadnLx.Rd @@ -4,20 +4,44 @@ \alias{downloadnLx} \title{Extract Lx estimates from WPP2019} \usage{ -downloadnLx(nLx, location, gender, nLxDatesIn) +downloadnLx(nLx, location, gender, nLxDatesIn, method = "linear") } \arguments{ -\item{nLx}{either \code{NULL} or a numeric vector of lifetable exposure. If it's the second then we just pass it back.} +\item{nLx}{numeric. either \code{NULL} or a numeric vector of lifetable exposure. If it's the second then we just pass it back.} -\item{location}{UN Pop Div \code{LocName} or \code{LocID}} +\item{location}{vector. UN Pop Div \code{LocName} or \code{LocID}} -\item{gender}{\code{"male"}, \code{"female"}, or \code{"both"}} +\item{gender}{character. \code{"male"}, \code{"female"}, or \code{"both"}} -\item{nLxDatesIn}{numeric vector of three decimal dates produced by (or passed through) \code{basepop_five()}} +\item{nLxDatesIn}{numeric. Vector of three decimal dates produced by (or passed through) \code{basepop_five()}} + +\item{method}{character. Could be \code{"linear"}, \code{"exponential"}, or \code{"power"}} } \value{ numeric matrix of \code{nLx} with \code{length(nLxDatesIn)} and abrdiged ages in rows. } \description{ -We use the \code{FetchLifeTableWpp2019} function of the \code{fertestr} to extract \code{Lx} from \code{wpp2019}, interpolated to an exact date. +We extract \code{Lx} from \code{wpp2019}, interpolated to exact dates. Different methods availables. +A vector of countries can handle, but with an unique sex. Row names are not indicative of countries. +} +\examples{ +# life expectancy calculated from Lx downloaded from WPP19. Using names or codes. +Lxs_name <- downloadnLx(nLx=NULL, location = "Argentina", + gender = "both", nLxDatesIn = 1950:2030) +Lxs_code <- downloadnLx(nLx=NULL, location = "32", + gender = "both", nLxDatesIn = 1950:2030) +\dontrun{ +plot(1950:2030, as.numeric(colSums(Lxs_name)), xlab = "Year", ylab="e0") +lines(1950:2030, as.numeric(colSums(Lxs_code))) +} +# life expectancy for different countries +Lxs_countries <- downloadnLx(nLx=NULL, location = c("Argentina","Brazil","Uruguay"), +gender = "both", nLxDatesIn = 1950:2025) +\dontrun{ +plot(1950:2025, as.numeric(colSums(Lxs_countries[1:22,])), + t="l", xlab = "Year", ylab="e0", ylim = c(40,80)) +lines(1950:2025, as.numeric(colSums(Lxs_countries[23:44,])), col=2) +lines(1950:2025, as.numeric(colSums(Lxs_countries[45:64,])), col=3) +legend("bottomright",c("Argentina","Brazil","Uruguay"),lty=1,col=1:3) +} } diff --git a/tests/testthat/test-basepop.R b/tests/testthat/test-basepop.R index 9b55f3ba..287a0691 100644 --- a/tests/testthat/test-basepop.R +++ b/tests/testthat/test-basepop.R @@ -652,46 +652,46 @@ test_that("basepop works well with SRBDatesIn", { }) - -test_that("basepop caps nLxDatesIn to 1955 when provided a date below that", { - - tmp_nlx <- c(1954, 1960) - expect_output( - tmp <- - basepop_five( - location = "Spain", - refDate = 1960, - Males_five = smoothed_males, - Females_five = smoothed_females, - SRB = sex_ratio, - nLxDatesIn = tmp_nlx, - AsfrDatesIn = c(1955, 1960), - method = "linear", - radix = 100000 - ), - regexp = "nLxDate\\(s\\) 1954 is/are below 1955\\. Capping at 1955", - all = FALSE - ) - - tmp_asfr <- c(1954, 1960) - expect_output( - tmp <- - basepop_five( - location = "Spain", - refDate = 1960, - Males_five = smoothed_males, - Females_five = smoothed_females, - SRB = sex_ratio, - nLxDatesIn = c(1955, 1960), - AsfrDatesIn = tmp_asfr, - method = "linear", - radix = 100000 - ), - regexp = "AsfrDate\\(s\\) 1954 is/are below 1955\\. Capping at 1955", - all = FALSE - ) - -}) +# IW: no need capping at 1955 +# test_that("basepop caps nLxDatesIn to 1955 when provided a date below that", { +# +# tmp_nlx <- c(1954, 1960) +# expect_output( +# tmp <- +# basepop_five( +# location = "Spain", +# refDate = 1960, +# Males_five = smoothed_males, +# Females_five = smoothed_females, +# SRB = sex_ratio, +# nLxDatesIn = tmp_nlx, +# AsfrDatesIn = c(1955, 1960), +# method = "linear", +# radix = 100000 +# ), +# regexp = "nLxDate\\(s\\) 1954 is/are below 1955\\. Capping at 1955", +# all = FALSE +# ) +# +# tmp_asfr <- c(1954, 1960) +# expect_output( +# tmp <- +# basepop_five( +# location = "Spain", +# refDate = 1960, +# Males_five = smoothed_males, +# Females_five = smoothed_females, +# SRB = sex_ratio, +# nLxDatesIn = c(1955, 1960), +# AsfrDatesIn = tmp_asfr, +# method = "linear", +# radix = 100000 +# ), +# regexp = "AsfrDate\\(s\\) 1954 is/are below 1955\\. Capping at 1955", +# all = FALSE +# ) +# +# }) test_that("basepop fails when it implies an extrapolation of > 5 years", {