Skip to content

Commit

Permalink
solving cheks for download_utils
Browse files Browse the repository at this point in the history
  • Loading branch information
IvanWilli committed Apr 3, 2021
1 parent fa427d2 commit fc812e7
Show file tree
Hide file tree
Showing 6 changed files with 230 additions and 121 deletions.
Binary file modified .RData
Binary file not shown.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ export(check_heaping_sawtooth)
export(check_heaping_spoorenberg)
export(check_heaping_whipple)
export(dec.date)
export(downloadAsfr)
export(downloadSRB)
export(downloadnLx)
export(fetch_wpp_births)
Expand Down
202 changes: 127 additions & 75 deletions R/utils_downloads.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,42 @@
# and potentially others.

#' Extract Lx estimates from WPP2019
#' @description We use the `FetchLifeTableWpp2019` function of the `fertestr` to extract `Lx` from `wpp2019`, interpolated to an exact date.
#' @param nLx either `NULL` or a numeric vector of lifetable exposure. If it's the second then we just pass it back.
#' @param location UN Pop Div `LocName` or `LocID`
#' @param gender `"male"`, `"female"`, or `"both"`
#' @param nLxDatesIn numeric vector of three decimal dates produced by (or passed through) `basepop_five()`
#' @description We extract `Lx` from `wpp2019`, interpolated to exact dates. Different methods availables.
#' A vector of countries can handle, but with an unique sex. Row names are not indicative of countries.
#' @param nLx numeric. either `NULL` or a numeric vector of lifetable exposure. If it's the second then we just pass it back.
#' @param location vector. UN Pop Div `LocName` or `LocID`
#' @param gender character. `"male"`, `"female"`, or `"both"`
#' @param nLxDatesIn numeric. Vector of three decimal dates produced by (or passed through) `basepop_five()`
#' @param method character. Could be `"linear"`, `"exponential"`, or `"power"`
#'
#' @return numeric matrix of `nLx` with `length(nLxDatesIn)` and abrdiged ages in rows.
#' @export
#'
downloadnLx <- function(nLx, location, gender, nLxDatesIn) {
#' @examples
#' # life expectancy calculated from Lx downloaded from WPP19. Using names or codes.
#' Lxs_name <- downloadnLx(nLx=NULL, location = "Argentina",
#' gender = "both", nLxDatesIn = 1950:2030)
#' Lxs_code <- downloadnLx(nLx=NULL, location = "32",
#' gender = "both", nLxDatesIn = 1950:2030)
#' \dontrun{
#' plot(1950:2030, as.numeric(colSums(Lxs_name)), xlab = "Year", ylab="e0")
#' lines(1950:2030, as.numeric(colSums(Lxs_code)))
#' }
#' # life expectancy for different countries
#' Lxs_countries <- downloadnLx(nLx=NULL, location = c("Argentina","Brazil","Uruguay"),
#' gender = "both", nLxDatesIn = 1950:2025)
#' \dontrun{
#' plot(1950:2025, as.numeric(colSums(Lxs_countries[1:22,])),
#' t="l", xlab = "Year", ylab="e0", ylim = c(40,80))
#' lines(1950:2025, as.numeric(colSums(Lxs_countries[23:44,])), col=2)
#' lines(1950:2025, as.numeric(colSums(Lxs_countries[45:64,])), col=3)
#' legend("bottomright",c("Argentina","Brazil","Uruguay"),lty=1,col=1:3)
#' }
downloadnLx <- function(nLx, location, gender, nLxDatesIn, method="linear") {

requireNamespace("fertestr", quietly = TRUE)
requireNamespace("magrittr", quietly = TRUE)
verbose <- getOption("basepop_verbose", TRUE)

if (!is.null(nLx)) {
# TR: ensure colnames passed
nLx <- as.matrix(nLx)
Expand All @@ -28,55 +51,88 @@ downloadnLx <- function(nLx, location, gender, nLxDatesIn) {

if (is.null(nLx)){

if (is.null(location)) stop("You need to provide a location to download the data for nLx")

# stop/warnings
if (is.null(location)){
stop("You need to provide a location to download the data for nLx")
}
if (!any(is_LocID(location))) {
location_code <- get_location_code(location)
}else {
location_code <- as.integer(location)
}

if (verbose) {
cat(paste0("Downloading nLx data for ", location, ", years ", paste(nLxDatesIn,collapse=", "), ", gender ", gender), sep = "\n")
}
if(any(nLxDatesIn<1950,nLxDatesIn>2025)){
cat("Careful, extrapolating beyond range 1950-2025")
}

. <- NULL
# handle sex
sex_code <- ifelse(tolower(gender) == "both", "b",
ifelse(tolower(gender) == "female", "f",
ifelse(tolower(gender) == "male", "m", NA)))
Sex_mortlaws <- ifelse(sex_code == "b", "total", tolower(gender))
stopifnot(`Invalid sex name, please set it to 'both', 'male' or 'female'` = !is.na(sex_code))

ind_invalidyear <- which(nLxDatesIn < 1955)
if (length(ind_invalidyear) != 0) {
invalid_yrs <- paste0(nLxDatesIn[ind_invalidyear], collapse = ", ")
cat("nLxDate(s)", invalid_yrs, "is/are below 1955. Capping at 1955\n")
nLxDatesIn[ind_invalidyear] <- 1955
}
# initial data
lt_wpp19 <-DemoToolsData::WPP2019_lt

nLx <-
lapply(nLxDatesIn, function(x) {
fertestr::FetchLifeTableWpp2019(location, x, gender)$Lx
}) %>%
do.call("cbind", .) %>%
as.matrix()
# filter and matrix shape
lt_ctry <- lt_wpp19[lt_wpp19$LocID %in% location_code &
lt_wpp19$Sex %in% sex_code,] %>% as.data.frame() %>%
reshape(data = .,
direction = "wide", idvar = c("LocID","AgeStart","Sex"),
timevar = "Year", v.names = "mx", drop = c("AgeSpan","lx"))

# intert/extrap rates and built life tables for each combination location/Sex/Year
.<-NULL
out <- cbind(lt_ctry[,c(1:3)],
interp(lt_ctry[,-c(1:3)],
seq(1953,2023,5), as.numeric(nLxDatesIn),
extrap = TRUE, method = method) %>%
as.data.frame() %>%
setNames(as.character(nLxDatesIn))
) %>%
split(., list(lt_ctry$LocID, lt_ctry$Sex)) %>%
lapply(function(X){
Age <- X[["AgeStart"]]
apply(X[,-c(1:3)] %>%
as.data.frame()%>% setNames(as.character(nLxDatesIn)), 2,
function(S){
MortalityLaws::LifeTable(x = Age,
mx = S,
lx0 = 1,
sex = Sex_mortlaws)$lt$Lx
})
}) %>%
do.call("rbind", .)

colnames(nLx) <- nLxDatesIn
n <- nrow(nLx)
Age <- c(0,1,seq(5,(n-2)*5,by=5))
rownames(nLx) <- Age
return(nLx)
# combination as rowname
rownames(out) <- lt_ctry$AgeStart

return(out)
}
}

#' Extract ASFR estimates from WPP2019
#' @description We use the `FetchFertilityWpp2019` function of the `fertestr` to extract `asfr` from `wpp2019`, interpolated to an exact date.
#' @param location UN Pop Div `LocName` or `LocID`
#' @param AsfrDatesIn numeric vector of decimal dates.
#' @param extrap_50s logical, shall we extrapolate between 1950 and 1955?
#' @description We extract `ASFRx` from `wpp2019`, interpolated to exact dates. Different methods availables.
#' A vector of countries can handle, but with an unique sex. Row names are not indicative of countries.
#' @param Asfrmat numeric.
#' @param location vector. UN Pop Div `LocName` or `LocID`
#' @param AsfrDatesIn numeric. Vector of decimal dates.
#' @param method character. Could be `"linear"`, `"exponential"`, or `"power"`
#'
#' @return numeric matrix interpolated asfr
#' @export
#' @examples
#' # Kenya example
#' kenya_noext <- downloadAsfr(Asfrmat = NULL, location = "Kenya", AsfrDatesIn = 1950:2050)
#' kenya_ext <- downloadAsfr(Asfrmat = NULL, location = "Kenya", AsfrDatesIn = 1950:2050, T)
#' # Total fertility ratio calculated from ASFRx downloaded from WPP19.
#' # See `downloadnLx` for analogous examples on multiple countries or using codes instead of names.
#' ASFR_Arg <- downloadAsfr(Asfrmat = NULL, location = "Argentina", AsfrDatesIn = 1950:2025)
#' \dontrun{
#' plot(1950:2050, colSums(kenya_noext) * 5, t="l", main="TFR Kenya")
#' lines(1950:2050, colSums(kenya_ext) * 5, col=2)
#' legend("topright",c("NoExtr","Extr"),lty=1,col = 1:2)
#' plot(1950:2025, as.numeric(colSums(ASFR_Arg))*5, xlab = "Year", ylab="TFR", ylim=c(1.5,4), t="l")
#' }

downloadAsfr <- function(Asfrmat, location = NULL, AsfrDatesIn, extrap_50s = FALSE) {
downloadAsfr <- function(Asfrmat, location = NULL, AsfrDatesIn, method="linear") {
requireNamespace("fertestr", quietly = TRUE)
verbose <- getOption("basepop_verbose", TRUE)

Expand All @@ -85,47 +141,43 @@ downloadAsfr <- function(Asfrmat, location = NULL, AsfrDatesIn, extrap_50s = FAL
return(Asfrmat)
}

if (is.null(location)) stop("You need to provide a location to download the data for Asfrmat")

ind_invalidyear <- which(AsfrDatesIn < 1955)
if (length(ind_invalidyear) != 0) {
invalid_yrs <- paste0(AsfrDatesIn[ind_invalidyear], collapse = ", ")
if(!extrap_50s) cat("AsfrDate(s)", invalid_yrs, "is/are below 1955. Capping at 1955\n")
AsfrDatesIn[ind_invalidyear] <- 1955
# stop/warnings
if (is.null(location)){
stop("You need to provide a location to download the data for Asfrmat")
}
if (!any(is_LocID(location))) {
location_code <- get_location_code(location)
}else {
location_code <- as.integer(location)
}

if (verbose) {
cat(paste0("Downloading Asfr data for ",
loc_message(location),
", years ",
paste0(AsfrDatesIn),collapse=", "), sep = "\n")
cat(paste0("Downloading ASFR data for ", location, ", years ", paste(AsfrDatesIn,collapse=", ")), sep = "\n")
}

tmp <-
lapply(AsfrDatesIn, function(x) {
res <- fertestr::FetchFertilityWpp2019(location, x)["asfr"]
names(res) <- NULL
as.matrix(res)[2:nrow(res), , drop = FALSE]
})

Asfrmat <- do.call(cbind, tmp)
colnames(Asfrmat) <- AsfrDatesIn

# IW: add extrap option extrap_50s
# IW: maybe no need to call fertestr::FetchFertilityWpp2019 and then fix first interval.
# just load all WPP2019_asfr, select location, interpolate with interp(...,extrap=TRUE),
# and return only requested years. What do you think?
if(extrap_50s){
minor_years <- sort(unique(AsfrDatesIn))[1:2]
if(sum(1955:1960 %in% minor_years)==1) stop("Need two points at least between 1955 and 1960 in AsfrDatesIn to extrapolate.")
Asfrmat_1950 <- interp(Asfrmat[,as.character(minor_years)],
minor_years,
as.numeric(1950:1955), extrap = TRUE)
Asfrmat <- cbind(Asfrmat_1950,
Asfrmat[,as.numeric(colnames(Asfrmat))>1955])
if(any(AsfrDatesIn<1950,AsfrDatesIn>2025)){
cat("Careful, extrapolating beyond range 1950-2025")
}

Asfrmat
# initial data
asfr_wpp19 <-DemoToolsData::WPP2019_asfr

# spread format
asfr_ctry <- asfr_wpp19[asfr_wpp19$LocID %in% location_code,] %>%
as.data.frame() %>%
reshape(direction = "wide", idvar = c("LocID","AgeStart"),
timevar = "Year", v.names = "ASFR")

# interp/extrap
out <- interp(asfr_ctry[,-c(1:3)], seq(1953,2023,5),
as.numeric(AsfrDatesIn),
extrap = TRUE, method = method) %>%
as.data.frame() %>%
setNames(as.character(AsfrDatesIn)) %>%
as.matrix()

# combination as rowname
rownames(out) <- asfr_ctry$AgeStart

return(out)
}

#' Extract SRB estimates from WPP2019
Expand Down
32 changes: 32 additions & 0 deletions man/downloadAsfr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 30 additions & 6 deletions man/downloadnLx.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit fc812e7

Please sign in to comment.