From 8332200fd238df804b9289816622eaf2ab4b8bb4 Mon Sep 17 00:00:00 2001 From: Edouard Date: Fri, 13 Oct 2023 16:29:56 -0500 Subject: [PATCH] rev add dummy data function --- README.Rmd | 19 +- README.md | 43 ++- dev/indicators.Rmd | 42 ++- dev/utilities.Rmd | 707 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 785 insertions(+), 26 deletions(-) create mode 100644 dev/utilities.Rmd diff --git a/README.Rmd b/README.Rmd index c6be83d..be03d6f 100644 --- a/README.Rmd +++ b/README.Rmd @@ -18,15 +18,23 @@ knitr::opts_chunk$set( -The goal of {IndicatorCalc} is to ease the implementation of standard calculation for survey indicators related to Forcibly Displaced Population. +There is broad consensus around the key indicators used to measure, inform and monitor progress towards global development objectives, as exemplified by the Sustainable Development Goals and related efforts of the MICS, DHS, IHSN, together with national governments. UNHCR's objectives are largely aligned with these frameworks. [UNHCR Results Monitoring Surveys (RMS)](https://intranet.unhcr.org/en/support-services/common-good-data-initiatives/household-surveys/Results-Monitoring-Surveys.html) are household-level surveys with standard questionnaires following context-appropriate methodological approaches. They can be implemented across UNHCR operations to monitor changes in the lives of all relevant groups of persons of concern (impacts) and in UNHCR's key areas of engagement (outcomes). RMS help us to calculate impact and outcome indicators in a standardized way to have a global understanding of the results. Both indicators and questionnaire is also largely aligned with MICS, DHS, IHSN, national household surveys and other UNHCR standardized surveys. -The package is designed to work based on dataset standard backup format exported from [kobotoolbox](http://http://kobo.unhcr.org) within [UNHCR internal data repository](http://ridl.unhcr.org). It is adapted from the initial [RMS indicator script](https://github.com/bozdagilgi/UNHCR-RMS-Indicators) +The goal of {IndicatorCalc} is to ease the implementation of standard calculations for survey indicators related to Forcibly Displaced Population. -Each calculation is implemented as a function with in-built check to identify whether the expected variables and modalities are within the dataset and a `mapper` to transform the data in the expected format if needs be. You can check each [function reference](/reference/index.html) to see in details all documented calculations +The package is designed to work based on dataset standard backup format exported from [kobotoolbox](http://http://kobo.unhcr.org) within [UNHCR internal data repository](http://ridl.unhcr.org). It is adapted from the initial [indicator script](https://github.com/bozdagilgi/UNHCR-RMS-Indicators) version. + +Each calculation is implemented as a function with in-built check to identify whether the expected variables and modalities are within the dataset and a `mapper` to transform the data in the expected format in case of divergence of data structure between what was collected and what is expected. You can check each [function reference](/reference/index.html) to see in details all documented calculations + + + + +*Population* refers to survey population in this guidance for the calculation of indicators as shown by enumerator and denominator. +*Denominators* that are representing the households will be obtained by weighting the number of households by the number of household members at the end of the analysis. If there are no weights used it will be used as 'weight' variable for household level indicators. ## Usage -The easiest way to use the package is through the [shiny interface](http://rstudio.unhcr.org/IndicatorCalc) and then follow the instruction from there +The easiest way to use the package is through the [shiny interface](http://rstudio.unhcr.org/IndicatorCalc) and then follow the instruction from there. ## Developpers @@ -41,7 +49,8 @@ devtools::install_github("unhcr-americas/IndicatorCalc") The package development roadmap includes: - - a wrapper to chain all single calculations + - a dummy data generator + - an SDG comparison plot.. - a report template to output a quick exploration of all indicators results - a shiny app to provide access to the package online with Rstudio connect (versus offline rstudio) diff --git a/README.md b/README.md index 456d977..3cbba90 100644 --- a/README.md +++ b/README.md @@ -6,27 +6,53 @@ +There is broad consensus around the key indicators used to measure, +inform and monitor progress towards global development objectives, as +exemplified by the Sustainable Development Goals and related efforts of +the MICS, DHS, IHSN, together with national governments. UNHCR’s +objectives are largely aligned with these frameworks. [UNHCR Results +Monitoring Surveys +(RMS)](https://intranet.unhcr.org/en/support-services/common-good-data-initiatives/household-surveys/Results-Monitoring-Surveys.html) +are household-level surveys with standard questionnaires following +context-appropriate methodological approaches. They can be implemented +across UNHCR operations to monitor changes in the lives of all relevant +groups of persons of concern (impacts) and in UNHCR’s key areas of +engagement (outcomes). RMS help us to calculate impact and outcome +indicators in a standardized way to have a global understanding of the +results. Both indicators and questionnaire is also largely aligned with +MICS, DHS, IHSN, national household surveys and other UNHCR standardized +surveys. + The goal of {IndicatorCalc} is to ease the implementation of standard -calculation for survey indicators related to Forcibly Displaced +calculations for survey indicators related to Forcibly Displaced Population. The package is designed to work based on dataset standard backup format exported from [kobotoolbox](http://http://kobo.unhcr.org) within [UNHCR internal data repository](http://ridl.unhcr.org). It is adapted from the -initial [RMS indicator -script](https://github.com/bozdagilgi/UNHCR-RMS-Indicators) +initial [indicator +script](https://github.com/bozdagilgi/UNHCR-RMS-Indicators) version. Each calculation is implemented as a function with in-built check to identify whether the expected variables and modalities are within the -dataset and a `mapper` to transform the data in the expected format if -needs be. You can check each [function reference](/reference/index.html) -to see in details all documented calculations +dataset and a `mapper` to transform the data in the expected format in +case of divergence of data structure between what was collected and what +is expected. You can check each [function +reference](/reference/index.html) to see in details all documented +calculations + +*Population* refers to survey population in this guidance for the +calculation of indicators as shown by enumerator and denominator. +*Denominators* that are representing the households will be obtained by +weighting the number of households by the number of household members at +the end of the analysis. If there are no weights used it will be used as +‘weight’ variable for household level indicators. ## Usage The easiest way to use the package is through the [shiny interface](http://rstudio.unhcr.org/IndicatorCalc) and then follow the -instruction from there +instruction from there. ## Developpers @@ -40,7 +66,8 @@ devtools::install_github("unhcr-americas/IndicatorCalc") The package development roadmap includes: -- a wrapper to chain all single calculations +- a dummy data generator +- an SDG comparison plot.. - a report template to output a quick exploration of all indicators results - a shiny app to provide access to the package online with Rstudio diff --git a/dev/indicators.Rmd b/dev/indicators.Rmd index 307e393..c19ad56 100644 --- a/dev/indicators.Rmd +++ b/dev/indicators.Rmd @@ -17,6 +17,21 @@ library(testthat) pkgload::load_all(export_all = FALSE) ``` +Indicator functions are designed to work based on data stored as a list - which is + the default structure for a complex hierarchical survey dataset with nested tables + + The default export format from kobotoolbox includes variables names generated as a concatenation of groups and names. + + The indicators calculation are based on specific patterns to be identified within + the variable names. This allow to handle cases where variables and questions would + have been shifted within the sequence of the questionnaire and through different + questions groups. + + The indicator functions also check that the data content is the one expected. + A check log is written to keep track of all issues + + + # Data Wrangling Each indicator calculation is based on predefined frame, variable name and variable value. @@ -1478,22 +1493,23 @@ datalist <- kobocruncher::kobo_data( system.file("test.xlsx", ## mpper mapper <- list( - hierarchy = "main", + hierarchy = "ind", variablemap = data.frame( label = c( - "In general, when anyone in your household is sick, - where do they go to seek care?", - "How long does it take to go there when you use the mode of transport - that you mentioned above?"), - variable = c("HEA01", - "HEA03"), - mappattern = c("HEA01", - "HEA03") ), + "In the past 3 months, did ${name_individual} need to see a health professional for any reason?", + "In the past 3 months, did you receive medical care when needed for the reason above?", + "Why have you been unable to access a medical care in the past 3 months?"), + variable = c("HACC01", + "HACC03", + "HACC04"), + mappattern = c("HACC01", + "HACC03", + "HACC04") ), modalitymap = data.frame( - variable = c( "HEA01", "HEA01" ), - label = c( "Other, specify", "Don't know"), - standard = c("96", "98" ), - map = c("96", "98" ))) + variable = c( "HACC01", "HACC03" ), + label = c( "yes", "yes"), + standard = c( "1","1" ), + map = c("1","1" ))) ## Apply indicator function on datalist datalist <- impact2_3(datalist, mapper ) diff --git a/dev/utilities.Rmd b/dev/utilities.Rmd new file mode 100644 index 0000000..2917920 --- /dev/null +++ b/dev/utilities.Rmd @@ -0,0 +1,707 @@ +--- +title: "Get Started" +output: html_document +editor_options: + chunk_output_type: console +--- + + + + +```{r development, include=FALSE} +library(testthat) +``` + + + + + +```{r development-load} +# Load already included functions if relevant +pkgload::load_all(export_all = FALSE) +``` + + + +```{r development-dataset} +# Run all this chunk in the console directly +# There already is a dataset in the "inst/" directory +# Make the dataset file available to the current Rmd during development +pkgload::load_all(path = here::here(), export_all = FALSE) + + +``` + + +# Generating data from form + +One key function is to generate a dummy dataset based on a specific form structure + +To demonstrate the package we will use the standard questionnaire and then apply + each indicator function to demonstrate them. + +We can then also re-use the function to create dummy data based on any form and then apply + each indicator function to actually verify which indicators can be created based + on the form content. + +## var_dummy + +```{r function-var_dummy} +#' var_dummy +#' +#' Append to an existing dataframe new Generate the variable based on 1. type, 2. constraint and 3.relevance +#' +#' @param frame dataframe object where new dummy varaible will be created... +#' @param name name of the variable to be created +#' @param type type of the variable to be created in line with xlsform - +#' one of the following 6 options: +#' "select_one", "select_multiple", "text", "integer", "numeric", "date" +#' +#' "start ", "end", "calculate", "geopoint","acknowledge", +#' "begin_group", "end_group", "begin_repeat", "end_repeat" , "note" +#' +#' @param list_opt a vector with all possible values for select_one and select_multiple +#' - can be null +#' @param constraint specific constraint - used for for numeric or integer +## @param seed random seed +#' +#' @return dataframe +#' +#' @importFrom withr with_seed +#' @importFrom dplyr mutate n tibble rename +#' @importFrom stats rlnorm +#' @importFrom tidyr unnest +#' +#' @noRd +#' +#' @export +var_dummy <- function( frame, + name, + type, + list_opt, + constraint #, + # seed = 1976 + ) { + ## number of records to be generated depends on the initial frame.. + n <- nrow(frame) + #withr::with_seed(seed = seed) + # withr::with_seed( + # seed = seed, + # suppressWarnings( + + # no content ########### + if( type %in% c("begin_group", "end_group", "begin_repeat", "end_repeat" , "note" )) { + res <- dplyr::tibble( + thisvar = NA) + names(res)[1] <- name + frame <- cbind(frame,res) + } + + + ## acknowledge ########### + if( type %in% c("acknowledge")) { + res <- dplyr::tibble( + thisvar = sample(c(0,1), n, replace = TRUE)) + names(res)[1] <- name + frame <- cbind(frame,res) + } + + + ## select one ########### + if( type %in% c("select_one") ) { + res <- dplyr::tibble( thisvar = sample(c(list_opt), n, replace = TRUE) ) + ## Now rename the variable + names(res)[1] <- name + frame <- cbind(frame,res) + } + + ## select multiple ########### + if( type %in% c("select_multiple")) { + for (modality in list_opt) { + res <- dplyr::tibble( + thisvar = sample(c(0,1), n, replace = TRUE)) + names(res)[1] <- paste0(name, "_",modality ) + frame <- cbind(frame,res) + } + } + + ## stext ########### + if( type %in% c("text")) { + res <- dplyr::tibble( + thisvar = paste0( "FreeText_", + purrr::as_vector( + purrr::map(n, sample(LETTERS, 6)) |> + purrr::map(paste0, collapse = "")), + "-", + formatC(1:n, width = nchar(n) + 1, flag = "0") + )) + ## Now rename the variable + names(res)[1] <- name + frame <- cbind(frame,res) + } + + ## numeric ########### + if( type %in% c("numeric", "integer", "calculate", "geopoint")) { + ## TODO extract elements from constraint to define num1 an num2 + num1 <- 18 + num2 <- 95 + + res <- dplyr::tibble( + thisvar = sample(num1:num2, n, replace = TRUE)) + ## Now rename the variable + names(res)[1] <- name + frame <- cbind(frame,res) + } + + ## date ########### + if( type %in% c("date", "start ", "end")) { + res <- dplyr::tibble( + thisvar = Sys.time() - abs(rnorm(n, 0, sd = 2) * 365 * 24 * 3600)) + ## Now rename the variable + names(res)[1] <- name + frame <- cbind(frame,res) + } + + # ) + # ) + + return(frame) +} +``` + +```{r example-var_dummy} + +## let's initiate a dataframe with an index of n r records +n <- 384 +frame <- dplyr::tibble( + index = paste0( "ID-", + purrr::as_vector( + purrr::map(n, sample(LETTERS, 4)) |> + purrr::map(paste0, collapse = "")), + "-", + formatC(1:n, width = nchar(n) + 1, flag = "0") + )) + +## test inject select_one +frame <- var_dummy( + frame = frame, + name = "testselect_one", + type = "select_one", + list_opt = c("alpha", "beta", "delta"), + constraint = NULL + ) +## test inject select_multiple in main +frame <- var_dummy( + frame = frame, + name = "testselect_multiple", + type = "select_multiple", + list_opt = c("alpha", "beta", "delta"), + constraint = NULL + ) + +## test inject text +frame <- var_dummy( + frame = frame, + name = "testtext", + type = "text", + list_opt = NULL, + constraint = NULL + ) + + +## test inject numeric +frame <- var_dummy( + frame = frame, + name = "testnumeric", + type = "numeric", + list_opt = NULL, + constraint = NULL + ) + +## test inject date +frame <- var_dummy( + frame = frame, + name = "testdate", + type = "date", + list_opt = NULL, + constraint = NULL + ) + +## Preview out out +knitr::kable(head(frame, 5)) +``` + +```{r tests-var_dummy} +test_that("var_dummy works", { + expect_true(inherits(var_dummy, "function")) +}) +``` + + +## kobo_dummy + + +```{r function-kobo_dummy} +#' kobo_dummy +#' +#' @description Automatically produce an dummy dataset in line with the structure +#' of an XlsForm +#' Making decisions about research design and analysis strategies is often difficult before data is collected, +#' because it is hard to imagine the exact form data will take. +#' This function helps imagine what data will look like before they collect it. +#' samplesize is set per defautl at 500 records +#' +#' Supported Features: +#' - Generate a data set +#' - respects ODK structure "`relevant`" skip logic +#' (Some advanced functionality such as "coalesce()" not covered) +#' - respects "`constraint`" +#' - respects "`repeat`" adds ID column to link hierarchical data based on "`repeat_count`" + +#' @reference the function is a rewriting of +#' https://unhcr.github.io/koboloadeR/docs/reference/kobo_dummy.html +#' it also build on https://thinkr-open.github.io/fakir/ and +#' https://docs.ropensci.org/charlatan/ +#' +#' @param form file path of the XlsForm +#' @param n number of main records to be generated +#' @param nrepeat max random number of repeat records to be generated +#' when repeat_count is not mentionned +#' @param file file as xlsx where to save to the +#' +#' @importFrom kobocruncher kobo_dico +#' +#' +#' @return a data list with a series of dummy data +#' +#' @export +kobo_dummy <- function(form, + n = 384, + file){ + + dico <- kobocruncher::kobo_dico(xlsformpath = form) + + ## Get the variables to extract + frame <- "main" + + conf <- data.frame( + name <- dico[["variables"]] |> + dplyr::filter(repeatvar == frame) |> + dplyr::select(name), + + # dico[["variables"]]|> + # dplyr::filter(repeatvar == frame) |> + # dplyr::distinct(type) + type <- dico[["variables"]]|> + dplyr::filter(repeatvar == frame) |> + dplyr::select(type), + + ## pulling list options... + list_opt <- dico[["variables"]] |> + dplyr::filter(repeatvar == frame) |> + dplyr::select(list_name) , + #dplyr::left_join(modal, by = c("list_name")) |> + # dplyr::select( list_opt) + + + constraint <- dico[["variables"]]|> + dplyr::filter(repeatvar == frame) |> + dplyr::select(constraint) + ) + + ## We start with the generation of the main frame + ## Build the main table and initiate it an index..... + main <- dplyr::tibble( + index = paste0( "ID-", + purrr::as_vector( + purrr::map(n, sample(LETTERS, 4)) |> + purrr::map(paste0, collapse = "")), + "-", + formatC(1:n, width = nchar(n) + 1, flag = "0") + )) + + ## then apply var_dummy interactively... + for(i in (1:nrow(conf)) ) { + # i <- 6 + cat(paste0(i, "-", conf[i, c("type")], "-", conf[i, c("name")], "\n")) + + ## manage specific case when list name is not defined in ccoices but pulled from data.. + ## in such case we replace type by select_one by text + this.type <- conf[i, c("type")] + this.name <- conf[i, c("name")] + this.listname <- conf[i , c("list_name")] + this.constraint = conf[i, c("constraint")] + + if( !(this.listname %in% c( dico[["modalities"]] |> dplyr::pull(list_name))) ) { + this.listname <- NULL + this.type <- "text" + } + + main <- var_dummy( + frame = main, + name = this.name, + type = this.type, + list_opt = c(dico[["modalities"]] |> + dplyr::filter(list_name == this.listname ) |> + dplyr::pull(name) ), + constraint = this.constraint ) + } + + ## Still struggling with purrr + # test <- purrr::map( main, + # name, + # type, + # list_opt$list_opt , + # constraint , + # var_dummy) + + ## TODO -- apply the relevance statement... + ## define when the variable should exist based on other elements + ## Need to convert the xlsform statement into R syntax + # relevant <- dico[["variables"]]|> + # dplyr::filter(repeatvar == "main") |> + # dplyr::pull(relevant) + + ## replace \" by ' + ## replace '=' by '==" + ## replace '!==' by '!=" (to fix previsous replace) + ## replace '${' '}' with nothing.. + ## replace 'or' with '|'.. + ## replace 'and' with '&'.. + + ## Now store in the list... + datalist <- list ( "main" = main) + + ## treat cases with repeat table.. ##### + repeatvar <- dico[["variables"]] |> + dplyr::select(repeatvar) |> + dplyr::filter(repeatvar != "main") |> + dplyr::distinct() |> + dplyr::pull() + ## need to separate the different repeat elements wthin the form... + for ( rep in repeatvar ) { + + confrep <- data.frame( + name <- dico[["variables"]] |> + dplyr::filter(repeatvar == rep) |> + dplyr::select(name), + + # dico[["variables"]]|> + # dplyr::filter(repeatvar == rep) |> + # dplyr::distinct(type) + type <- dico[["variables"]]|> + dplyr::filter(repeatvar == rep) |> + dplyr::select(type), + + ## pulling list options... + list_opt <- dico[["variables"]] |> + dplyr::filter(repeatvar == rep) |> + dplyr::select(list_name) , + #dplyr::left_join(modal, by = c("list_name")) |> + # dplyr::select( list_opt) + + + constraint <- dico[["variables"]]|> + dplyr::filter(repeatvar == rep) |> + dplyr::select(constraint) + ) + + ## We start with the generation of the main frame + ## Build the main table and initiate it an index..... + repframe <- dplyr::tibble( + index = paste0( "ID-", + purrr::as_vector( + purrr::map(n, sample(LETTERS, 4)) |> + purrr::map(paste0, collapse = "")), + "-", + formatC(1:n, width = nchar(n) + 1, flag = "0") + )) + + ## then apply var_dummy interactively... + for(i in (1:nrow(confrep)) ) { + # i <- 6 + cat(paste0(i, " in repeat -", rep, " ///", confrep[i, c("type")], "-", confrep[i, c("name")], "\n")) + + ## manage specific case when list name is not defined in ccoices but pulled from data.. + ## in such case we replace type by select_one by text + this.type <- confrep[i, c("type")] + this.name <- confrep[i, c("name")] + this.listname <- confrep[i , c("list_name")] + this.constraint = confrep[i, c("constraint")] + + if( !(this.listname %in% c( dico[["modalities"]] |> dplyr::pull(list_name))) ) { + this.listname <- NULL + this.type <- "text" + } + + repframe <- var_dummy( + frame = repframe, + name = this.name, + type = this.type, + list_opt = c(dico[["modalities"]] |> + dplyr::filter(list_name == this.listname ) |> + dplyr::pull(name) ), + constraint = this.constraint ) + } + + ## TODO -- check if we have a `repeat_count` to apply limitation.. + + ## append to the repeat + datalist[[rep]] <- repframe + } + + + + + return(datalist) + } +``` + +```{r example-kobo_dummy} +form <- system.file("RMSCAPI.xlsx", package = "IndicatorCalc") +datalist <- kobo_dummy(form, + n = 384, + file = NULL) +``` + +```{r tests-kobo_dummy} +test_that("kobo_dummy works", { + expect_true(inherits(kobo_dummy, "function")) +}) +``` + + +## plot_rbm_sdg - SDG Comparison + +```{r function-plot_rbm_sdg} +#' @title Plot SDG Indicator for a country +#' @description The function is designed to help the interpretation of survey +#' results by providing a comparison guidelines. It displays a chart to have a +#' base of comparison when analyzing the results of UNHCR of UNHCR Result monitoring +#' survey. It pulls the values published within the UN Registry of SDG Indicator values +#' The function includes a mapping table between RBM/RMS and SDG and +#' extract the data from https://unstats.un.org/SDGAPI/swagger/ using the package SDGsR +#' https://drmattg.github.io/SDGsR/articles/Introduction_to_SDGsR.html +#' devtools::install_github("DrMattG/SDGsR", dependencies = TRUE) +#' @param country iso3 code for the country (easier to recall than the M49 used in the API) +#' @param rbm the RBM variable name - that can match SDG +#' @param years years to filter the chart - for instance c(2000,2022) +#' +#' @importFrom SDGsR get_indicator +#' @importFrom ForcedDisplacementStat reference +#' @importFrom janitor clean_names +#' @importFrom stringr str_replace +#' +#' @return a ggplot2 object +plot_rbm_sdg <- function( country = "BRA", + rbm = "outcome16_2", + years = c(2000, 2022)) { + require(ggplot2) + require(dplyr) + + ## Below is a mapping table between indicators codes for UNHCR and SDG... +sdg_rbm <- structure(list( + indicator = c("1.4.1", "3.8.1", "16.9.1", "7.1.2", + "7.1.1", "3.1.2", "6.1.1", "6.2.1", + "8.10.2", "8.5.2", "1.4.2", "1.3.1", "16.1.4"), + + SDG_indic = c( + "1.4.1 Proportion of population living in households with access to basic services", + "3.8.1 Coverage of essential health services (defined as the average coverage of + essential services based on tracer interventions that include reproductive, maternal, + newborn and child health, infectious diseases, non-communicable diseases and service + capacity and access, among the general and the most disadvantaged population)", + "16.9.1 Proportion of children under 5 years of age whose births have been registered + with a civil authority, by age", + "7.1.2 Proportion of population with primary reliance on clean fuels and technology", + "7.1.1 Proportion of population with access to electricity", + "3.1.2 Proportion of births attended by skilled health personnel", + "6.1.1 Proportion of population using safely managed drinking water services", + "6.2.1 Proportion of population using (a) safely managed sanitation services and (b) + a hand-washing facility with soap and water", + "8.10.2 Proportion of adults (15 years and older) with an account at a bank or other + financial institution or with a mobile-money-service provider", + "8.5.2 Unemployment rate, by sex, age and persons with disabilities", + "1.4.2 Proportion of total adult population with secure tenure rights to land, (a) + with legally recognized documentation, and (b) who perceive their rights to land as + secure, by sex and type of tenure", + "1.3.1 Proportion of population covered by social protection floors/systems, by sex, + distinguishing children, unemployed persons, older persons, persons with disabilities, + pregnant women, newborns, work-injury victims and the poor and the vulnerable", + "16.1.4 Proportion of population that feel safe walking alone around the area they live"), + + RMS = c("impact2_2", "impact2_3", "outcome1_2", "outcome8_2", + "outcome9_2", "outcome10_2", "outcome12_1", "outcome12_2", "outcome13_1", + "outcome13_3", "outcome16_1", "outcome16_2", "impact3_3"), + + target = c("1.4", + "3.8", "16.9", "7.1", "7.1", "3.1", "6.1", "6.2", "8.10", "8.5", + "1.4", "1.3", "16.1"), + + description = c("By 2030, ensure that all men and women, in particular the poor and + the vulnerable, have equal rights to economic resources, as well as + access to basic services, ownership and control over land and other + forms of property, inheritance, natural resources, appropriate new + technology and financial services, including microfinance", + "Achieve universal health coverage, including financial risk protection, access to + quality essential health-care services and access to safe, effective, quality and + affordable essential medicines and vaccines for all", + "By 2030, provide legal identity for all, including birth registration", + "By 2030, ensure universal access to affordable, reliable and modern energy services", + "By 2030, ensure universal access to affordable, reliable and modern energy services", + "By 2030, reduce the global maternal mortality ratio to less than 70 per 100,000 live births", + "By 2030, achieve universal and equitable access to safe and affordable drinking water for all", + "By 2030, achieve access to adequate and equitable sanitation and hygiene for all and + end open defecation, paying special attention to the needs of women and girls and those + in vulnerable situations", "Strengthen the capacity of domestic financial institutions + to encourage and expand access to banking, insurance and financial services for all", + "By 2030, achieve full and productive employment and decent work for all women and men, + including for young people and persons with disabilities, and equal pay for work of equal value", + "By 2030, ensure that all men and women, in particular the poor and the vulnerable, + have equal rights to economic resources, as well as access to basic services, ownership + and control over land and other forms of property, inheritance, natural resources, + appropriate new technology and financial services, including microfinance", + "Implement nationally appropriate social protection systems and measures for all, + including floors, and by 2030 achieve substantial coverage of the poor and the vulnerable", + "Significantly reduce all forms of violence and related death rates everywhere")), + class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, + -13L)) + + ctry <- ForcedDisplacementStat::reference |> + filter(iso_3 == country ) |> + select(iso_3, ctryname, M49_code) + + sdg_code_label <- sdg_rbm |> + filter(RMS == rbm ) + + sdg_code <- sdg_rbm |> + filter(RMS == rbm ) |> + pull(indicator) + + #cat(paste0("\n \n -------\n Pulling RMS Host country comparison data for ", ctry$ctryname, "\n\n")) + #cat(paste0("Retrieving now : ", sdg_code_label$SDG_indic , "\n")) + Ind1 <- SDGsR::get_indicator(Country= ctry$M49_code, indicator= sdg_code) + + ## Check if the API did return something + if(nrow(Ind1) == 0) { + #cat(paste0("\n ಠ_ಠ \n No data rcould be etrieved from API for ", sdg_code_label$SDG_indic , " in country: ", ctry$ctryname ,"n\n")) + p <- ggplot() + + annotate("text", x = 1, y = 1, size = 10, + label = stringr::str_wrap(paste0("\n ಠ_ಠ \n No data could be retrieved from UNStat API for: ", + sdg_code_label$SDG_indic , " (related to UNHCR Indicator ", rbm , + ") in country: ", + ctry$ctryname ,"."), 50 ) ) + theme_void() + + + return(p) + } else { + # cat(paste0("Done! ", nrow(Ind1), " records gathered ヽ(´▽`)/ \n\n")) + + ## Remove rows when value is NaN + Ind1 <- Ind1[ !(is.nan(Ind1$value)), ] + ## Replace NaN by NA for upper and lower Bound + Ind1$upperBound <- ifelse(is.nan(Ind1$upperBound), NA , Ind1$upperBound ) + Ind1$lowerBound <- ifelse(is.nan(Ind1$lowerBound), NA , Ind1$lowerBound ) + + #cat ("sleep 3 sec between each indicators...") + #Sys.sleep(3) + + + #Ind1$target <- as.character(Ind1$target ) + Ind1 <- Ind1 %>% + as.data.frame() %>% + janitor::clean_names() %>% + ## Cleaning a few country name for better legibility + dplyr::mutate ( geo_area_name = stringr::str_replace(geo_area_name, "Venezuela \\(Bolivarian Republic of\\)", "Venezuela"), + geo_area_name = stringr::str_replace(geo_area_name, "Bolivia \\(Plurinational State of\\)", "Bolivia")) #%>% + #dplyr::left_join(Goals, by="target") + #names(Ind1) + + ## remove all empty disaggregation + # Ind2 <- as.data.frame(Ind1[ , colSums(is.na(Ind1)) == 0]) + Ind2 <- Ind1%>% + select(series_description, geo_area_name, time_period_start,value ) %>% + group_by(series_description, geo_area_name, time_period_start) %>% + ## Aggregate base on Unit type - aka sum or average + summarize(valmean = mean(as.numeric(value)), + valsum = sum(as.numeric(value)) )%>% + #So we overprint in the right order + arrange(time_period_start) #%>% + + if( unique(Ind1$attributes$Units) %in% c("NUM_TH" , "NUMBER") ) { + Ind2$value <- Ind2$valsum + } else { + Ind2$value <- Ind2$valmean + } + # names(Ind2) + p <- ggplot(data = Ind2, + aes(x = time_period_start, + y = value, + group = 1)) + + geom_line( linewidth = 1.5, color = "#0072BC" ) + + geom_point(shape =15, size = 2, color = "#0072BC") + + facet_wrap(~ series_description) + + #scale_y_continuous(labels = unhcRstyle::format_si()) + + scale_x_continuous(limits = years) + + #geom_hline(yintercept = 0, size = 0.7, colour = "#333333") + + unhcrthemes::theme_unhcr(font_size = 20) + ## Insert UNHCR Style + theme(panel.grid.major.y = element_line(color = "#cbcbcb"), + panel.grid.major.x = element_blank(), + panel.grid.minor = element_blank(), + #panel.grid.major.x = element_blank(), + legend.position="none", + strip.text.x = element_text(size = 8)) + + labs(title = stringr::str_wrap(paste0( ctry$ctryname, ": ",sdg_code_label$description ), 80), + subtitle = stringr::str_wrap( paste0(sdg_code_label$SDG_indic, " (related to UNHCR Indicator ", rbm, ")"), 100), + x = " ", + y = paste0(unique(Ind1$attributes$Units)), + caption = stringr::str_wrap( paste0("Source: ", unique(Ind1$source), ", Data extracted from UNStat API"), 100)) + + # gghighlight::gghighlight(value >= mean(dfna1$value), + # #value - sdval, + # use_direct_label = FALSE) + + # geom_tile( data = yearfocus, + # aes(x = year, + # y = value, + # fill = as_factor((decade/10)%%2)), + # show.legend = FALSE) + + # scale_fill_manual(values = c("0" = "white", + # "1" = "#99999922")) + + return(p) + + } + +} +``` + +```{r example-plot_rbm_sdg} +plot_rbm_sdg( country = "BRA", + rbm = "impact2_2", + years = c(2000, 2022)) + + unhcrthemes::theme_unhcr(font_size = 10) +``` + +```{r tests-plot_rbm_sdg} +test_that("plot_rbm_sdg works", { + expect_true(inherits(plot_rbm_sdg, "function")) +}) +``` + + + + + + + + +```{r development-inflate, eval=FALSE} +# Keep eval=FALSE to avoid infinite loop in case you hit the knit button +# Execute in the console directly +fusen::inflate(flat_file = "dev/flat_full.Rmd", vignette_name = "Get started") +``` + +