From 26379190076f868b366d014134771e3219436808 Mon Sep 17 00:00:00 2001
From: jeffeaton <jeffrey.eaton@imperial.ac.uk>
Date: Tue, 17 Oct 2023 17:34:16 +0200
Subject: [PATCH 01/53] add function to read Spectrum KP table

---
 DESCRIPTION         |  2 +-
 NEWS.md             |  5 ++++
 R/inputs-spectrum.R | 57 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+), 1 deletion(-)
diff --git a/DESCRIPTION b/DESCRIPTION
index faa7f91b..5cff70f5 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: naomi
 Title: Naomi Model for Subnational HIV Estimates
-Version: 2.9.14
+Version: 2.9.15
 Authors@R:
     person(given = "Jeff",
            family = "Eaton",
diff --git a/NEWS.md b/NEWS.md
index 6ad0a84d..36e628e7 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,8 @@
+# naomi 2.9.15
+
+* Extract key population totals from the Spectrum PJNZ file. Data are extracted from 
+  summary table saved in AIM Programme Statistics input for key populations.
+
 # naomi 2.9.14
 
 * Fix scrambled translation keys in summary report.
diff --git a/R/inputs-spectrum.R b/R/inputs-spectrum.R
index c4107e1e..482b26b2 100644
--- a/R/inputs-spectrum.R
+++ b/R/inputs-spectrum.R
@@ -688,3 +688,60 @@ read_dp <- function(pjnz) {
   dpfile <- grep(".DP$", utils::unzip(pjnz, list = TRUE)$Name, value = TRUE)
   utils::read.csv(unz(pjnz, dpfile), as.is = TRUE)
 }
+
+
+#' Read key population summary data from PJNZ
+#'
+#' Reads key population summary data from Spectrum PJNZ.
+#'
+#' @param pjnz path to PJNZ file
+#'
+#' @examples
+#' pjnz <- system.file("extdata/demo_mwi2019.PJNZ", package = "naomi")
+#' dp <- dp <- naomi:::read_dp(pjnz)
+#' read_dp_keypop_summary(dp)
+#'
+#' @noRd
+#'
+read_dp_keypop_summary <- function(dp) {
+
+  exists_dptag <- function(tag, tagcol = 1) {
+    tag %in% dp[, tagcol]
+  }
+  dpsub <- function(tag, rows, cols, tagcol = 1) {
+    dp[which(dp[, tagcol] == tag) + rows, cols]
+  }
+
+  kp_name <- c("FSW", "MSM", "TG", "PWID")
+
+  if (exists_dptag("<KeyPops MV>")) {
+    kp_tab <- dpsub("<KeyPops MV>", 2:5, 4:7)
+    kp_tab <- sapply(kp_tab, as.numeric)
+  } else {
+    kp_tab <- matrix(NA, 4, 4)
+  }
+
+  if (exists_dptag("<KeyPopsYear MV>")) {
+    kp_year <- as.integer(dpsub("<KeyPopsYear MV>", 2, 4))
+  } else {
+    kp_year <- NA_integer_
+  }
+
+    if (exists_dptag("<KeyPopsFName MV>")) {
+    kp_file <- as.character(dpsub("<KeyPopsFName MV>", 2, 4))
+  } else {
+    kp_file <- NA_character_
+  }
+
+  kp_summary <- data.frame(
+    key_population = kp_name,
+    year = kp_year,
+    population_size = kp_tab[1, ],
+    hiv_prevalence = kp_tab[2, ],
+    art_coverage = kp_tab[3, ],
+    infections = kp_tab[4, ],
+    workbook_file = kp_file
+  )
+
+  kp_summary
+}

From 34a17306919d4fad4fbcebeaf170c1e762e421aa Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Wed, 1 Nov 2023 15:20:15 +0000
Subject: [PATCH 02/53] initial commit

---
 DESCRIPTION          |    1 +
 R/agyw-integration.R | 1475 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1476 insertions(+)
 create mode 100644 R/agyw-integration.R

diff --git a/DESCRIPTION b/DESCRIPTION
index faa7f91b..41c153cc 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -37,6 +37,7 @@ Imports:
     magrittr,
     mvtnorm,
     naomi.options (>= 0.1.6),
+    naomi.resources,
     plotly,
     prettyunits,
     qs,
diff --git a/R/agyw-integration.R b/R/agyw-integration.R
new file mode 100644
index 00000000..17ebe122
--- /dev/null
+++ b/R/agyw-integration.R
@@ -0,0 +1,1475 @@
+#' Format naomi outputs for PSE tool
+#'
+#' @param outputs Naomi output
+#' @param options Naomi model options.
+#'
+#'
+#' @return District level FSW estimates by 5-year age bands for ages 15-49.
+#'
+#' @export
+
+agyw_format_naomi <- function(outputs, options){
+
+  naomi_ind <- outputs$indicators %>%
+    dplyr::filter(indicator %in% c("population", "plhiv", "infections","incidence"),
+                  calendar_quarter == options$calendar_quarter_t2,
+                  area_level == options$area_level) %>%
+    dplyr::mutate(mean = dplyr::if_else(indicator == "incidence", mean/100, mean))
+
+
+
+  summarise_naomi_ind <- function(dat, age_cat) {
+
+    if(age_cat == "Y015_024"){age_groups <- c("Y015_019", "Y020_024")}
+    if(age_cat == "Y025_049"){age_groups <- c("Y025_029","Y030_034","Y035_039",
+                                              "Y040_044", "Y045_049")}
+
+    dat %>%
+      dplyr::select(area_id, area_name, age_group, sex, indicator, mean) %>%
+      tidyr::pivot_wider(names_from = indicator, values_from = mean) %>%
+      dplyr::group_by(area_id, area_name, sex) %>%
+      dplyr::summarise(
+        "population" = sum(population * as.integer(age_group %in% age_groups)),
+        "plhiv" = sum(plhiv * as.integer(age_group %in% age_groups)),
+        "infections" = sum(infections * as.integer(age_group %in% age_groups)),
+        .groups = "drop") %>%
+      dplyr::mutate(age_group = age_cat,
+                    incidence = (infections/(population-plhiv))*100) %>%
+      tidyr::pivot_longer(cols = c(population, plhiv, infections, incidence),
+                          names_to = "indicator",
+                          values_to = "mean") %>%
+      dplyr::mutate(age_group_label = dplyr::if_else(age_group == "Y015_024", "15-24", "25-49"))
+  }
+
+  # Naomi indicators for aggregate age groups
+  df1 <- dplyr::bind_rows(summarise_naomi_ind(naomi_ind, "Y015_024"),
+                          summarise_naomi_ind(naomi_ind, "Y025_049"))
+
+  # Naomi indicators for 5-year age groups + 15-49
+  df2 <- naomi_ind %>%
+    dplyr::filter(age_group %in% c("Y015_019", "Y020_024", "Y025_029", "Y030_034",
+                                   "Y035_039", "Y040_044", "Y045_049", "Y015_049")) %>%
+    dplyr::select(names(df1)) %>%
+    # Add aggregate indicators
+    dplyr::bind_rows(df1) %>%
+    # Format for workbook
+    dplyr::mutate(indicator = dplyr::recode(indicator,
+                                            "population" = "Pop", "plhiv" = "PLHIV",
+                                            "infections" = "new","incidence" = "Inci"),
+                  sex = dplyr::recode(sex,
+                                      "female" = "f", "male" = "m",
+                                      "both" = "all"),
+                  mean = as.character(mean))
+
+  # Incidence categories
+  df3 <- df2 %>%
+    dplyr::filter(indicator == "Inci") %>%
+    dplyr::mutate(mean = dplyr::case_when(mean<0.003 ~ "Low",
+                                          mean>=0.003 & mean<0.01 ~ "Moderate",
+                                          mean>=0.01 & mean<0.03 ~ "High",
+                                          mean>=0.03 ~ "Very High",
+                                          TRUE ~ NA_character_),
+                  indicator = "Incicategory")
+
+  # Incidence for all age groups + sexes
+  df4 <- naomi_ind %>%
+    dplyr::filter(indicator == "incidence", age_group == "Y000_999", sex == "both")
+
+  country <- outputs$meta_area$area_name[outputs$meta_area$area_id == options$area_scope]
+
+
+  # Format
+  naomi_wide <- dplyr::bind_rows(df2, df3) %>%
+  tidyr::pivot_wider(id_cols = c(area_id,area_name),
+                     names_from = c(indicator,age_group_label,sex),
+                     names_sep = "", values_from = mean) %>%
+    dplyr::mutate(Country = country, newAll = df4$mean) %>%
+    dplyr::select(Country,area_id,area_name,`Pop15-24all`,`Pop15-24f`,`Pop15-24m`,
+                  `PLHIV15-24all`,`PLHIV15-24f`,`PLHIV15-24m`,
+                  newAll, `new15-24all`,`new15-24f`,`new15-24m`,
+                  `Inci15-24f`,`Incicategory15-24f`,`Inci15-24m`,`Incicategory15-24m`,
+                  `Pop15-19all`,`Pop15-19f`,`Pop15-19m`,
+                  `PLHIV15-19all`,`PLHIV15-19f`,`PLHIV15-19m`,
+                  `new15-19all`,`new15-19f`,`new15-19m`,
+                  `Inci15-19f`,`Incicategory15-19f`,`Inci15-19m`,`Incicategory15-19m`,
+                  `Pop20-24all`,`Pop20-24f`,`Pop20-24m`,
+                  `PLHIV20-24all`,`PLHIV20-24f`,`PLHIV20-24m`,
+                  `new20-24all`,`new20-24f`,`new20-24m`,
+                  `Inci20-24f`,`Incicategory20-24f`,`Inci20-24m`,`Incicategory20-24m`,
+                  `Pop25-49all`,`Pop25-49f`,`Pop25-49m`,
+                  `PLHIV25-49all`,`PLHIV25-49f`,`PLHIV25-49m`,
+                  `new25-49all`,`new25-49f`,`new25-49m`,
+                  `Inci25-49f`,`Incicategory25-49f`,`Inci25-49m`,`Incicategory25-49m`,
+                  `Pop25-29all`,`Pop25-29f`,`Pop25-29m`,
+                  `PLHIV25-29all`,`PLHIV25-29f`,`PLHIV25-29m`,
+                  `new25-29all`,`new25-29f`,`new25-29m`,
+                  `Inci25-29f`,`Incicategory25-29f`,`Inci25-29m`,`Incicategory25-29m`,
+                  `Pop30-34all`,`Pop30-34f`,`Pop30-34m`,
+                  `PLHIV30-34all`,`PLHIV30-34f`,`PLHIV30-34m`,
+                  `new30-34all`,`new30-34f`,`new30-34m`,
+                  `Inci30-34f`,`Incicategory30-34f`,`Inci30-34m`,`Incicategory30-34m`,
+                  `Pop35-39all`,`Pop35-39f`,`Pop35-39m`,
+                  `PLHIV35-39all`,`PLHIV35-39f`,`PLHIV35-39m`,
+                  `new35-39all`,`new35-39f`,`new35-39m`,
+                  `Inci35-39f`,`Incicategory35-39f`,`Inci35-39m`,`Incicategory35-39m`,
+                  `Pop40-44all`,`Pop40-44f`,`Pop40-44m`,
+                  `PLHIV40-44all`,`PLHIV40-44f`,`PLHIV40-44m`,
+                  `new40-44all`,`new40-44f`,`new40-44m`,
+                  `Inci40-44f`,`Incicategory40-44f`,`Inci40-44m`,`Incicategory40-44m`,
+                  `Pop45-49all`,`Pop45-49f`,`Pop45-49m`,
+                  `PLHIV45-49all`,`PLHIV45-49f`,`PLHIV45-49m`,
+                  `new45-49all`,`new45-49f`,`new45-49m`,
+                  `Inci45-49f`,`Incicategory45-49f`,`Inci45-49m`,`Incicategory45-49m`,
+                  `Pop15-49all`,`Pop15-49f`,`Pop15-49m`,
+                  `PLHIV15-49all`,`PLHIV15-49f`,`PLHIV15-49m`,
+                  `new15-49all`,`new15-49f`,`new15-49m`,
+                  `Inci15-49f`,`Incicategory15-49f`,`Inci15-49m`,`Incicategory15-49m`)
+
+  naomi_wide
+
+  }
+
+
+
+#' Dissagreggate admin1 FSW proportions from Oli's KP model to 5-age groups
+#'
+#' @param outputs Naomi output
+#' @param options Naomi model options.
+#' @param naomi_population Naomi population estimates for T2.
+#'
+#'
+#' @return District level FSW estimates by 5-year age bands for ages 15-49.
+#'
+#' @export
+
+agyw_disaggregate_fsw <- function(outputs,
+                                  options,
+                                  naomi_pop)
+{
+
+  #' Extract country specific national FSW PSEs
+  iso3 <- options$area_scope
+  pse <- naomi.resources::load_agyw_exdata("fsw_pse", iso3)
+
+  fsw_pse <- pse %>%
+    dplyr::filter(iso3 == options$area_scope, indicator == "pse_prop") %>%
+    dplyr::rename(prop_fsw = median) %>%
+    dplyr::select(-indicator,-lower,-upper)
+
+  age_groups <- c("Y015_019", "Y020_024", "Y025_029", "Y030_034",
+                  "Y035_039", "Y040_044", "Y045_049")
+
+  #'Calculating FSW proportion of total female population
+  fsw <- fsw_pse %>%
+    dplyr::mutate(age_group = "Y015_049") %>%
+    dplyr::left_join(naomi_pop %>% dplyr::filter(sex == "female"),
+                     by = dplyr::join_by(iso3, area_id, age_group)) %>%
+    dplyr::mutate(total_fsw = population * prop_fsw) %>%
+    dplyr::select(iso3, area_id, total_fsw, age_group, area_level)
+
+  #' FSW age distribution parameters in ZAF from Thembisa
+  #' Downloaded from: https://www.thembisa.org/content/downloadPage/Thembisa4_3
+  gamma_mean <- 29
+  gamma_sd <- 9
+  beta <- gamma_mean / gamma_sd^2 #' rate
+  alpha <- gamma_mean * beta #' shape
+
+  #' Distribution function of the gamma
+  zaf_gamma <- data.frame(
+    dist = diff(pgamma(c(15, 20, 25, 30, 35, 40, 45, 50), shape = alpha, rate = beta)),
+    age_group = age_groups) %>%
+    dplyr::mutate(dist = dist / sum(dist))
+
+  pskewlogis <- function(t, scale, shape, skew) {
+    (1 + (scale * t)^-shape)^-skew
+  }
+
+  #' Calculate proportion of sexually active population using Kinh's country specific
+  #' estimates of age at first sex and naomi population
+  afs <- naomi.resources::load_agyw_exdata("afs", "BWA")
+
+  #' Select birth cohort from 2000, to turn 15 in 2015
+  cohort <- 2000
+
+  afs <- afs %>%
+    dplyr::filter(yob == cohort, sex == "female", ISO_A3 == options$area_scope) %>%
+    dplyr::mutate(iso3 = ISO_A3, ISO_A3 = NULL) %>%
+    dplyr::full_join(dplyr::select(fsw,iso3,area_id), multiple = "all", by = dplyr::join_by(iso3))
+
+  df <- data.frame()
+
+  #' Calculate sexually active population by age and sex for each district
+  for(x in unique(afs$area_id)) {
+    afs_x <- dplyr::filter(afs, area_id == x)
+    ages <- 15:49
+
+    df_x <- data.frame(
+      area_id = x,
+      age = ages,
+      eversex = pskewlogis(
+        ages,
+        scale = afs_x$lambda,
+        skew = afs_x$skew,
+        shape = afs_x$shape
+      ),
+      age_group = rep(age_groups, each = 5)
+    )
+
+    df_x <- df_x %>%
+      dplyr::group_by(area_id, age_group) %>%
+      dplyr::summarise(eversex = mean(eversex), .groups = "drop") %>%
+      dplyr::left_join(
+        naomi_pop %>% dplyr::filter(sex == "female"),
+        by = c("area_id", "age_group")
+      ) %>%
+      dplyr::mutate(
+        eversexpop = eversex * population,
+        eversexpop_prop = eversexpop / sum(eversexpop)
+      )
+
+    df <- dplyr::bind_rows(df, df_x)
+  }
+
+  #' Adjusting country specific sexual debut estimates with age distribution of
+  #' FSW from Thembisa
+  #'Downloaded from: https://www.thembisa.org/content/downloadPage/Thembisa4_3
+  zaf_propensity <- naomi.resources::load_agyw_exdata("zaf_propensity")
+
+  fsw_est <- df %>%
+    #  Add FSW propensity estimates from ZAF
+    dplyr::left_join(zaf_propensity, by = "age_group") %>%
+    # Calculate distribution of FSWs
+    dplyr::mutate(dist = eversexpop_prop * propensity) %>%
+    dplyr::group_by(area_id) %>%
+    dplyr::mutate(dist = dist / sum(dist)) %>%
+    dplyr::ungroup() %>%
+    # Add FSW PSEs
+    dplyr::full_join(
+      fsw %>% dplyr::select(total_fsw, iso3, area_id, area_level),
+      by = dplyr::join_by(area_id, iso3, area_level)
+    ) %>%
+    # Calculate FSW proportions
+    dplyr::mutate(
+      fsw = dist * total_fsw,
+      fsw_prop = fsw / population
+    ) %>%
+    dplyr::select(-eversexpop, -eversexpop_prop, -propensity, - dist, -total_fsw)
+
+  fsw_est
+
+}
+
+
+#' Disaggregate admin1 PWID proportions from Oli's KP model to 5-age groups
+#'
+#' @param outputs Naomi output..
+#' @param options Naomi model options.
+#' @param naomi_population Naomi population estimates for T2.
+#'
+#' @return District level PWID estimates by 5-year age bands for ages 15-49.
+
+agyw_disaggregate_pwid <- function(outputs,
+                                   options,
+                                   naomi_pop)
+{
+
+  #' Extract country specific national PWID PSEs
+  iso3 <- options$area_scope
+  pse <- naomi.resources::load_agyw_exdata("pwid_pse", iso3)
+
+  pwid_pse <- pse %>%
+    dplyr::filter(iso3 == options$area_scope, indicator == "pse_prop") %>%
+    dplyr::rename(prop_pwid = median) %>%
+    dplyr::select(-indicator,-lower,-upper)
+
+  age_groups <- c("Y015_019", "Y020_024", "Y025_029", "Y030_034",
+                  "Y035_039", "Y040_044", "Y045_049")
+
+  pwid <- pwid_pse %>%
+    dplyr::mutate(age_group = "Y015_049") %>%
+    dplyr::left_join(naomi_pop %>% dplyr::filter(sex == "male"),
+                     by = dplyr::join_by(iso3, area_id, age_group)) %>%
+    dplyr::mutate(total_pwid = population * prop_pwid) %>%
+    dplyr::select(iso3, area_id, total_pwid, age_group, area_level)
+
+  #' Assumption form literature that 9% of PWID are female and remove them from
+  #' the male calculation
+  pwid$total_pwid <- pwid$total_pwid * 0.91
+
+  #' PWID age distribution parameters in ZAF from Thembisa
+  #' Downloaded from: https://www.thembisa.org/content/downloadPage/Thembisa4_3
+  gamma_mean <- 29.4
+  gamma_sd <- 7
+  beta <- gamma_mean / gamma_sd^2 #' rate
+  alpha <- gamma_mean * beta #' shape
+
+  #' Distribution function of the gamma
+  zaf_gamma <- data.frame(
+    dist = diff(pgamma(c(15, 20, 25, 30, 35, 40, 45, 50), shape = alpha, rate = beta)),
+    age_group = age_groups) %>%
+    dplyr::mutate(dist = dist / sum(dist))
+
+
+  # Naomi population
+  pop <- naomi_pop %>%
+    dplyr::filter(area_id %in% unique(pwid$area_id),
+                  age_group %in% age_groups,
+                  sex == "male")
+
+  pwid_est <- dplyr::left_join(
+    pop, zaf_gamma,
+    by = dplyr::join_by(age_group)) %>%
+    dplyr::full_join(
+      dplyr::select(pwid,total_pwid, iso3, area_id),
+      by = c("area_id", "iso3")
+    ) %>%
+    dplyr::mutate(pwid = dist * total_pwid,
+                  pwid_prop = pwid / population) %>%
+    dplyr::select( -dist, -total_pwid, -sex)
+
+  pwid_est
+}
+
+#' Disaggregate admin1 MSM proportions from Oli's KP model to 5-age groups
+#'
+#' @param outputs Naomi output..
+#' @param options Naomi model options.
+#' @param naomi_population Naomi population estimates for T2.
+#'
+#' @return District level MSM estimates by 5-year age bands for ages 15-49.
+
+agyw_disaggregate_msm <- function(outputs,
+                                  options,
+                                  naomi_pop)
+{
+
+  #' Extract country specific national MSM PSEs
+  iso3 <- options$area_scope
+  pse <- naomi.resources::load_agyw_exdata("msm_pse", iso3)
+
+  msm_pse <- pse %>%
+    dplyr::filter(iso3 == options$area_scope, indicator == "pse_prop") %>%
+    dplyr::rename(prop_msm = median) %>%
+    dplyr::select(-indicator,-lower,-upper)
+
+  age_groups <- c("Y015_019", "Y020_024", "Y025_029", "Y030_034",
+                  "Y035_039", "Y040_044", "Y045_049")
+
+  msm <- msm_pse %>%
+    dplyr::mutate(age_group = "Y015_049") %>%
+    dplyr::left_join(naomi_pop %>% dplyr::filter(sex == "male"),
+                     by = dplyr::join_by(iso3, area_id, age_group)) %>%
+    dplyr::mutate(total_msm = population * prop_msm) %>%
+    dplyr::select(iso3, area_id, total_msm, age_group, area_level)
+
+
+
+  #' MSM age distribution parameters in ZAF from Thembisa
+  #' Downloaded from: https://www.thembisa.org/content/downloadPage/Thembisa4_3
+  gamma_mean <- 28
+  gamma_sd <- 9
+  beta <- gamma_mean / gamma_sd^2 #' rate
+  alpha <- gamma_mean * beta #' shape
+
+  #' Distribution function of the gamma
+  zaf_gamma <- data.frame(
+    dist = diff(pgamma(c(15, 20, 25, 30, 35, 40, 45, 50), shape = alpha, rate = beta)),
+    age_group = age_groups) %>%
+    dplyr::mutate(dist = dist / sum(dist))
+
+
+  # Naomi population
+  pop <- naomi_pop %>%
+    dplyr::filter(area_id %in% unique(msm$area_id),
+                  age_group %in% age_groups,
+                  sex == "male")
+
+  msm_est <- dplyr::left_join(
+    pop, zaf_gamma,
+    by = dplyr::join_by(age_group)) %>%
+    dplyr::full_join(
+      dplyr::select(msm, total_msm, iso3, area_id),
+      by = c("area_id", "iso3")
+    ) %>%
+    dplyr::mutate(msm = dist * total_msm,
+                  msm_prop = msm / population) %>%
+    dplyr::select(-dist, -total_msm)
+
+  msm_est
+}
+
+#' Adjust female sexual behavior risk groups by FSW proportions
+#'
+#' @param outputs Naomi output.
+#' @param options Naomi model options.
+#' @param fsw_est 5-year estimates of FSW PSEs generated from `agyw_disaggregate_fsw()`.
+#' @param female_sae_path Path to female estimates of sexual behavior risk group.
+#'
+#' @return District level estimates of female sexual risk behaviour groups
+#'
+#' Estimates are generated for the following groups:
+#'
+#' * `nosex12m`:
+#' * `sexcohab`:
+#' * `sexonregplus`:
+#' * `sexonreg`:
+#' * `sexpaid12m`:
+#' * `nosex12m`:
+#'
+#' Calculation steps:
+#' 1. Align admin0/admin1 FSW proportions with SRB SAE estimates.
+#' 2. Subtract the proportion of FSW from total high risk female population.
+
+
+agyw_adjust_sexbehav_fsw <- function(outputs,
+                                     options,
+                                     fsw_est)
+{
+
+
+  #' Match FSW estimates (admin0 or admin1) with SAE estimates
+  fsw_analysis_level <- paste0("area_id",unique(fsw_est$area_level))
+
+  areas_wide <-  naomi::spread_areas(outputs$meta_area) %>%
+    sf::st_drop_geometry()
+
+  map <- dplyr::select(areas_wide, area_id, dplyr::all_of(fsw_analysis_level)) %>%
+    dplyr::rename(fsw_match_area = 2)
+
+  #' Allocate admin1 FSW proportions
+  fsw_df <- fsw_est %>% dplyr::select(age_group, fsw_match_area = area_id, fsw_prop)
+
+  #' Load female SRB proportions
+
+  female_srb <- naomi.resources::load_agyw_exdata("srb_female", options$area_scope)
+
+  adj_female_srb <- female_srb %>%
+    dplyr::filter(iso3 == options$area_scope) %>%
+    dplyr::left_join(map, by = dplyr::join_by(area_id)) %>%
+    dplyr::left_join(fsw_df, by = dplyr::join_by(age_group, fsw_match_area)) %>%
+    dplyr::select(iso3, year, indicator, survey_id, age_group, area_id, area_name, estimate_smoothed, fsw_prop) %>%
+    tidyr::pivot_wider(
+      names_from = indicator,
+      values_from = estimate_smoothed
+    ) %>%
+    # Subtracting proportion FSW from total high risk female population
+    dplyr::mutate(
+      sexpaid12m = fsw_prop,
+      sexnonreg = 1 - nosex12m - sexcohab - sexpaid12m,
+      fsw_prop = NULL
+    ) %>%
+    tidyr::pivot_longer(
+      cols = c(nosex12m,sexcohab,sexnonregplus,sexnonreg,sexpaid12m),
+      names_to = "indicator",
+      values_to = "estimate_smoothed"
+    )
+
+  adj_female_srb
+
+}
+
+#' Adjust male sexual behavior risk groups by MSM + PWID proportions
+#'
+#' @param outputs Naomi output.
+#' @param options Naomi model options.
+#' @param msm_est 5-year estimates of MSM PSEs generated from `agyw__disaggregate_msm()`.
+#' @param pwid_est 5-year estimates of MSM PSEs generated from `agyw__disaggregate_pwid()`.
+#' @param sae_path Path to female estimates of sexual behavior risk group.
+#'
+#' @return District level estimates of male sexual risk behaviour groups
+#'
+#' Estimates are generated for the following groups:
+#'
+#' * `nosex12m`:
+#' * `sexcohab`:
+#' * `sexonregplus`:
+#' * `sexonreg`:
+#' * `msm`:
+#' * `pwid`:
+#'
+#'
+#' Calculation steps:
+#' 1. Align admin0/admin1 MSM and PWID proportions with SRB SAE estimates.
+#' 2. Subtracting MSM and PWID proportionally from all SRB groups.
+#'
+
+agyw_adjust_sexbehav_msm_pwid <- function(outputs,
+                                          options,
+                                          msm_est,
+                                          pwid_est) {
+
+
+  # Match KP estimates (admin0 or admin1) with SAE estimates
+  msm_analysis_level <- paste0("area_id",unique(msm_est$area_level))
+
+  areas_wide <-  naomi::spread_areas(outputs$meta_area) %>%
+    sf::st_drop_geometry()
+
+  map <- dplyr::select(areas_wide, area_id, dplyr::all_of(msm_analysis_level)) %>%
+    dplyr::rename(kp_match_area = 2)
+
+  #   Allocate KP
+  msm_df <- msm_est %>% dplyr::select(age_group, kp_match_area = area_id, msm_prop)
+  pwid_df <- pwid_est %>% dplyr::select(age_group, kp_match_area = area_id, pwid_prop)
+
+  #' Load male SRB proportions
+
+  male_srb <- naomi.resources::load_agyw_exdata("srb_male", options$area_scope)
+
+  adj_male_srb <- male_srb %>%
+    dplyr::filter(iso3 == options$area_scope) %>%
+    dplyr::left_join(map, by = dplyr::join_by(area_id)) %>%
+    dplyr::left_join(msm_df, by = dplyr::join_by(age_group, kp_match_area)) %>%
+    dplyr::left_join(pwid_df, by = dplyr::join_by(age_group, kp_match_area)) %>%
+    dplyr::select(iso3, year, indicator, survey_id, age_group, area_id, area_name,
+                  estimate_smoothed, msm_prop, pwid_prop) %>%
+    tidyr::pivot_wider(
+      names_from = indicator,
+      values_from = estimate_smoothed
+    ) %>%
+    #' Subtracting MSM and PWID proportionally from all SRB risk groups
+    #' (FSW was just from high-risk females)
+    dplyr::mutate(
+      nosex12m = nosex12m * (1 - pwid_prop - msm_prop),
+      sexcohab = sexcohab * (1 - pwid_prop - msm_prop),
+      sexnonregplus = sexnonregplus * (1 - pwid_prop - msm_prop),
+      sexnonreg = sexnonregplus,
+      msm = msm_prop, msm_prop = NULL,
+      pwid = pwid_prop, pwid_prop = NULL
+    ) %>%
+    tidyr::pivot_longer(
+      cols = c(nosex12m, sexcohab, sexnonregplus, sexnonreg, msm, pwid),
+      names_to = "indicator",
+      values_to = "estimate_smoothed"
+    )
+
+  adj_male_srb
+
+}
+
+#' Calculate prevalence for female SRB groups.
+#'
+#' @param outputs Naomi output.
+#' @param options Naomi model options.
+#' @param fsw_est 5-year estimates of MSM PSEs generated from `agyw_disaggregate_fse()`.
+#' @param female_sexbehav KP adjusted estimates of female SRB groups generated by `agyw_adjust_sexbehav_fsw()`
+#' @param female_hiv_path Path to SRB HIV estimates from household surveys (last updated XX-XX-XX).
+#' @param pse_path Path to KP PSEs last updated (XX-XX-XX).
+#' @param survey_year Year of survey to sample estimates.
+#'
+#' @return SRB PSEs with logit prevalence estimates.
+#'
+#' TODO: add in more documentation here
+#' Calculation steps:
+#' 1.
+#' 2.
+#' 3.
+#' 4.
+#'
+agyw_calculate_prevalence_female <- function(outputs,
+                                             options,
+                                             fsw_est,
+                                             female_srb,
+                                             survey_year_sample = 2018) {
+
+  #' Extract country specific national FSW prevalence
+  iso3 <- options$area_scope
+  pse <- naomi.resources::load_agyw_exdata("fsw_pse", iso3)
+
+  fsw_prev <- pse %>% dplyr::filter(indicator=="prev")
+
+  #' Format SRB survey estimates
+  srb_survey <- naomi.resources::load_agyw_exdata("srb_survey_female", iso3)
+
+  prev_wide <- srb_survey %>%
+    dplyr::filter(
+      area_id == options$area_scope,
+      (nosex12m != 0) & (sexcohab != 0) & (sexnonreg != 0) & (sexpaid12m != 0),
+      !age_group %in% c("Y015_024","Y015_049","Y025_049"),
+      indicator == "prevalence") %>%
+    dplyr::mutate(
+      behav = dplyr::case_when(
+        nosex12m == 1 ~ "nosex12m", sexcohab == 1 ~ "sexcohab",
+        sexnonreg == 1 ~ "sexnonreg", sexpaid12m == 1 ~ "sexpaid12m",
+        TRUE ~ "all"), .after = indicator) %>%
+    dplyr::select(indicator, behav, survey_id, area_id, age_group, estimate) %>%
+    tidyr::pivot_wider(
+      names_from = "behav",
+      values_from = "estimate")
+
+  ind <- prev_wide %>%
+    dplyr::mutate(
+      #' Calculate the odds
+      across(nosex12m:all, ~ .x / (1 - .x), .names = "{.col}_odds"),
+      #' Log odds
+      across(nosex12m:all, ~ log(.x / (1 - .x)), .names = "{.col}_logodds"),
+      #' Prevalence ratios
+      across(nosex12m:all, ~ .x / all, .names = "{.col}_pr"),
+      #' Odds ratios
+      across(nosex12m:all, ~ (.x / (1 - .x)) / all_odds, .names = "{.col}_or")
+    ) %>%
+    dplyr::rename_with(.cols = nosex12m:all, ~ paste0(.x, "_prevalence")) %>%
+    dplyr::select(-indicator) %>%
+    tidyr::pivot_longer(
+      cols = starts_with(c("nosex12m", "sexcohab", "sexnonreg", "sexpaid12m", "all")),
+      names_to = "indicator",
+      values_to = "estimate"
+    ) %>%
+    tidyr::separate(indicator, into = c("behav", "indicator"))
+
+  naomi_gen_pop_prev <- outputs$indicators %>%
+    dplyr::filter(calendar_quarter == options$calendar_quarter_t2, sex == "female",
+                  age_group == "Y015_049", indicator == "prevalence") %>%
+    dplyr::select(area_id, gen_prev = mean)
+
+  kp_prev <- fsw_prev %>%
+    dplyr::select(iso3,area_id,median) %>%
+    dplyr::left_join(naomi_gen_pop_prev, by = dplyr::join_by(area_id)) %>%
+    dplyr::mutate(prev_fsw_logodds = log(median / (1-median)),
+                  prev_logodds = log(gen_prev / (1-gen_prev)))
+
+  #' KP regression: FSW prevalence relative to general prevalence
+  kp_fit <- lm(prev_fsw_logodds ~ prev_logodds, data = kp_prev)
+
+
+  ind_dat <- ind %>%
+    dplyr::mutate(
+      nosex12m_id = ifelse(behav == "nosex12m", 1, 0),
+      sexcohab_id = ifelse(behav == "sexcohab", 1, 0),
+      sexnonreg_id = ifelse(behav == "sexnonreg", 1, 0),
+      sexpaid12m_id = ifelse(behav == "sexpaid12m", 1, 0),
+      all_id = ifelse(behav == "all", 1, 0),
+      year = as.numeric(substr(survey_id,4,7))) %>%
+    dplyr::filter(indicator == "prevalence", !is.na(estimate))
+
+  #' Younger age groups regression
+  fit_y <- glm(estimate ~ -1 + all_id + nosex12m_id + sexcohab_id + sexnonreg_id + sexpaid12m_id,
+               family = quasibinomial(link = "logit"),
+               data = ind_dat %>% dplyr::filter(age_group %in% c("Y015_019","Y020_024","Y025_029")))
+
+  odds_estimate <- exp(fit_y$coefficients)
+  or_y <- odds_estimate / odds_estimate[1]
+  lor_y <- log(odds_estimate / odds_estimate[1])
+  lor_y <- lor_y[-1]
+
+  odds_estimate <- exp(fit_y$coefficients)
+  or_y <- odds_estimate / odds_estimate[1]
+  lor_y <- log(odds_estimate / odds_estimate[1])
+  lor_y <- lor_y[-1]
+
+  #' Older age groups regression
+  fit_o <- glm(estimate ~ -1 + all_id + nosex12m_id + sexcohab_id + sexnonreg_id + sexpaid12m_id,
+               family = quasibinomial(link = "logit"),
+               data = ind_dat %>% dplyr::filter(age_group %in% c("Y030_034","Y035_039","Y040_044","Y045_49")))
+
+  odds_estimate <- exp(fit_o$coefficients)
+  or_o <- odds_estimate / odds_estimate[1]
+  lor_o <- log(odds_estimate / odds_estimate[1])
+  lor_o <- lor_o[-1]
+
+  #' Naomi estimates of PLHIV and population by district and age band
+  age_groups <- c("Y015_019", "Y020_024", "Y025_029", "Y030_034",
+                  "Y035_039", "Y040_044", "Y045_049")
+
+  naomi_est <- outputs$indicators %>%
+    dplyr::filter(calendar_quarter == options$calendar_quarter_t2,
+                  sex == "female", area_level == options$area_level,
+                  indicator %in% c("population", "plhiv", "infections", "prevalence"),
+                  age_group %in% age_groups) %>%
+    dplyr::select(area_id, area_level, age_group, indicator, mean) %>%
+    tidyr::pivot_wider( names_from = indicator, values_from = mean) %>%
+    dplyr::rename(gen_prev = prevalence)
+
+  #' Modelled estimates of proportion in each risk group
+  risk_group_prop <- female_srb %>%
+    dplyr::filter(year == survey_year_sample) %>%
+    dplyr::select(area_id, age_group, indicator, estimate_smoothed) %>%
+    tidyr::pivot_wider( names_from = indicator, values_from = estimate_smoothed,
+                        values_fn = mean, names_prefix = "prop_") %>%
+    dplyr::right_join(naomi_est,  by = c("area_id", "age_group")) %>%
+    dplyr::filter(!is.na(prop_nosex12m)) %>%
+    dplyr::mutate(
+      population_nosex12m = population * prop_nosex12m,
+      population_sexcohab = population * prop_sexcohab,
+      population_sexnonreg = population * prop_sexnonreg,
+      population_sexpaid12m = population * prop_sexpaid12m
+    )
+
+  #' Calculate prevalence in each category
+  calculate_prevalence <- function(x){
+    if(x$age_group[1] %in% c("Y015_019","Y020_024","Y025_029")) {lor <- lor_y} else {lor <- lor_o}
+    population_fine <- dplyr::filter(x, indicator == "population")$estimate
+    plhiv <- x$plhiv[1]
+    ywkp_lor <- c("ywkp_lor" = x$ywkp_lor[1])
+    lor <- c(lor[1:3],ywkp_lor)
+    prev <- logit_scale_prev(lor, population_fine, plhiv)
+    y <- dplyr::filter(x, indicator == "prop") %>%
+      dplyr::mutate( indicator = "prev", estimate = prev)
+    dplyr::bind_rows(x, y)
+  }
+
+  #' Calculate logit prevalence and format
+  logit_prev <- risk_group_prop %>%
+    dplyr::mutate(ywkp_lor = calculate_ywkp_pr_lor(gen_prev, fit = kp_fit)$lor) %>%
+    dplyr::select(-starts_with("pr_"), -gen_prev) %>%
+    tidyr::pivot_longer(
+      cols = starts_with(c("population_", "prop_")),
+      names_to = "indicator",
+      values_to = "estimate"
+    ) %>%
+    tidyr::separate(
+      indicator,
+      into = c("indicator", "behav")
+    ) %>%
+    dplyr::filter(behav %in% c("nosex12m", "sexcohab", "sexnonreg", "sexpaid12m")) %>%
+    split(~ area_id + age_group) %>%
+    lapply(calculate_prevalence) %>%
+    dplyr::bind_rows() %>%
+    tidyr::unite("indicator", indicator, behav, sep = "_") %>%
+    tidyr::pivot_wider( names_from = indicator, values_from = estimate)
+
+  logit_prev
+
+}
+
+#' Calculate prevalence for male SRB groups.
+#'
+#' @param outputs Naomi output.
+#' @param options Naomi model options.
+#' @param male_srb
+#' @param msm_est .
+#' @param survey_year Year of survey to sample estimates.
+#'
+#' @return SRB PSEs with logit prevalence estimates.
+#'
+#' TODO: add in more documentation here
+#' Calculation steps:
+#' 1.
+#' 2.
+#' 3.
+#' 4.
+#'
+
+agyw_calculate_prevalence_male <- function(outputs,
+                                           options,
+                                           msm_est,
+                                           male_srb,
+                                           survey_year_sample = 2018) {
+
+
+
+  # Naomi general population prevalence
+  naomi_gen_pop_prev <- outputs$indicators %>%
+    dplyr::filter(calendar_quarter == options$calendar_quarter_t2, sex == "male",
+                  age_group == "Y015_049", indicator == "prevalence") %>%
+    dplyr::mutate(logit_gen_prev = log(mean / (1-mean))) %>%
+    dplyr::select(area_id, logit_gen_prev, area_level)
+
+
+  #' Extract country specific national MSM + PWID prevalence
+  iso3 <- options$area_scope
+
+  msm <- naomi.resources::load_agyw_exdata("msm_pse", iso3)
+  pwid <- naomi.resources::load_agyw_exdata("pwid_pse", iso3)
+
+  msm_prev <- msm %>% dplyr::filter(indicator=="prev") %>%
+    dplyr::mutate(kp = "MSM")
+  pwid_prev <- pwid %>% dplyr::filter(indicator=="prev") %>%
+    dplyr::mutate(kp = "PWID")
+
+
+  # KP population prevalence
+  kp_prev <- dplyr::bind_rows(msm_prev, pwid_prev) %>%
+    dplyr::select(-indicator,-lower, - upper) %>%
+    dplyr::mutate(median = log(median / (1-median))) %>%
+    # Add in Naomi general pop prevalence
+    dplyr::left_join(naomi_gen_pop_prev, by = dplyr::join_by(area_id)) %>%
+    dplyr::select(kp, iso3, area_id, logit_gen_prev, median, area_level) %>%
+    # Calculate Log-Odds ratio
+    tidyr::pivot_wider(names_from = kp,
+                       values_from = c("logit_gen_prev","median")) %>%
+    dplyr::mutate(msm_lor = median_MSM - logit_gen_prev_MSM,
+                  pwid_lor = median_PWID - logit_gen_prev_PWID) %>%
+    dplyr::select(-c("logit_gen_prev_PWID","logit_gen_prev_MSM","median_PWID","median_MSM","area_level"))
+
+  #' Format SRB survey estimates
+
+  srb_survey <- naomi.resources::load_agyw_exdata("srb_survey_male", iso3)
+
+  prev_wide <- srb_survey %>%
+    dplyr::filter(
+      area_id == options$area_scope,
+      (nosex12m != 0) & (sexcohab != 0) & (sexnonreg != 0) & (sexpaid12m != 0),
+      !age_group %in% c("Y015_024","Y015_049","Y025_049"),
+      indicator == "prevalence") %>%
+    dplyr::mutate(
+      behav = dplyr::case_when(
+        nosex12m == 1 ~ "nosex12m", sexcohab == 1 ~ "sexcohab",
+        sexnonreg == 1 ~ "sexnonreg", sexpaid12m == 1 ~ "sexpaid12m",
+        TRUE ~ "all"), .after = indicator) %>%
+    dplyr::select(indicator, behav, survey_id, area_id, age_group, estimate) %>%
+    tidyr::pivot_wider(
+      names_from = "behav",
+      values_from = "estimate")
+
+  ind <- prev_wide %>%
+    dplyr::mutate(
+      #' Calculate the odds
+      across(nosex12m:all, ~ .x / (1 - .x), .names = "{.col}_odds"),
+      #' Log odds
+      across(nosex12m:all, ~ log(.x / (1 - .x)), .names = "{.col}_logodds"),
+      #' Prevalence ratios
+      across(nosex12m:all, ~ .x / all, .names = "{.col}_pr"),
+      #' Odds ratios
+      across(nosex12m:all, ~ (.x / (1 - .x)) / all_odds, .names = "{.col}_or")
+    ) %>%
+    dplyr::rename_with(.cols = nosex12m:all, ~ paste0(.x, "_prevalence")) %>%
+    dplyr::select(-indicator) %>%
+    tidyr::pivot_longer(
+      cols = starts_with(c("nosex12m", "sexcohab", "sexnonreg", "sexpaid12m", "all")),
+      names_to = "indicator",
+      values_to = "estimate"
+    ) %>%
+    tidyr::separate(indicator, into = c("behav", "indicator"))
+
+  ind_dat <- ind %>%
+    dplyr::mutate(
+      nosex12m_id = ifelse(behav == "nosex12m", 1, 0),
+      sexcohab_id = ifelse(behav == "sexcohab", 1, 0),
+      sexnonreg_id = ifelse(behav == "sexnonreg", 1, 0),
+      sexpaid12m_id = ifelse(behav == "sexpaid12m", 1, 0),
+      all_id = ifelse(behav == "all", 1, 0)) %>%
+    dplyr::filter( indicator == "prevalence", !is.na(estimate))
+
+
+  # Young age group regression
+  fit_y <- glm(estimate ~ -1 + all_id + nosex12m_id + sexcohab_id + sexnonreg_id + sexpaid12m_id,
+               family = quasibinomial(link = "logit"),
+               data = ind_dat %>% dplyr::filter(age_group %in% c("Y015_019","Y020_024","Y025_029")))
+
+  odds_estimate <- exp(fit_y$coefficients)
+  or_y <- odds_estimate / odds_estimate[1]
+  lor_y <- log(odds_estimate / odds_estimate[1])
+  lor_y <- lor_y[-1]
+
+  # Older age-group regression
+  fit_o <- glm(estimate ~ -1 + all_id + nosex12m_id + sexcohab_id + sexnonreg_id + sexpaid12m_id,
+               family = quasibinomial(link = "logit"),
+               data = ind_dat %>% dplyr::filter(age_group %in% c("Y030_034","Y035_039","Y040_044","Y045_49")))
+
+  odds_estimate <- exp(fit_o$coefficients)
+  or_o <- odds_estimate / odds_estimate[1]
+  lor_o <- log(odds_estimate / odds_estimate[1])
+  lor_o <- lor_o[-1]
+
+
+  #' Naomi estimates of PLHIV and population by district and age band
+  age_groups <- c("Y015_019", "Y020_024", "Y025_029", "Y030_034",
+                  "Y035_039", "Y040_044", "Y045_049")
+
+  naomi_est <- outputs$indicators %>%
+    dplyr::filter(calendar_quarter == options$calendar_quarter_t2,
+                  sex == "male", area_level == options$area_level,
+                  indicator %in% c("population", "plhiv", "infections", "prevalence"),
+                  age_group %in% age_groups) %>%
+    dplyr::select(area_id, area_level, age_group, indicator, mean) %>%
+    tidyr::pivot_wider( names_from = indicator, values_from = mean) %>%
+    dplyr::rename(gen_prev = prevalence)
+
+  # Match KP estimates (admin0 or admin1) with SAE estimates
+  msm_analysis_level <- paste0("area_id",unique(msm_est$area_level))
+  areas_wide <-  naomi::spread_areas(outputs$meta_area) %>%
+    sf::st_drop_geometry()
+  map <- dplyr::select(areas_wide, area_id, dplyr::all_of(msm_analysis_level)) %>%
+    dplyr::rename(kp_match_area = 2)
+
+  #' Modelled estimates of proportion in each risk group
+  risk_group_prop <- male_srb %>%
+    dplyr::left_join(map, by = dplyr::join_by(area_id)) %>%
+    dplyr::filter(year == survey_year_sample, iso3 == options$area_scope) %>%
+    dplyr::select(area_id,kp_match_area, age_group, indicator, estimate_smoothed) %>%
+    tidyr::pivot_wider( names_from = indicator, values_from = estimate_smoothed,
+                        values_fn = mean, names_prefix = "prop_") %>%
+    # Add in Naomi indicators
+    dplyr::right_join(naomi_est,  by = c("area_id", "age_group")) %>%
+    dplyr::filter(!is.na(prop_nosex12m)) %>%
+    dplyr::mutate(
+      population_nosex12m = population * prop_nosex12m,
+      population_sexcohab = population * prop_sexcohab,
+      population_sexnonreg = population * prop_sexnonreg,
+      population_msm = population * prop_msm,
+      population_pwid = population * prop_pwid
+    )
+
+  #' Calculate prevalence in each category
+  calculate_prevalence <- function(x){
+
+    if(x$age_group[1] %in% c("Y015_019","Y020_024","Y025_029")) {lor <- lor_y} else {lor <- lor_o}
+
+    population_fine <- dplyr::filter(x, indicator == "population")$estimate
+    plhiv <- x$plhiv[1]
+    kp_lor <- c("msm_lor" = x$msm_lor[1],"pwid_lor"= x$pwid_lor[1])
+    lor <- c(lor[1:3], kp_lor)
+    prev <- logit_scale_prev(lor, population_fine, plhiv)
+    y <- dplyr::filter(x, indicator == "prop") %>%
+      dplyr::mutate(indicator = "prev",estimate = prev)
+    dplyr::bind_rows(x, y)
+  }
+
+
+  #' Calculate logit prevalence and format
+  logit_prev <- risk_group_prop %>%
+    dplyr::left_join(kp_prev, by = c("kp_match_area" = "area_id")) %>%
+    tidyr::pivot_longer(
+      cols = starts_with(c("population_", "prop_")),
+      names_to = "indicator",
+      values_to = "estimate") %>%
+    tidyr::separate(indicator, into = c("indicator", "behav")) %>%
+    dplyr::filter(behav %in% c("nosex12m", "sexcohab", "sexnonreg", "msm", "pwid")) %>%
+    split(~ area_id + age_group) %>%
+    lapply(calculate_prevalence) %>%
+    dplyr::bind_rows() %>%
+    tidyr::unite("indicator", indicator, behav, sep = "_") %>%
+    tidyr::pivot_wider( names_from = indicator, values_from = estimate)
+
+  logit_prev
+
+}
+
+
+#' Calculate the odds
+#'
+#' @param p Probability in [0, 1]
+odds <- function(p) p / (1 - p)
+
+#' Calculate YWKP prevalence ratio and log odds ratio
+#'
+#' @param prev (General population) prevalence
+#' @param fit A model relating log-odds prevalence to YWKP log odds prevalence
+calculate_ywkp_pr_lor <- function(prev, fit = ywkp_fit) {
+  prev_logodds <- qlogis(prev)
+  prev_ywkp_logodds <- predict(fit, data.frame(prev_logodds = prev_logodds))
+  #' Ensure that the LOR is above that of e.g. the sexnonreg risk group
+  prev_ywkp_logodds <- pmax(prev_ywkp_logodds, prev_logodds + 0.25)
+  prev_ywkp <- plogis(prev_ywkp_logodds)
+  #' Prevalence ratio
+  pr <- prev_ywkp / prev
+  #' Log-odds ratio
+  lor <- prev_ywkp_logodds - prev_logodds
+  return(list(pr = pr, lor = lor, prev = prev, prev_ywkp = prev_ywkp))
+}
+
+#' Calculate prevalence and PLHIV using logit-scale disaggregation
+#'
+#' @param lor Log odds-ratios
+#' @param N_fine Number of individuals in each group
+#' @param plhiv Total number of people living with HIV
+logit_scale_prev <- function(lor, N_fine, plhiv) {
+  #' theta represents prevalence in baseline risk group
+  #' plogis(lor + theta) is prevalence in each risk group
+  #' plogis(lor + theta) * N_fine is PLHIV in each risk group
+  optfn <- function(theta) (sum(plogis(lor + theta) * N_fine) - plhiv)^2
+  #' Optimisation for baseline risk group prevalence
+  #' On the logit scale should be more numerically stable
+  opt <- optimise(optfn, c(-10, 10), tol = .Machine$double.eps^0.5)
+  #' Return prevalence
+  plogis(lor + opt$minimum)
+
+}
+
+
+#' Calculate incidence for female SRB groups.
+#'
+#' @param outputs Naomi output.
+#' @param options Naomi options extracted from outputs
+#' @param female_srb Estimates of female sexual risk groups generated by `agyw_adjust_sexbehav_fsw()`
+#' @param female_logit_prevalence Risk adjusted estimates of female prevalence in sexual risk groups generated by `agyw_calculate_prevalence_female()`
+#' @param survey_year Survey year to sample from the SAE model. Default is 2018.
+#'
+#'
+#' @return Wide format output required for the AGYW workbook.
+#'
+#'#' TODO: add in more documentation here
+#' Calculation steps:
+#' 1.
+#' 2.
+#' 3.
+#' 4.
+
+agyw_calculate_incidence_female <- function(outputs,
+                                            options,
+                                            female_srb,
+                                            female_logit_prevalence,
+                                            survey_year = 2018) {
+
+  naomi_incidence <- outputs$indicators %>%
+    dplyr::select(-lower,-upper,-se, -median, -mode, -indicator_label) %>%
+    dplyr::filter(calendar_quarter == options$calendar_quarter_t2,
+                  indicator %in% c("population", "plhiv", "infections", "incidence"),
+                  sex == "female") %>%
+    tidyr::pivot_wider(names_from = indicator, values_from = mean) %>%
+    #' In terms of new infections per hundred person years
+    #' (Naomi estimate is per 1000 person years)
+    dplyr::mutate(incidence = incidence / 10)
+
+  risk_group_prevalence <- female_logit_prevalence %>%
+    dplyr::select(area_id, age_group, starts_with("prev_"))
+
+  df <- female_srb %>%
+    dplyr::filter(year == survey_year) %>%
+    dplyr::select(area_id, age_group, indicator, estimate_smoothed) %>%
+    tidyr::pivot_wider(names_from = indicator, values_from = estimate_smoothed, values_fn = mean) %>%
+    dplyr::left_join(naomi_incidence, by = dplyr::join_by(area_id, age_group)) %>%
+    dplyr::left_join(risk_group_prevalence, by = dplyr::join_by(area_id, age_group)) %>%
+    dplyr::filter(!is.na(population))
+
+
+  #' Risk ratios for people non-regular sex partners relative to those with a
+  #' single cohabiting sex partner
+  #' TODO: Add source
+  rr_sexcohab <- 1
+  rr_sexnonreg_young <- 1.72
+  rr_sexnonreg_old <- 2.1
+
+  #' Tiered HIV risk ratio for the FSW group depending on district-level HIV
+  #' incidence in general population
+  #' TODO: Add source
+  rr_sexpaid12m_vvh <- 3 #' >3%
+  rr_sexpaid12m_vh <- 6 #' 1-3%
+  rr_sexpaid12m_h <- 9 #' 0.3-1%
+  rr_sexpaid12m_m <- 13 #' 0.1-0.3%
+  rr_sexpaid12m_l <- 25 #' <0.1%
+
+  #' x = Incidence levels in the general population
+  #' y = Tiered HIV risk ratios
+  regression_dat <- data.frame(x = c(0.1,0.3,1,3,9), y = c(25,13,9,6,3))
+  rr_reg <- lm(log(y) ~ log(x), data = regression_dat)
+
+  rr_sexpaid12m <- exp(predict(rr_reg,data.frame(x = df$incidence)))
+  # This gives implausibly high RRs for very low districts (e.g. IRR = 297!)
+  # capping at 25
+  rr_sexpaid12m[rr_sexpaid12m > 25] <- 25
+
+  #' TODO: Get distributions on these and using a sampling method to get
+  #' uncertainty in economic analysis e.g.
+  rr_sexnonreg_se <- 0.2
+  rr_sexnonreg_se <- 1
+
+
+  #' Calculate risk group incidence
+  Y015_024 <-  c("Y015_019", "Y020_024")
+  Y025_049 <-  c("Y025_029","Y030_034","Y035_039","Y040_044", "Y045_049")
+
+  df1 <- df %>%
+    dplyr::mutate(
+      rr_sexpaid12m = rr_sexpaid12m,
+      rr_sexnonreg = dplyr::case_when(
+        age_group %in% Y015_024 ~ rr_sexnonreg_young,
+        age_group %in% Y025_049 ~ rr_sexnonreg_old,
+        TRUE ~ NA_real_),
+      population_nosex12m = population * nosex12m,
+      population_sexcohab = population * sexcohab,
+      population_sexnonreg = population * sexnonreg,
+      population_sexpaid12m = population * sexpaid12m,
+      plhiv_nosex12m = population_nosex12m * prev_nosex12m,
+      plhiv_sexcohab = population_sexcohab * prev_sexcohab,
+      plhiv_sexnonreg = population_sexnonreg * prev_sexnonreg,
+      plhiv_sexpaid12m = population_sexpaid12m * prev_sexpaid12m,
+      susceptible_nosex12m = population_nosex12m - plhiv_nosex12m,
+      susceptible_sexcohab = population_sexcohab - plhiv_sexcohab,
+      susceptible_sexnonreg = population_sexnonreg - plhiv_sexnonreg,
+      susceptible_sexpaid12m = population_sexpaid12m - plhiv_sexpaid12m,
+      incidence_nosex12m = 0,
+      incidence_sexcohab = infections / (susceptible_sexcohab + rr_sexnonreg * susceptible_sexnonreg + rr_sexpaid12m *susceptible_sexpaid12m),
+      incidence_sexnonreg = incidence_sexcohab * rr_sexnonreg,
+      incidence_sexpaid12m = incidence_sexcohab * rr_sexpaid12m,
+      infections_nosex12m = 0,
+      infections_sexcohab = susceptible_sexcohab * incidence_sexcohab,
+      infections_sexnonreg = susceptible_sexnonreg * incidence_sexnonreg,
+      infections_sexpaid12m = susceptible_sexpaid12m * incidence_sexpaid12m)
+
+  #' Calculate risk group incidence for aggregate age groups
+
+  summarise_age_cat_female <- function(dat, age_cat) {
+
+    if(age_cat == "Y015_024"){age_groups <- c("Y015_019", "Y020_024")}
+    if(age_cat == "Y025_049"){age_groups <- c("Y025_029","Y030_034","Y035_039",
+                                              "Y040_044", "Y045_049")}
+    if(age_cat == "Y015_049"){age_groups <- c("Y015_019", "Y020_024","Y025_029",
+                                              "Y030_034","Y035_039","Y040_044",
+                                              "Y045_049")}
+
+
+    dat %>%
+      dplyr::group_by(area_id, area_name, sex, calendar_quarter) %>%
+      dplyr::summarise(
+        "population" = sum(population * as.integer(age_group %in% age_groups)),
+        "plhiv" = sum(plhiv * as.integer(age_group %in% age_groups)),
+        "infections" = sum(infections *as.integer(age_group %in% age_groups)),
+        "population_nosex12m" = sum(population_nosex12m * as.integer(age_group %in% age_groups)),
+        "population_sexcohab" = sum(population_sexcohab * as.integer(age_group %in% age_groups)),
+        "population_sexnonreg" = sum(population_sexnonreg * as.integer(age_group %in% age_groups)),
+        "population_sexpaid12m" = sum(population_sexpaid12m * as.integer(age_group %in% age_groups)),
+        "plhiv_nosex12m" = sum(plhiv_nosex12m * as.integer(age_group %in% age_groups)),
+        "plhiv_sexnonreg" = sum(plhiv_sexnonreg * as.integer(age_group %in% age_groups)),
+        "plhiv_sexpaid12m" = sum(plhiv_sexpaid12m * as.integer(age_group %in% age_groups)),
+        "plhiv_sexcohab" = sum(plhiv_sexcohab * as.integer(age_group %in% age_groups)),
+        "susceptible_nosex12m" = sum(susceptible_nosex12m * as.integer(age_group %in% age_groups)),
+        "susceptible_sexcohab" = sum(susceptible_sexcohab * as.integer(age_group %in% age_groups)),
+        "susceptible_sexnonreg" = sum(susceptible_sexnonreg * as.integer(age_group %in% age_groups)),
+        "susceptible_sexpaid12m" = sum(susceptible_sexpaid12m * as.integer(age_group %in% age_groups)),
+        "infections_nosex12m" = sum(infections_nosex12m * as.integer(age_group %in% age_groups)),
+        "infections_sexcohab" = sum(infections_sexcohab * as.integer(age_group %in% age_groups)),
+        "infections_sexnonreg" = sum(infections_sexnonreg * as.integer(age_group %in% age_groups)),
+        "infections_sexpaid12m" = sum(infections_sexpaid12m * as.integer(age_group %in% age_groups)),
+        .groups = "drop") %>%
+      dplyr::mutate(age_group = age_cat,
+                    nosex12m = susceptible_nosex12m/(population-plhiv),
+                    sexcohab = susceptible_sexcohab/(population-plhiv),
+                    sexnonregplus = sum(susceptible_sexnonreg,susceptible_sexpaid12m)/(population-plhiv),
+                    sexnonreg = susceptible_sexnonreg/(population-plhiv),
+                    sexpaid12m = susceptible_sexpaid12m/(population-plhiv),
+                    incidence = (infections/(population-plhiv))*100,
+                    incidence_nosex12m = infections_nosex12m/susceptible_nosex12m,
+                    incidence_sexcohab = infections_sexcohab/susceptible_sexcohab,
+                    incidence_sexnonreg = infections_sexnonreg/susceptible_sexnonreg,
+                    incidence_sexpaid12m = infections_sexpaid12m/susceptible_sexpaid12m,
+                    prev_nosex12m = plhiv_nosex12m/(susceptible_nosex12m + plhiv_nosex12m),
+                    prev_sexcohab = plhiv_sexcohab/(susceptible_sexcohab + plhiv_sexcohab),
+                    prev_sexnonreg = plhiv_sexnonreg/(susceptible_sexnonreg + plhiv_sexnonreg),
+                    prev_sexpaid12m = plhiv_sexpaid12m/(susceptible_sexpaid12m + plhiv_sexpaid12m),
+                    rr_sexpaid12m = NA)
+  }
+
+  # Aggregate data
+  df2 <- dplyr::bind_rows(summarise_age_cat_female(df1, "Y015_024"),
+                          summarise_age_cat_female(df1, "Y025_049"),
+                          summarise_age_cat_female(df1, "Y015_049"))
+
+  # Calculate incidence
+  df3 <- dplyr::bind_rows(df1, df2) %>%
+    dplyr::mutate(incidence_cat = cut(incidence,
+                                      c(0, 0.3, 1, 3, 10^6),
+                                      labels = c("Low", "Moderate", "High", "Very High"),
+                                      include.lowest = TRUE, right = TRUE))
+
+  #' Check that sum of disaggregated infections is the same as total infections
+  sum_infections <- df3$infections_nosex12m + df3$infections_sexcohab + df3$infections_sexnonreg + df3$infections_sexpaid12m
+
+  # TO DO: Flag this to add in warning - stop please contact support (usually an issue with mapping boundaries)
+  # ADD IN WARNIING HERE
+  stopifnot(max(df3$infections - sum_infections) < 10^{-9})
+
+
+  df3 %>%
+    dplyr::mutate(concat = paste0(area_id, age_group), iso3 = options$area_scope) %>%
+    dplyr::select(area_id, age_group, concat,
+                  nosex12m, sexcohab, sexnonregplus, sexnonreg, sexpaid12m,
+                  iso3, area_level,
+                  population, plhiv, infections, incidence, incidence_cat,
+                  prev_nosex12m, prev_sexcohab, prev_sexnonreg, prev_sexpaid12m,
+                  rr_sexpaid12m, rr_sexnonreg,
+                  population_nosex12m, population_sexcohab,
+                  population_sexnonreg, population_sexpaid12m,
+                  plhiv_nosex12m, plhiv_sexcohab,
+                  plhiv_sexnonreg, plhiv_sexpaid12m,
+                  susceptible_nosex12m, susceptible_sexcohab,
+                  susceptible_sexnonreg, susceptible_sexpaid12m,
+                  incidence_nosex12m, incidence_sexcohab,
+                  incidence_sexnonreg, incidence_sexpaid12m,
+                  infections_nosex12m,infections_sexcohab,
+                  infections_sexnonreg, infections_sexpaid12m,
+                  incidence_cat)
+
+}
+
+#' Calculate incidence in high risk male key populations
+#'
+#' @param outputs Naomi output.
+#' @param options Naomi options extracted from outputs
+#' @param male_srb Estimates of male sexual risk groups generated by `agyw_adjust_sexbehav_msm_pwid()`
+#' @param male_logit_prevalence Risk adjusted estimates of male prevalence in sexual risk groups generated by `agyw_calculate_prevalence_male()`
+#' @param survey_year NOTES:: add in description + when this should be adjusted. Hardcoded to 2018.
+#'
+#' @return Wide format output required for the AGYW workbook
+#'
+#' @export
+
+agyw_calculate_incidence_male <- function(outputs,
+                                          options,
+                                          male_srb,
+                                          male_logit_prevalence,
+                                          survey_year = 2018) {
+
+
+  naomi_indicators <- outputs$indicators %>%
+    dplyr::select(-lower,-upper,-se, -median, -mode, -indicator_label,
+                  -age_group_label) %>%
+    dplyr::filter(calendar_quarter == options$calendar_quarter_t2,
+                  indicator %in% c("population", "plhiv","prevalence","infections", "incidence"),
+                  sex == "male") %>%
+    tidyr::pivot_wider(names_from = indicator, values_from = mean) %>%
+    dplyr::mutate(
+      #' In terms of new infections per hundred person years (Naomi estimate is per 1000 person years)
+      incidence = incidence * 10,
+      incidence_cat = cut(incidence,
+                          c(0, 0.3, 1, 3, 10^6),
+                          labels = c("Low", "Moderate", "High", "Very High"),
+                          include.lowest = TRUE, right = TRUE))
+
+  risk_group_prevalence <- male_logit_prevalence %>%
+    dplyr::select(area_id, age_group, starts_with("prev_"))
+
+  df <- male_srb %>%
+    dplyr::filter(year == survey_year) %>%
+    dplyr::select(area_id, age_group, indicator, estimate_smoothed) %>%
+    tidyr::pivot_wider(names_from = indicator, values_from = estimate_smoothed, values_fn = mean) %>%
+    dplyr::left_join(naomi_indicators, by = dplyr::join_by(area_id, age_group)) %>%
+    dplyr::left_join(risk_group_prevalence, by = dplyr::join_by(area_id, age_group)) %>%
+    dplyr::filter(!is.na(population))
+
+
+  # NOTES/SOURCE??
+  rr_sexcohab <- 1
+  rr_sexnonreg_young <- 1.89
+  rr_sexnonreg_old <- 2.1
+
+  #' TODO: Get distributions on these and using a sampling method to get uncertainty in economic analysis e.g.
+  rr_sexnonreg_se <- 0.2
+  rr_sexnonreg_se <- 1
+
+
+  #' Calculate risk group incidence
+  Y015_024 <-  c("Y015_019", "Y020_024")
+  Y025_049 <-  c("Y025_029","Y030_034","Y035_039","Y040_044", "Y045_049")
+
+  df1 <- df %>%
+    dplyr::mutate(
+      msm_pr = prev_msm/ prevalence,
+      pwid_pr = prev_pwid / prevalence,
+      # correcting since the reference cat is reg cohabiting not gen pop
+      # need more sustainable fix for this
+      rr_msm = ifelse(msm_pr>2.5, msm_pr, 2.5),
+      rr_pwid = ifelse(pwid_pr>2.5, pwid_pr, 2.5),
+      rr_sexnonreg = dplyr::case_when(
+        age_group %in% Y015_024 ~ rr_sexnonreg_young,
+        age_group %in% Y025_049 ~ rr_sexnonreg_old,
+        TRUE ~ NA_real_),
+      population_nosex12m = population * nosex12m,
+      population_sexcohab = population * sexcohab,
+      population_sexnonreg = population * sexnonreg,
+      population_msm = population * msm,
+      population_pwid = population * pwid,
+      plhiv_nosex12m = population_nosex12m * prev_nosex12m,
+      plhiv_sexcohab = population_sexcohab * prev_sexcohab,
+      plhiv_sexnonreg = population_sexnonreg * prev_sexnonreg,
+      plhiv_msm = population_msm * prev_msm,
+      plhiv_pwid = population_pwid * prev_pwid,
+      susceptible_nosex12m = population_nosex12m - plhiv_nosex12m,
+      susceptible_sexcohab = population_sexcohab - plhiv_sexcohab,
+      susceptible_sexnonreg = population_sexnonreg - plhiv_sexnonreg,
+      susceptible_msm = population_msm - plhiv_msm,
+      susceptible_pwid = population_pwid - plhiv_pwid,
+      incidence_nosex12m = 0,
+      incidence_sexcohab = infections / (susceptible_sexcohab +
+                                           rr_sexnonreg * susceptible_sexnonreg + rr_msm * susceptible_msm +
+                                           rr_pwid * susceptible_pwid),
+      incidence_sexnonreg = incidence_sexcohab * rr_sexnonreg,
+      incidence_msm = incidence_sexcohab * rr_msm,
+      incidence_pwid = incidence_sexcohab * rr_pwid,
+      infections_nosex12m = 0,
+      infections_sexcohab = susceptible_sexcohab * incidence_sexcohab,
+      infections_sexnonreg = susceptible_sexnonreg * incidence_sexnonreg,
+      infections_msm = susceptible_msm * incidence_msm,
+      infections_pwid = susceptible_pwid * incidence_pwid
+    )
+
+  #' Calculate risk group incidence for aggregate age groups
+
+  summarise_age_cat_male <- function(dat, age_cat) {
+
+    if(age_cat == "Y015_024"){age_groups <- c("Y015_019", "Y020_024")}
+    if(age_cat == "Y025_049"){age_groups <- c("Y025_029","Y030_034","Y035_039",
+                                              "Y040_044", "Y045_049")}
+    if(age_cat == "Y015_049"){age_groups <- c("Y015_019", "Y020_024","Y025_029",
+                                              "Y030_034","Y035_039","Y040_044",
+                                              "Y045_049")}
+
+    dat %>%
+      dplyr::group_by(area_id, area_name, sex, calendar_quarter) %>%
+      dplyr::summarise(
+        "population" = sum(population * as.integer(age_group %in% age_groups)),
+        "plhiv" = sum(plhiv * as.integer(age_group %in% age_groups)),
+        "infections" = sum(infections *as.integer(age_group %in% age_groups)),
+        "population_nosex12m" = sum(population_nosex12m * as.integer(age_group %in% age_groups)),
+        "population_sexcohab" = sum(population_sexcohab * as.integer(age_group %in% age_groups)),
+        "population_sexnonreg" = sum(population_sexnonreg * as.integer(age_group %in% age_groups)),
+        "population_msm" = sum(population_pwid * as.integer(age_group %in% age_groups)),
+        "population_pwid" = sum(population_pwid * as.integer(age_group %in% age_groups)),
+        "plhiv_nosex12m" = sum(plhiv_nosex12m * as.integer(age_group %in% age_groups)),
+        "plhiv_sexnonreg" = sum(plhiv_sexnonreg * as.integer(age_group %in% age_groups)),
+        "plhiv_sexcohab" = sum(plhiv_sexcohab * as.integer(age_group %in% age_groups)),
+        "plhiv_msm" = sum(plhiv_msm * as.integer(age_group %in% age_groups)),
+        "plhiv_pwid" = sum(plhiv_pwid * as.integer(age_group %in% age_groups)),
+        "susceptible_nosex12m" = sum(susceptible_nosex12m * as.integer(age_group %in% age_groups)),
+        "susceptible_sexcohab" = sum(susceptible_sexcohab * as.integer(age_group %in% age_groups)),
+        "susceptible_sexnonreg" = sum(susceptible_sexnonreg * as.integer(age_group %in% age_groups)),
+        "susceptible_msm" = sum(susceptible_msm * as.integer(age_group %in% age_groups)),
+        "susceptible_pwid" = sum(susceptible_pwid * as.integer(age_group %in% age_groups)),
+        "infections_nosex12m" = sum(infections_nosex12m * as.integer(age_group %in% age_groups)),
+        "infections_sexcohab" = sum(infections_sexcohab * as.integer(age_group %in% age_groups)),
+        "infections_sexnonreg" = sum(infections_sexnonreg * as.integer(age_group %in% age_groups)),
+        "infections_msm" = sum(infections_msm * as.integer(age_group %in% age_groups)),
+        "infections_pwid" = sum(infections_pwid * as.integer(age_group %in% age_groups)),
+        .groups = "drop") %>%
+      dplyr::mutate(age_group = age_cat,
+                    nosex12m = susceptible_nosex12m/(population-plhiv),
+                    sexcohab = susceptible_sexcohab/(population-plhiv),
+                    sexnonregplus = sum(susceptible_sexnonreg)/(population-plhiv),
+                    sexnonreg = susceptible_sexnonreg/(population-plhiv),
+                    msm = susceptible_msm/(population-plhiv),
+                    pwid = susceptible_pwid/(population-plhiv),
+                    incidence = (infections/(population-plhiv))*100,
+                    incidence_nosex12m = infections_nosex12m/susceptible_nosex12m,
+                    incidence_sexcohab = infections_sexcohab/susceptible_sexcohab,
+                    incidence_sexnonreg = infections_sexnonreg/susceptible_sexnonreg,
+                    incidence_msm = infections_msm/susceptible_msm,
+                    incidence_pwid = infections_pwid/susceptible_pwid,
+                    prev_nosex12m = plhiv_nosex12m/(susceptible_nosex12m + plhiv_nosex12m),
+                    prev_sexcohab = plhiv_sexcohab/(susceptible_sexcohab + plhiv_sexcohab),
+                    prev_sexnonreg = plhiv_sexnonreg/(susceptible_sexnonreg + plhiv_sexnonreg),
+                    prev_msm = plhiv_msm/(susceptible_msm + plhiv_msm),
+                    prev_pwid = plhiv_pwid/(susceptible_pwid+ plhiv_pwid),
+                    rr_msm = NA,
+                    rr_pwid = NA)
+  }
+
+  # Aggregate data
+  df2 <- dplyr::bind_rows(summarise_age_cat_male(df1, "Y015_024"),
+                          summarise_age_cat_male(df1, "Y025_049"),
+                          summarise_age_cat_male(df1, "Y015_049"))
+
+  # Calculate incidence
+  df3 <- dplyr::bind_rows(df1, df2) %>%
+    dplyr::mutate(incidence_cat = cut(incidence,
+                                      c(0, 0.3, 1, 3, 10^6),
+                                      labels = c("Low", "Moderate", "High", "Very High"),
+                                      include.lowest = TRUE, right = TRUE))
+
+
+
+  #' Check that sum of disaggregated infections is the same as total infections
+  #  TO DO: add warning for sum not matching - contact admin
+  sum_infections <- df3$infections_nosex12m + df3$infections_sexcohab + df3$infections_sexnonreg + df3$infections_msm + df3$infections_pwid
+  stopifnot(max(df3$infections - sum_infections) < 10^{-9})
+
+
+  df3 %>%
+    dplyr::mutate(concat = paste0(area_id, age_group), iso3 = options$area_scope) %>%
+    dplyr::select(area_id, age_group, concat,
+                  nosex12m, sexcohab, sexnonregplus, sexnonreg, msm, pwid,
+                  iso3, area_level,
+                  population, plhiv, infections, incidence, incidence_cat,
+                  prev_nosex12m, prev_sexcohab,
+                  prev_sexnonreg, prev_msm, prev_pwid,
+                  rr_msm, rr_pwid, rr_sexnonreg,
+                  population_nosex12m, population_sexcohab,
+                  population_sexnonreg, population_msm, population_pwid,
+                  plhiv_nosex12m, plhiv_sexcohab,
+                  plhiv_sexnonreg, plhiv_msm, plhiv_pwid,
+                  susceptible_nosex12m, susceptible_sexcohab,
+                  susceptible_sexnonreg, susceptible_msm, susceptible_pwid,
+                  incidence_nosex12m, incidence_sexcohab,
+                  incidence_sexnonreg, incidence_msm, incidence_pwid,
+                  infections_nosex12m,infections_sexcohab,
+                  infections_sexnonreg, infections_msm, infections_pwid)
+
+}
+
+
+
+
+#' Calculate incidence in high risk male key populations
+#'
+#' @param outputs Naomi output.
+#' @param options Naomi options extracted from outputs
+#' @param male_srb Estimates of male sexual risk groups generated by `agyw_adjust_sexbehav_msm_pwid()`
+#' @param male_logit_prevalence Risk adjusted estimates of male prevalence in sexual risk groups generated by `agyw_calculate_prevalence_male()`
+#' @param survey_year NOTES:: add in description + when this should be adjusted. Hardcoded to 2018.
+#'
+#' @return Wide format output required for the AGYW workbook
+#'
+#' @export
+
+
+
+agyw_generate_risk_populations <- function(naomi_output) {
+
+
+  # Read in naomi outputs
+  if(tolower(tools::file_ext(naomi_output)) %in% c("rds", "qs")) {
+    # Read files if hintr rds provided
+    model_object <- read_hintr_output(naomi_output)
+    outputs <- model_object$output_package
+    options <- yaml::read_yaml(text = model_object$info$options.yml)
+
+  } else if(grepl("\\.zip$", naomi_output)) {
+    # Read files if output zip is provided
+    output_zip <- naomi_output
+    outputs <- naomi::read_output_package(output_zip)
+    options <- unz(output_zip, "info/options.yml")
+    options <- yaml::read_yaml(options)
+  }
+
+
+
+  #' Disaggregate KP PSEs to five-year age-bands using Naomi population
+
+  #' Naomi population
+  #'
+  naomi_pop <- outputs$indicators %>%
+    dplyr::filter(calendar_quarter == options$calendar_quarter_t2,
+                  indicator == "population") %>%
+    dplyr::mutate(iso3 = options$area_scope) %>%
+    dplyr::select(iso3, area_id, area_level,sex, age_group, area_level, population = mean)
+
+  #' Disaggregate KP PSEs from Oli's analysis to 5-year bands
+  fsw_est <- agyw_disaggregate_fsw(outputs, options, naomi_pop)
+  pwid_est <- agyw_disaggregate_pwid(outputs, options, naomi_pop)
+  msm_est <- agyw_disaggregate_msm(outputs, options, naomi_pop)
+
+  #' Adjust SAE model output with KP proportions
+  female_srb <- agyw_adjust_sexbehav_fsw(outputs, options, fsw_est)
+  male_srb <- agyw_adjust_sexbehav_msm_pwid(outputs, options, msm_est, pwid_est)
+
+  #' Calculate risk group prevalence
+  female_logit_prevalence <- agyw_calculate_prevalence_female(outputs,
+                                                              options,
+                                                              fsw_est,
+                                                              female_srb)
+
+  male_logit_prevalence <- agyw_calculate_prevalence_male(outputs,
+                                                          options,
+                                                          msm_est,
+                                                          male_srb)
+
+  #' Calculate risk group incidence
+  female_incidence <- agyw_calculate_incidence_female(outputs, options,
+                                                      female_srb,
+                                                      female_logit_prevalence)
+
+  male_incidence <- agyw_calculate_incidence_male(outputs, options,
+                                                  male_srb,
+                                                  male_logit_prevalence)
+
+  # ' Get Naomi output
+  naomi_output <- agyw_format_naomi(outputs, options)
+
+  v <- list(female_incidence = female_incidence,
+            male_incidence = male_incidence,
+            naomi_output = naomi_output)
+
+  v
+
+}
+

From b1607a751506eee77234f8ed63d9481ad9760925 Mon Sep 17 00:00:00 2001
From: Rob Ashton <r.ashton@imperial.ac.uk>
Date: Mon, 6 Nov 2023 10:27:36 +0000
Subject: [PATCH 03/53] Add WIP

---
 R/downloads.R                   | 6 ++++++
 tests/testthat/test-downloads.R | 7 ++++---
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/R/downloads.R b/R/downloads.R
index 7d18ddb8..5403a888 100644
--- a/R/downloads.R
+++ b/R/downloads.R
@@ -111,6 +111,12 @@ hintr_prepare_agyw_download <- function(output, pjnz,
   assert_model_output_version(output, "2.7.16")
   progress <- new_simple_progress()
   progress$update_progress("PROGRESS_DOWNLOAD_AGYW")
+
+  template_path <- naomi.resources::get_workbook_template_path()
+  browser()
+
+  risk_populations <- agyw_generate_risk_populations(output)
+
   dummy_data <- data.frame(x = c(1, 2, 3), y = c(3, 4, 5))
   writexl::write_xlsx(list(sheet = dummy_data), path = path)
 
diff --git a/tests/testthat/test-downloads.R b/tests/testthat/test-downloads.R
index 8f742a6f..a6c1b217 100644
--- a/tests/testthat/test-downloads.R
+++ b/tests/testthat/test-downloads.R
@@ -164,11 +164,12 @@ test_that("comparison report download can be created", {
 
 test_that("AGYW download can be created", {
   mock_new_simple_progress <- mockery::mock(MockSimpleProgress$new())
-  with_mock(new_simple_progress = mock_new_simple_progress, {
+  with_mocked_bindings(
     messages <- naomi_evaluate_promise(
       out <- hintr_prepare_agyw_download(a_hintr_output_calibrated,
-                                         a_hintr_data$pjnz))
-  })
+                                         a_hintr_data$pjnz)),
+    new_simple_progress = mock_new_simple_progress)
+
   expect_true(file.exists(out$path))
 
   expect_type(out$metadata$description, "character")

From f68acc0fe49982878adf08f74aa1e2abfd4e6727 Mon Sep 17 00:00:00 2001
From: Robert Ashton <r.ashton@imperial.ac.uk>
Date: Mon, 6 Nov 2023 11:25:33 +0000
Subject: [PATCH 04/53] Add WIP impl

---
 R/agyw-integration.R            | 11 +++++------
 R/downloads.R                   |  6 +++---
 R/utils.R                       | 23 +++++++++++++++++++++++
 tests/testthat/test-downloads.R |  3 +++
 tests/testthat/test-utils.R     |  6 ++++++
 5 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index 17ebe122..80c85069 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -1405,15 +1405,14 @@ agyw_calculate_incidence_male <- function(outputs,
 
 agyw_generate_risk_populations <- function(naomi_output) {
 
-
   # Read in naomi outputs
-  if(tolower(tools::file_ext(naomi_output)) %in% c("rds", "qs")) {
+  if (tolower(tools::file_ext(naomi_output)) %in% c("rds", "qs")) {
     # Read files if hintr rds provided
     model_object <- read_hintr_output(naomi_output)
     outputs <- model_object$output_package
+    outputs$indicators$area_level <- area_level_from_id(outputs$indicators$area_id)
     options <- yaml::read_yaml(text = model_object$info$options.yml)
-
-  } else if(grepl("\\.zip$", naomi_output)) {
+  } else if (grepl("\\.zip$", naomi_output)) {
     # Read files if output zip is provided
     output_zip <- naomi_output
     outputs <- naomi::read_output_package(output_zip)
@@ -1422,7 +1421,7 @@ agyw_generate_risk_populations <- function(naomi_output) {
   }
 
 
-
+  browser()
   #' Disaggregate KP PSEs to five-year age-bands using Naomi population
 
   #' Naomi population
@@ -1431,7 +1430,7 @@ agyw_generate_risk_populations <- function(naomi_output) {
     dplyr::filter(calendar_quarter == options$calendar_quarter_t2,
                   indicator == "population") %>%
     dplyr::mutate(iso3 = options$area_scope) %>%
-    dplyr::select(iso3, area_id, area_level,sex, age_group, area_level, population = mean)
+    dplyr::select(iso3, area_id, area_level, sex, age_group, population = mean)
 
   #' Disaggregate KP PSEs from Oli's analysis to 5-year bands
   fsw_est <- agyw_disaggregate_fsw(outputs, options, naomi_pop)
diff --git a/R/downloads.R b/R/downloads.R
index 5403a888..c8ba6509 100644
--- a/R/downloads.R
+++ b/R/downloads.R
@@ -112,10 +112,10 @@ hintr_prepare_agyw_download <- function(output, pjnz,
   progress <- new_simple_progress()
   progress$update_progress("PROGRESS_DOWNLOAD_AGYW")
 
-  template_path <- naomi.resources::get_workbook_template_path()
-  browser()
+  ## TODO: Remove hardcoding of ISO3
+  template_path <- naomi.resources::get_agyw_workbook_path("BWA")
 
-  risk_populations <- agyw_generate_risk_populations(output)
+  risk_populations <- agyw_generate_risk_populations(output$model_output_path)
 
   dummy_data <- data.frame(x = c(1, 2, 3), y = c(3, 4, 5))
   writexl::write_xlsx(list(sheet = dummy_data), path = path)
diff --git a/R/utils.R b/R/utils.R
index 6662f84c..a96258a1 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -97,6 +97,29 @@ vlapply <- function(X, FUN, ...) {
   vapply(X, FUN, ..., FUN.VALUE = logical(1))
 }
 
+vnapply <- function(X, FUN, ...) {
+  vapply(X, FUN, ..., FUN.VALUE = numeric(1))
+}
+
+vcapply <- function(X, FUN, ...) {
+  vapply(X, FUN, ..., FUN.VALUE = character(1))
+}
+
 is_empty <- function(x) {
   length(x) == 0 || is.null(x) || is.na(x) || !nzchar(x)
 }
+
+area_level_from_id <- function(area_ids) {
+  ## Area ids are of format <iso3>_<area_level>_<area_num>
+  ## so we can split and return the 2nd
+  split_ids <- strsplit(area_ids, "_")
+  vnapply(split_ids, function(id)  {
+    if (length(id) == 1) {
+      ## This is our top level ID i.e. level = 0
+      level = 0
+    } else {
+      level = as.numeric(id[2])
+    }
+    level
+  })
+}
diff --git a/tests/testthat/test-downloads.R b/tests/testthat/test-downloads.R
index a6c1b217..7c319b92 100644
--- a/tests/testthat/test-downloads.R
+++ b/tests/testthat/test-downloads.R
@@ -163,6 +163,9 @@ test_that("comparison report download can be created", {
 })
 
 test_that("AGYW download can be created", {
+  out <- hintr_prepare_agyw_download(a_hintr_output_calibrated,
+                                     a_hintr_data$pjnz)
+
   mock_new_simple_progress <- mockery::mock(MockSimpleProgress$new())
   with_mocked_bindings(
     messages <- naomi_evaluate_promise(
diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
index 96af513f..793ede42 100644
--- a/tests/testthat/test-utils.R
+++ b/tests/testthat/test-utils.R
@@ -26,3 +26,9 @@ test_that("is_empty", {
   expect_true(is_empty(c()))
   expect_false(is_empty(c("things")))
 })
+
+test_that("can get area level from area id", {
+  expect_equal(area_level_from_id("MWI_2_3"), 2)
+  expect_equal(area_level_from_id("MWI"), 0)
+  expect_equal(area_level_from_id(c("MWI_4_3", "MWI_23_25")), c(4, 23))
+})

From 0769c866d0c880097fa928b2aa059d08e1ff2aae Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Tue, 14 Nov 2023 11:34:15 +0000
Subject: [PATCH 05/53] fix incidence calculations

---
 R/agyw-integration.R | 176 ++++++++++++++++++++++++-------------------
 1 file changed, 98 insertions(+), 78 deletions(-)

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index 17ebe122..f3912c31 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -11,11 +11,9 @@
 agyw_format_naomi <- function(outputs, options){
 
   naomi_ind <- outputs$indicators %>%
-    dplyr::filter(indicator %in% c("population", "plhiv", "infections","incidence"),
-                  calendar_quarter == options$calendar_quarter_t2,
-                  area_level == options$area_level) %>%
-    dplyr::mutate(mean = dplyr::if_else(indicator == "incidence", mean/100, mean))
-
+    dplyr::filter(indicator %in% c("population", "plhiv", "infections","incidence",
+                                   "prevalence"),
+                  calendar_quarter == options$calendar_quarter_t2)
 
 
   summarise_naomi_ind <- function(dat, age_cat) {
@@ -25,17 +23,19 @@ agyw_format_naomi <- function(outputs, options){
                                               "Y040_044", "Y045_049")}
 
     dat %>%
-      dplyr::select(area_id, area_name, age_group, sex, indicator, mean) %>%
+      dplyr::select(area_id, area_name, area_level,calendar_quarter,
+                    age_group, sex, indicator, mean) %>%
       tidyr::pivot_wider(names_from = indicator, values_from = mean) %>%
-      dplyr::group_by(area_id, area_name, sex) %>%
+      dplyr::group_by(area_id, area_name, area_level, calendar_quarter, sex) %>%
       dplyr::summarise(
         "population" = sum(population * as.integer(age_group %in% age_groups)),
         "plhiv" = sum(plhiv * as.integer(age_group %in% age_groups)),
         "infections" = sum(infections * as.integer(age_group %in% age_groups)),
         .groups = "drop") %>%
       dplyr::mutate(age_group = age_cat,
-                    incidence = (infections/(population-plhiv))*100) %>%
-      tidyr::pivot_longer(cols = c(population, plhiv, infections, incidence),
+                    incidence = (infections/(population-plhiv)),
+                    prevalence = plhiv/population) %>%
+      tidyr::pivot_longer(cols = c(population, plhiv, infections, incidence, prevalence),
                           names_to = "indicator",
                           values_to = "mean") %>%
       dplyr::mutate(age_group_label = dplyr::if_else(age_group == "Y015_024", "15-24", "25-49"))
@@ -52,7 +52,11 @@ agyw_format_naomi <- function(outputs, options){
     dplyr::select(names(df1)) %>%
     # Add aggregate indicators
     dplyr::bind_rows(df1) %>%
-    # Format for workbook
+    # Format incidence from 1000 person years to 100 person years
+    dplyr::mutate(mean = dplyr::if_else(indicator == "incidence", mean * 100, mean))
+
+  # Format for workbook
+  df3 <- df2 %>%
     dplyr::mutate(indicator = dplyr::recode(indicator,
                                             "population" = "Pop", "plhiv" = "PLHIV",
                                             "infections" = "new","incidence" = "Inci"),
@@ -62,28 +66,30 @@ agyw_format_naomi <- function(outputs, options){
                   mean = as.character(mean))
 
   # Incidence categories
-  df3 <- df2 %>%
+  df4 <- df3 %>%
     dplyr::filter(indicator == "Inci") %>%
-    dplyr::mutate(mean = dplyr::case_when(mean<0.003 ~ "Low",
-                                          mean>=0.003 & mean<0.01 ~ "Moderate",
-                                          mean>=0.01 & mean<0.03 ~ "High",
-                                          mean>=0.03 ~ "Very High",
+    dplyr::mutate(mean = dplyr::case_when(mean < 0.3 ~ "Low",
+                                          mean >= 0.3 & mean< 1 ~ "Moderate",
+                                          mean >= 1 & mean < 3 ~ "High",
+                                          mean >= 3 ~ "Very High",
                                           TRUE ~ NA_character_),
                   indicator = "Incicategory")
 
   # Incidence for all age groups + sexes
-  df4 <- naomi_ind %>%
-    dplyr::filter(indicator == "incidence", age_group == "Y000_999", sex == "both")
+  df5 <- naomi_ind %>%
+    dplyr::filter(indicator == "incidence", age_group == "Y000_999", sex == "both",
+                  area_level == options$area_level)
 
   country <- outputs$meta_area$area_name[outputs$meta_area$area_id == options$area_scope]
 
 
   # Format
-  naomi_wide <- dplyr::bind_rows(df2, df3) %>%
+  naomi_wide <- dplyr::bind_rows(df3, df4) %>%
+  dplyr::filter(area_level == options$area_level) %>%
   tidyr::pivot_wider(id_cols = c(area_id,area_name),
                      names_from = c(indicator,age_group_label,sex),
                      names_sep = "", values_from = mean) %>%
-    dplyr::mutate(Country = country, newAll = df4$mean) %>%
+    dplyr::mutate(Country = country, newAll = df5$mean) %>%
     dplyr::select(Country,area_id,area_name,`Pop15-24all`,`Pop15-24f`,`Pop15-24m`,
                   `PLHIV15-24all`,`PLHIV15-24f`,`PLHIV15-24m`,
                   newAll, `new15-24all`,`new15-24f`,`new15-24m`,
@@ -125,7 +131,8 @@ agyw_format_naomi <- function(outputs, options){
                   `new15-49all`,`new15-49f`,`new15-49m`,
                   `Inci15-49f`,`Incicategory15-49f`,`Inci15-49m`,`Incicategory15-49m`)
 
-  naomi_wide
+  v <- list(naomi_long = df2,
+            naomi_wide = naomi_wide)
 
   }
 
@@ -566,12 +573,18 @@ agyw_adjust_sexbehav_msm_pwid <- function(outputs,
 #' 3.
 #' 4.
 #'
-agyw_calculate_prevalence_female <- function(outputs,
+agyw_calculate_prevalence_female <- function(naomi_output,
                                              options,
                                              fsw_est,
                                              female_srb,
                                              survey_year_sample = 2018) {
 
+
+  # 'Naomi general population prevalence
+  naomi_gen_pop_prev <- naomi_output %>%
+    dplyr::filter(sex == "female", age_group == "Y015_049", indicator == "prevalence") %>%
+    dplyr::select(area_id, gen_prev = mean)
+
   #' Extract country specific national FSW prevalence
   iso3 <- options$area_scope
   pse <- naomi.resources::load_agyw_exdata("fsw_pse", iso3)
@@ -617,10 +630,7 @@ agyw_calculate_prevalence_female <- function(outputs,
     ) %>%
     tidyr::separate(indicator, into = c("behav", "indicator"))
 
-  naomi_gen_pop_prev <- outputs$indicators %>%
-    dplyr::filter(calendar_quarter == options$calendar_quarter_t2, sex == "female",
-                  age_group == "Y015_049", indicator == "prevalence") %>%
-    dplyr::select(area_id, gen_prev = mean)
+
 
   kp_prev <- fsw_prev %>%
     dplyr::select(iso3,area_id,median) %>%
@@ -632,6 +642,8 @@ agyw_calculate_prevalence_female <- function(outputs,
   kp_fit <- lm(prev_fsw_logodds ~ prev_logodds, data = kp_prev)
 
 
+
+
   ind_dat <- ind %>%
     dplyr::mutate(
       nosex12m_id = ifelse(behav == "nosex12m", 1, 0),
@@ -642,6 +654,8 @@ agyw_calculate_prevalence_female <- function(outputs,
       year = as.numeric(substr(survey_id,4,7))) %>%
     dplyr::filter(indicator == "prevalence", !is.na(estimate))
 
+
+
   #' Younger age groups regression
   fit_y <- glm(estimate ~ -1 + all_id + nosex12m_id + sexcohab_id + sexnonreg_id + sexpaid12m_id,
                family = quasibinomial(link = "logit"),
@@ -750,20 +764,18 @@ agyw_calculate_prevalence_female <- function(outputs,
 #' 4.
 #'
 
-agyw_calculate_prevalence_male <- function(outputs,
+agyw_calculate_prevalence_male <- function(naomi_output,
                                            options,
                                            msm_est,
                                            male_srb,
                                            survey_year_sample = 2018) {
 
 
-
   # Naomi general population prevalence
-  naomi_gen_pop_prev <- outputs$indicators %>%
-    dplyr::filter(calendar_quarter == options$calendar_quarter_t2, sex == "male",
-                  age_group == "Y015_049", indicator == "prevalence") %>%
+  naomi_gen_pop_prev <- naomi_output %>%
+    dplyr::filter(sex == "male", age_group == "Y015_024", indicator == "prevalence") %>%
     dplyr::mutate(logit_gen_prev = log(mean / (1-mean))) %>%
-    dplyr::select(area_id, logit_gen_prev, area_level)
+    dplyr::select(area_id, prevalence = mean, logit_gen_prev, , area_level)
 
 
   #' Extract country specific national MSM + PWID prevalence
@@ -792,8 +804,11 @@ agyw_calculate_prevalence_male <- function(outputs,
                   pwid_lor = median_PWID - logit_gen_prev_PWID) %>%
     dplyr::select(-c("logit_gen_prev_PWID","logit_gen_prev_MSM","median_PWID","median_MSM","area_level"))
 
+
+  # ----------------------------------------------------------------------------
   #' Format SRB survey estimates
 
+
   srb_survey <- naomi.resources::load_agyw_exdata("srb_survey_male", iso3)
 
   prev_wide <- srb_survey %>%
@@ -862,6 +877,8 @@ agyw_calculate_prevalence_male <- function(outputs,
   lor_o <- log(odds_estimate / odds_estimate[1])
   lor_o <- lor_o[-1]
 
+  # ----------------------------------------------------------------------------
+
 
   #' Naomi estimates of PLHIV and population by district and age band
   age_groups <- c("Y015_019", "Y020_024", "Y025_029", "Y030_034",
@@ -996,21 +1013,21 @@ logit_scale_prev <- function(lor, N_fine, plhiv) {
 #' 3.
 #' 4.
 
-agyw_calculate_incidence_female <- function(outputs,
+agyw_calculate_incidence_female <- function(naomi_output,
                                             options,
                                             female_srb,
                                             female_logit_prevalence,
                                             survey_year = 2018) {
 
-  naomi_incidence <- outputs$indicators %>%
-    dplyr::select(-lower,-upper,-se, -median, -mode, -indicator_label) %>%
-    dplyr::filter(calendar_quarter == options$calendar_quarter_t2,
-                  indicator %in% c("population", "plhiv", "infections", "incidence"),
-                  sex == "female") %>%
+  naomi_indicators <- naomi_output  %>%
+    dplyr::filter(indicator %in% c("population", "plhiv","prevalence","infections", "incidence"),
+                  sex == "female", area_level == options$area_level) %>%
     tidyr::pivot_wider(names_from = indicator, values_from = mean) %>%
-    #' In terms of new infections per hundred person years
-    #' (Naomi estimate is per 1000 person years)
-    dplyr::mutate(incidence = incidence / 10)
+    dplyr::mutate(
+      incidence_cat = cut(incidence,
+                          c(0, 0.3, 1, 3, 10^6),
+                          labels = c("Low", "Moderate", "High", "Very High"),
+                          include.lowest = TRUE, right = TRUE))
 
   risk_group_prevalence <- female_logit_prevalence %>%
     dplyr::select(area_id, age_group, starts_with("prev_"))
@@ -1019,7 +1036,7 @@ agyw_calculate_incidence_female <- function(outputs,
     dplyr::filter(year == survey_year) %>%
     dplyr::select(area_id, age_group, indicator, estimate_smoothed) %>%
     tidyr::pivot_wider(names_from = indicator, values_from = estimate_smoothed, values_fn = mean) %>%
-    dplyr::left_join(naomi_incidence, by = dplyr::join_by(area_id, age_group)) %>%
+    dplyr::left_join(naomi_indicators, by = dplyr::join_by(area_id, age_group)) %>%
     dplyr::left_join(risk_group_prevalence, by = dplyr::join_by(area_id, age_group)) %>%
     dplyr::filter(!is.na(population))
 
@@ -1100,8 +1117,8 @@ agyw_calculate_incidence_female <- function(outputs,
                                               "Y045_049")}
 
 
-    dat %>%
-      dplyr::group_by(area_id, area_name, sex, calendar_quarter) %>%
+    x <- dat %>%
+      dplyr::group_by(area_id, area_name, area_level, sex, calendar_quarter) %>%
       dplyr::summarise(
         "population" = sum(population * as.integer(age_group %in% age_groups)),
         "plhiv" = sum(plhiv * as.integer(age_group %in% age_groups)),
@@ -1122,11 +1139,11 @@ agyw_calculate_incidence_female <- function(outputs,
         "infections_sexcohab" = sum(infections_sexcohab * as.integer(age_group %in% age_groups)),
         "infections_sexnonreg" = sum(infections_sexnonreg * as.integer(age_group %in% age_groups)),
         "infections_sexpaid12m" = sum(infections_sexpaid12m * as.integer(age_group %in% age_groups)),
+        "sexnonregplus" = sum(susceptible_sexnonreg, susceptible_sexpaid12m)/(population-plhiv),
         .groups = "drop") %>%
       dplyr::mutate(age_group = age_cat,
                     nosex12m = susceptible_nosex12m/(population-plhiv),
                     sexcohab = susceptible_sexcohab/(population-plhiv),
-                    sexnonregplus = sum(susceptible_sexnonreg,susceptible_sexpaid12m)/(population-plhiv),
                     sexnonreg = susceptible_sexnonreg/(population-plhiv),
                     sexpaid12m = susceptible_sexpaid12m/(population-plhiv),
                     incidence = (infections/(population-plhiv))*100,
@@ -1179,7 +1196,9 @@ agyw_calculate_incidence_female <- function(outputs,
                   incidence_sexnonreg, incidence_sexpaid12m,
                   infections_nosex12m,infections_sexcohab,
                   infections_sexnonreg, infections_sexpaid12m,
-                  incidence_cat)
+                  incidence_cat) %>%
+    dplyr::mutate_if(is.numeric, as.numeric) %>%
+    dplyr::mutate_if(is.factor, as.character)
 
 }
 
@@ -1195,30 +1214,27 @@ agyw_calculate_incidence_female <- function(outputs,
 #'
 #' @export
 
-agyw_calculate_incidence_male <- function(outputs,
+agyw_calculate_incidence_male <- function(naomi_output,
                                           options,
                                           male_srb,
                                           male_logit_prevalence,
                                           survey_year = 2018) {
 
 
-  naomi_indicators <- outputs$indicators %>%
-    dplyr::select(-lower,-upper,-se, -median, -mode, -indicator_label,
-                  -age_group_label) %>%
-    dplyr::filter(calendar_quarter == options$calendar_quarter_t2,
-                  indicator %in% c("population", "plhiv","prevalence","infections", "incidence"),
-                  sex == "male") %>%
+  naomi_indicators <- naomi_output  %>%
+    dplyr::filter(indicator %in% c("population", "plhiv","prevalence","infections", "incidence"),
+                  sex == "male", area_level == options$area_level) %>%
     tidyr::pivot_wider(names_from = indicator, values_from = mean) %>%
     dplyr::mutate(
-      #' In terms of new infections per hundred person years (Naomi estimate is per 1000 person years)
-      incidence = incidence * 10,
       incidence_cat = cut(incidence,
                           c(0, 0.3, 1, 3, 10^6),
                           labels = c("Low", "Moderate", "High", "Very High"),
                           include.lowest = TRUE, right = TRUE))
 
   risk_group_prevalence <- male_logit_prevalence %>%
-    dplyr::select(area_id, age_group, starts_with("prev_"))
+    dplyr::select(area_id, age_group, gen_prev, starts_with( "prev_")) %>%
+    dplyr::mutate(msm_pr = prev_msm/ gen_prev,
+                  pwid_pr = prev_pwid / gen_prev)
 
   df <- male_srb %>%
     dplyr::filter(year == survey_year) %>%
@@ -1245,12 +1261,12 @@ agyw_calculate_incidence_male <- function(outputs,
 
   df1 <- df %>%
     dplyr::mutate(
-      msm_pr = prev_msm/ prevalence,
-      pwid_pr = prev_pwid / prevalence,
+      msm_pr = round(prev_msm/ prevalence, 2),
+      pwid_pr = round(prev_pwid / prevalence, 2),
       # correcting since the reference cat is reg cohabiting not gen pop
       # need more sustainable fix for this
-      rr_msm = ifelse(msm_pr>2.5, msm_pr, 2.5),
-      rr_pwid = ifelse(pwid_pr>2.5, pwid_pr, 2.5),
+      rr_msm = dplyr::if_else(msm_pr > 2.5, msm_pr, 2.5),
+      rr_pwid = dplyr::if_else(pwid_pr > 2.5, pwid_pr, 2.5),
       rr_sexnonreg = dplyr::case_when(
         age_group %in% Y015_024 ~ rr_sexnonreg_young,
         age_group %in% Y025_049 ~ rr_sexnonreg_old,
@@ -1296,7 +1312,7 @@ agyw_calculate_incidence_male <- function(outputs,
                                               "Y045_049")}
 
     dat %>%
-      dplyr::group_by(area_id, area_name, sex, calendar_quarter) %>%
+      dplyr::group_by(area_id, area_name, area_level, sex, calendar_quarter) %>%
       dplyr::summarise(
         "population" = sum(population * as.integer(age_group %in% age_groups)),
         "plhiv" = sum(plhiv * as.integer(age_group %in% age_groups)),
@@ -1321,11 +1337,11 @@ agyw_calculate_incidence_male <- function(outputs,
         "infections_sexnonreg" = sum(infections_sexnonreg * as.integer(age_group %in% age_groups)),
         "infections_msm" = sum(infections_msm * as.integer(age_group %in% age_groups)),
         "infections_pwid" = sum(infections_pwid * as.integer(age_group %in% age_groups)),
+        "sexnonregplus" = sum(susceptible_sexnonreg)/(population-plhiv),
         .groups = "drop") %>%
       dplyr::mutate(age_group = age_cat,
                     nosex12m = susceptible_nosex12m/(population-plhiv),
                     sexcohab = susceptible_sexcohab/(population-plhiv),
-                    sexnonregplus = sum(susceptible_sexnonreg)/(population-plhiv),
                     sexnonreg = susceptible_sexnonreg/(population-plhiv),
                     msm = susceptible_msm/(population-plhiv),
                     pwid = susceptible_pwid/(population-plhiv),
@@ -1361,6 +1377,7 @@ agyw_calculate_incidence_male <- function(outputs,
   #' Check that sum of disaggregated infections is the same as total infections
   #  TO DO: add warning for sum not matching - contact admin
   sum_infections <- df3$infections_nosex12m + df3$infections_sexcohab + df3$infections_sexnonreg + df3$infections_msm + df3$infections_pwid
+
   stopifnot(max(df3$infections - sum_infections) < 10^{-9})
 
 
@@ -1382,7 +1399,11 @@ agyw_calculate_incidence_male <- function(outputs,
                   incidence_nosex12m, incidence_sexcohab,
                   incidence_sexnonreg, incidence_msm, incidence_pwid,
                   infections_nosex12m,infections_sexcohab,
-                  infections_sexnonreg, infections_msm, infections_pwid)
+                  infections_sexnonreg, infections_msm, infections_pwid) %>%
+    dplyr::mutate_if(is.numeric, as.numeric) %>%
+    dplyr::mutate_if(is.factor, as.character)
+
+
 
 }
 
@@ -1421,17 +1442,15 @@ agyw_generate_risk_populations <- function(naomi_output) {
     options <- yaml::read_yaml(options)
   }
 
-
-
-  #' Disaggregate KP PSEs to five-year age-bands using Naomi population
+  #' Format naomi output
+  naomi <- agyw_format_naomi(outputs, options)
 
   #' Naomi population
-  #'
-  naomi_pop <- outputs$indicators %>%
-    dplyr::filter(calendar_quarter == options$calendar_quarter_t2,
-                  indicator == "population") %>%
-    dplyr::mutate(iso3 = options$area_scope) %>%
-    dplyr::select(iso3, area_id, area_level,sex, age_group, area_level, population = mean)
+  naomi_pop <- naomi$naomi_long %>%
+    dplyr::filter(indicator == "population") %>%
+    dplyr::select(area_id, area_level,sex, age_group, area_level, population = mean)
+
+  naomi_pop$iso3 <- options$area_scope
 
   #' Disaggregate KP PSEs from Oli's analysis to 5-year bands
   fsw_est <- agyw_disaggregate_fsw(outputs, options, naomi_pop)
@@ -1443,31 +1462,32 @@ agyw_generate_risk_populations <- function(naomi_output) {
   male_srb <- agyw_adjust_sexbehav_msm_pwid(outputs, options, msm_est, pwid_est)
 
   #' Calculate risk group prevalence
-  female_logit_prevalence <- agyw_calculate_prevalence_female(outputs,
+  female_logit_prevalence <- agyw_calculate_prevalence_female(naomi$naomi_long,
                                                               options,
                                                               fsw_est,
                                                               female_srb)
 
-  male_logit_prevalence <- agyw_calculate_prevalence_male(outputs,
+  male_logit_prevalence <- agyw_calculate_prevalence_male(naomi$naomi_long,
                                                           options,
                                                           msm_est,
                                                           male_srb)
 
   #' Calculate risk group incidence
-  female_incidence <- agyw_calculate_incidence_female(outputs, options,
+  female_incidence <- agyw_calculate_incidence_female(naomi$naomi_long,
+                                                      options,
                                                       female_srb,
                                                       female_logit_prevalence)
 
-  male_incidence <- agyw_calculate_incidence_male(outputs, options,
+  male_incidence <- agyw_calculate_incidence_male(naomi$naomi_long,
+                                                  options,
                                                   male_srb,
                                                   male_logit_prevalence)
 
-  # ' Get Naomi output
-  naomi_output <- agyw_format_naomi(outputs, options)
+
 
   v <- list(female_incidence = female_incidence,
             male_incidence = male_incidence,
-            naomi_output = naomi_output)
+            naomi_output = naomi$naomi_wide)
 
   v
 

From d824b8818cabe80bb363de64315d8547e342e780 Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Wed, 15 Nov 2023 15:40:41 +0000
Subject: [PATCH 06/53] remove survey data from prevalence calculation

---
 R/agyw-integration.R | 266 +++++++++++--------------------------------
 1 file changed, 68 insertions(+), 198 deletions(-)

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index f3912c31..03e2d41a 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -579,11 +579,19 @@ agyw_calculate_prevalence_female <- function(naomi_output,
                                              female_srb,
                                              survey_year_sample = 2018) {
 
+  #' Naomi estimates of PLHIV and population by district and age band
+  naomi_est <- naomi_output %>%
+    dplyr::filter(calendar_quarter == options$calendar_quarter_t2,
+                  sex == "female",
+                  indicator %in% c("population", "plhiv", "infections", "prevalence")) %>%
+    dplyr::select(area_id, area_level, age_group, indicator, mean) %>%
+    tidyr::pivot_wider(names_from = indicator, values_from = mean) %>%
+    dplyr::rename(gen_prev = prevalence)
 
-  # 'Naomi general population prevalence
-  naomi_gen_pop_prev <- naomi_output %>%
-    dplyr::filter(sex == "female", age_group == "Y015_049", indicator == "prevalence") %>%
-    dplyr::select(area_id, gen_prev = mean)
+  # Naomi general population prevalence
+  genpop_prev <- naomi_est %>%
+    dplyr::filter(age_group == "Y015_049") %>%
+    dplyr::select(area_id, gen_prev)
 
   #' Extract country specific national FSW prevalence
   iso3 <- options$area_scope
@@ -591,109 +599,15 @@ agyw_calculate_prevalence_female <- function(naomi_output,
 
   fsw_prev <- pse %>% dplyr::filter(indicator=="prev")
 
-  #' Format SRB survey estimates
-  srb_survey <- naomi.resources::load_agyw_exdata("srb_survey_female", iso3)
-
-  prev_wide <- srb_survey %>%
-    dplyr::filter(
-      area_id == options$area_scope,
-      (nosex12m != 0) & (sexcohab != 0) & (sexnonreg != 0) & (sexpaid12m != 0),
-      !age_group %in% c("Y015_024","Y015_049","Y025_049"),
-      indicator == "prevalence") %>%
-    dplyr::mutate(
-      behav = dplyr::case_when(
-        nosex12m == 1 ~ "nosex12m", sexcohab == 1 ~ "sexcohab",
-        sexnonreg == 1 ~ "sexnonreg", sexpaid12m == 1 ~ "sexpaid12m",
-        TRUE ~ "all"), .after = indicator) %>%
-    dplyr::select(indicator, behav, survey_id, area_id, age_group, estimate) %>%
-    tidyr::pivot_wider(
-      names_from = "behav",
-      values_from = "estimate")
-
-  ind <- prev_wide %>%
-    dplyr::mutate(
-      #' Calculate the odds
-      across(nosex12m:all, ~ .x / (1 - .x), .names = "{.col}_odds"),
-      #' Log odds
-      across(nosex12m:all, ~ log(.x / (1 - .x)), .names = "{.col}_logodds"),
-      #' Prevalence ratios
-      across(nosex12m:all, ~ .x / all, .names = "{.col}_pr"),
-      #' Odds ratios
-      across(nosex12m:all, ~ (.x / (1 - .x)) / all_odds, .names = "{.col}_or")
-    ) %>%
-    dplyr::rename_with(.cols = nosex12m:all, ~ paste0(.x, "_prevalence")) %>%
-    dplyr::select(-indicator) %>%
-    tidyr::pivot_longer(
-      cols = starts_with(c("nosex12m", "sexcohab", "sexnonreg", "sexpaid12m", "all")),
-      names_to = "indicator",
-      values_to = "estimate"
-    ) %>%
-    tidyr::separate(indicator, into = c("behav", "indicator"))
-
-
-
   kp_prev <- fsw_prev %>%
     dplyr::select(iso3,area_id,median) %>%
-    dplyr::left_join(naomi_gen_pop_prev, by = dplyr::join_by(area_id)) %>%
+    dplyr::left_join(genpop_prev, by = dplyr::join_by(area_id)) %>%
     dplyr::mutate(prev_fsw_logodds = log(median / (1-median)),
                   prev_logodds = log(gen_prev / (1-gen_prev)))
 
   #' KP regression: FSW prevalence relative to general prevalence
   kp_fit <- lm(prev_fsw_logodds ~ prev_logodds, data = kp_prev)
 
-
-
-
-  ind_dat <- ind %>%
-    dplyr::mutate(
-      nosex12m_id = ifelse(behav == "nosex12m", 1, 0),
-      sexcohab_id = ifelse(behav == "sexcohab", 1, 0),
-      sexnonreg_id = ifelse(behav == "sexnonreg", 1, 0),
-      sexpaid12m_id = ifelse(behav == "sexpaid12m", 1, 0),
-      all_id = ifelse(behav == "all", 1, 0),
-      year = as.numeric(substr(survey_id,4,7))) %>%
-    dplyr::filter(indicator == "prevalence", !is.na(estimate))
-
-
-
-  #' Younger age groups regression
-  fit_y <- glm(estimate ~ -1 + all_id + nosex12m_id + sexcohab_id + sexnonreg_id + sexpaid12m_id,
-               family = quasibinomial(link = "logit"),
-               data = ind_dat %>% dplyr::filter(age_group %in% c("Y015_019","Y020_024","Y025_029")))
-
-  odds_estimate <- exp(fit_y$coefficients)
-  or_y <- odds_estimate / odds_estimate[1]
-  lor_y <- log(odds_estimate / odds_estimate[1])
-  lor_y <- lor_y[-1]
-
-  odds_estimate <- exp(fit_y$coefficients)
-  or_y <- odds_estimate / odds_estimate[1]
-  lor_y <- log(odds_estimate / odds_estimate[1])
-  lor_y <- lor_y[-1]
-
-  #' Older age groups regression
-  fit_o <- glm(estimate ~ -1 + all_id + nosex12m_id + sexcohab_id + sexnonreg_id + sexpaid12m_id,
-               family = quasibinomial(link = "logit"),
-               data = ind_dat %>% dplyr::filter(age_group %in% c("Y030_034","Y035_039","Y040_044","Y045_49")))
-
-  odds_estimate <- exp(fit_o$coefficients)
-  or_o <- odds_estimate / odds_estimate[1]
-  lor_o <- log(odds_estimate / odds_estimate[1])
-  lor_o <- lor_o[-1]
-
-  #' Naomi estimates of PLHIV and population by district and age band
-  age_groups <- c("Y015_019", "Y020_024", "Y025_029", "Y030_034",
-                  "Y035_039", "Y040_044", "Y045_049")
-
-  naomi_est <- outputs$indicators %>%
-    dplyr::filter(calendar_quarter == options$calendar_quarter_t2,
-                  sex == "female", area_level == options$area_level,
-                  indicator %in% c("population", "plhiv", "infections", "prevalence"),
-                  age_group %in% age_groups) %>%
-    dplyr::select(area_id, area_level, age_group, indicator, mean) %>%
-    tidyr::pivot_wider( names_from = indicator, values_from = mean) %>%
-    dplyr::rename(gen_prev = prevalence)
-
   #' Modelled estimates of proportion in each risk group
   risk_group_prop <- female_srb %>%
     dplyr::filter(year == survey_year_sample) %>%
@@ -709,9 +623,28 @@ agyw_calculate_prevalence_female <- function(naomi_output,
       population_sexpaid12m = population * prop_sexpaid12m
     )
 
+
+
+
   #' Calculate prevalence in each category
   calculate_prevalence <- function(x){
-    if(x$age_group[1] %in% c("Y015_019","Y020_024","Y025_029")) {lor <- lor_y} else {lor <- lor_o}
+
+    #' Log odds ratio from SRB group survey prevalence
+    lor <- naomi.resources:::load_agyw_exdata("srb_survey_lor", "BWA") %>%
+      dplyr::filter(sex == "female")
+
+    lor_15to29 <- lor$lor_15to29
+    names(lor_15to29) <- lor$srb_group
+
+    lor_30to49 <- lor$lor_30to49
+    names(lor_30to49) <- lor$srb_group
+
+    if(x$age_group[1] %in% c("Y015_019","Y020_024","Y025_029")) {
+      lor <- lor_15to29
+    } else {
+      lor <- lor_30to49
+    }
+
     population_fine <- dplyr::filter(x, indicator == "population")$estimate
     plhiv <- x$plhiv[1]
     ywkp_lor <- c("ywkp_lor" = x$ywkp_lor[1])
@@ -740,7 +673,9 @@ agyw_calculate_prevalence_female <- function(naomi_output,
     lapply(calculate_prevalence) %>%
     dplyr::bind_rows() %>%
     tidyr::unite("indicator", indicator, behav, sep = "_") %>%
-    tidyr::pivot_wider( names_from = indicator, values_from = estimate)
+    tidyr::pivot_wider( names_from = indicator, values_from = estimate) %>%
+    dplyr::mutate_if(is.numeric, as.numeric) %>%
+    dplyr::mutate_if(is.factor, as.character)
 
   logit_prev
 
@@ -770,13 +705,21 @@ agyw_calculate_prevalence_male <- function(naomi_output,
                                            male_srb,
                                            survey_year_sample = 2018) {
 
+  #' Naomi estimates of PLHIV and population by district and age band
+  naomi_est <- naomi_output %>%
+    dplyr::filter(calendar_quarter == options$calendar_quarter_t2,
+                  sex == "male",
+                  indicator %in% c("population", "plhiv", "infections", "prevalence")) %>%
+    dplyr::select(area_id, area_level, age_group, indicator, mean) %>%
+    tidyr::pivot_wider(names_from = indicator, values_from = mean) %>%
+    dplyr::rename(gen_prev = prevalence)
 
   # Naomi general population prevalence
-  naomi_gen_pop_prev <- naomi_output %>%
-    dplyr::filter(sex == "male", age_group == "Y015_024", indicator == "prevalence") %>%
-    dplyr::mutate(logit_gen_prev = log(mean / (1-mean))) %>%
-    dplyr::select(area_id, prevalence = mean, logit_gen_prev, , area_level)
-
+  genpop_prev <- naomi_est %>%
+    dplyr::filter(age_group == "Y015_024") %>%
+    dplyr::select(area_id, area_level, gen_prev) %>%
+    dplyr::mutate(logit_gen_prev = log(gen_prev / (1 - gen_prev))) %>%
+    dplyr::select(area_id, gen_prev, logit_gen_prev, area_level)
 
   #' Extract country specific national MSM + PWID prevalence
   iso3 <- options$area_scope
@@ -795,7 +738,7 @@ agyw_calculate_prevalence_male <- function(naomi_output,
     dplyr::select(-indicator,-lower, - upper) %>%
     dplyr::mutate(median = log(median / (1-median))) %>%
     # Add in Naomi general pop prevalence
-    dplyr::left_join(naomi_gen_pop_prev, by = dplyr::join_by(area_id)) %>%
+    dplyr::left_join(genpop_prev, by = dplyr::join_by(area_id)) %>%
     dplyr::select(kp, iso3, area_id, logit_gen_prev, median, area_level) %>%
     # Calculate Log-Odds ratio
     tidyr::pivot_wider(names_from = kp,
@@ -804,95 +747,6 @@ agyw_calculate_prevalence_male <- function(naomi_output,
                   pwid_lor = median_PWID - logit_gen_prev_PWID) %>%
     dplyr::select(-c("logit_gen_prev_PWID","logit_gen_prev_MSM","median_PWID","median_MSM","area_level"))
 
-
-  # ----------------------------------------------------------------------------
-  #' Format SRB survey estimates
-
-
-  srb_survey <- naomi.resources::load_agyw_exdata("srb_survey_male", iso3)
-
-  prev_wide <- srb_survey %>%
-    dplyr::filter(
-      area_id == options$area_scope,
-      (nosex12m != 0) & (sexcohab != 0) & (sexnonreg != 0) & (sexpaid12m != 0),
-      !age_group %in% c("Y015_024","Y015_049","Y025_049"),
-      indicator == "prevalence") %>%
-    dplyr::mutate(
-      behav = dplyr::case_when(
-        nosex12m == 1 ~ "nosex12m", sexcohab == 1 ~ "sexcohab",
-        sexnonreg == 1 ~ "sexnonreg", sexpaid12m == 1 ~ "sexpaid12m",
-        TRUE ~ "all"), .after = indicator) %>%
-    dplyr::select(indicator, behav, survey_id, area_id, age_group, estimate) %>%
-    tidyr::pivot_wider(
-      names_from = "behav",
-      values_from = "estimate")
-
-  ind <- prev_wide %>%
-    dplyr::mutate(
-      #' Calculate the odds
-      across(nosex12m:all, ~ .x / (1 - .x), .names = "{.col}_odds"),
-      #' Log odds
-      across(nosex12m:all, ~ log(.x / (1 - .x)), .names = "{.col}_logodds"),
-      #' Prevalence ratios
-      across(nosex12m:all, ~ .x / all, .names = "{.col}_pr"),
-      #' Odds ratios
-      across(nosex12m:all, ~ (.x / (1 - .x)) / all_odds, .names = "{.col}_or")
-    ) %>%
-    dplyr::rename_with(.cols = nosex12m:all, ~ paste0(.x, "_prevalence")) %>%
-    dplyr::select(-indicator) %>%
-    tidyr::pivot_longer(
-      cols = starts_with(c("nosex12m", "sexcohab", "sexnonreg", "sexpaid12m", "all")),
-      names_to = "indicator",
-      values_to = "estimate"
-    ) %>%
-    tidyr::separate(indicator, into = c("behav", "indicator"))
-
-  ind_dat <- ind %>%
-    dplyr::mutate(
-      nosex12m_id = ifelse(behav == "nosex12m", 1, 0),
-      sexcohab_id = ifelse(behav == "sexcohab", 1, 0),
-      sexnonreg_id = ifelse(behav == "sexnonreg", 1, 0),
-      sexpaid12m_id = ifelse(behav == "sexpaid12m", 1, 0),
-      all_id = ifelse(behav == "all", 1, 0)) %>%
-    dplyr::filter( indicator == "prevalence", !is.na(estimate))
-
-
-  # Young age group regression
-  fit_y <- glm(estimate ~ -1 + all_id + nosex12m_id + sexcohab_id + sexnonreg_id + sexpaid12m_id,
-               family = quasibinomial(link = "logit"),
-               data = ind_dat %>% dplyr::filter(age_group %in% c("Y015_019","Y020_024","Y025_029")))
-
-  odds_estimate <- exp(fit_y$coefficients)
-  or_y <- odds_estimate / odds_estimate[1]
-  lor_y <- log(odds_estimate / odds_estimate[1])
-  lor_y <- lor_y[-1]
-
-  # Older age-group regression
-  fit_o <- glm(estimate ~ -1 + all_id + nosex12m_id + sexcohab_id + sexnonreg_id + sexpaid12m_id,
-               family = quasibinomial(link = "logit"),
-               data = ind_dat %>% dplyr::filter(age_group %in% c("Y030_034","Y035_039","Y040_044","Y045_49")))
-
-  odds_estimate <- exp(fit_o$coefficients)
-  or_o <- odds_estimate / odds_estimate[1]
-  lor_o <- log(odds_estimate / odds_estimate[1])
-  lor_o <- lor_o[-1]
-
-  # ----------------------------------------------------------------------------
-
-
-  #' Naomi estimates of PLHIV and population by district and age band
-  age_groups <- c("Y015_019", "Y020_024", "Y025_029", "Y030_034",
-                  "Y035_039", "Y040_044", "Y045_049")
-
-  naomi_est <- outputs$indicators %>%
-    dplyr::filter(calendar_quarter == options$calendar_quarter_t2,
-                  sex == "male", area_level == options$area_level,
-                  indicator %in% c("population", "plhiv", "infections", "prevalence"),
-                  age_group %in% age_groups) %>%
-    dplyr::select(area_id, area_level, age_group, indicator, mean) %>%
-    tidyr::pivot_wider( names_from = indicator, values_from = mean) %>%
-    dplyr::rename(gen_prev = prevalence)
-
   # Match KP estimates (admin0 or admin1) with SAE estimates
   msm_analysis_level <- paste0("area_id",unique(msm_est$area_level))
   areas_wide <-  naomi::spread_areas(outputs$meta_area) %>%
@@ -921,7 +775,21 @@ agyw_calculate_prevalence_male <- function(naomi_output,
   #' Calculate prevalence in each category
   calculate_prevalence <- function(x){
 
-    if(x$age_group[1] %in% c("Y015_019","Y020_024","Y025_029")) {lor <- lor_y} else {lor <- lor_o}
+    #' Log odds ratio from SRB group survey prevalence
+    lor <- naomi.resources:::load_agyw_exdata("srb_survey_lor", "BWA") %>%
+      dplyr::filter(sex == "male")
+
+    lor_15to29 <- lor$lor_15to29
+    names(lor_15to29) <- lor$srb_group
+
+    lor_30to49 <- lor$lor_30to49
+    names(lor_30to49) <- lor$srb_group
+
+    if(x$age_group[1] %in% c("Y015_019","Y020_024","Y025_029")) {
+      lor <- lor_15to29
+    } else {
+      lor <- lor_30to49
+    }
 
     population_fine <- dplyr::filter(x, indicator == "population")$estimate
     plhiv <- x$plhiv[1]
@@ -947,7 +815,9 @@ agyw_calculate_prevalence_male <- function(naomi_output,
     lapply(calculate_prevalence) %>%
     dplyr::bind_rows() %>%
     tidyr::unite("indicator", indicator, behav, sep = "_") %>%
-    tidyr::pivot_wider( names_from = indicator, values_from = estimate)
+    tidyr::pivot_wider( names_from = indicator, values_from = estimate) %>%
+    dplyr::mutate_if(is.numeric, as.numeric) %>%
+    dplyr::mutate_if(is.factor, as.character)
 
   logit_prev
 

From 24be9e35cdce3e1fa9e8959ee472e33d04bb18cc Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Fri, 17 Nov 2023 17:03:34 +0000
Subject: [PATCH 07/53] update workflow to pull in updated KP estimates

---
 R/agyw-integration.R | 46 +++++++++++++++++++-------------------------
 1 file changed, 20 insertions(+), 26 deletions(-)

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index 03e2d41a..70de08ca 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -156,10 +156,11 @@ agyw_disaggregate_fsw <- function(outputs,
 
   #' Extract country specific national FSW PSEs
   iso3 <- options$area_scope
-  pse <- naomi.resources::load_agyw_exdata("fsw_pse", iso3)
+
+  pse <- naomi.resources::load_agyw_exdata("kp_estimates", iso3) %>%
+    dplyr::filter(kp == "FSW", indicator == "pse_prop")
 
   fsw_pse <- pse %>%
-    dplyr::filter(iso3 == options$area_scope, indicator == "pse_prop") %>%
     dplyr::rename(prop_fsw = median) %>%
     dplyr::select(-indicator,-lower,-upper)
 
@@ -193,7 +194,7 @@ agyw_disaggregate_fsw <- function(outputs,
 
   #' Calculate proportion of sexually active population using Kinh's country specific
   #' estimates of age at first sex and naomi population
-  afs <- naomi.resources::load_agyw_exdata("afs", "BWA")
+  afs <- naomi.resources::load_agyw_exdata("afs", iso3)
 
   #' Select birth cohort from 2000, to turn 15 in 2015
   cohort <- 2000
@@ -282,10 +283,11 @@ agyw_disaggregate_pwid <- function(outputs,
 
   #' Extract country specific national PWID PSEs
   iso3 <- options$area_scope
-  pse <- naomi.resources::load_agyw_exdata("pwid_pse", iso3)
+
+  pse <- naomi.resources::load_agyw_exdata("kp_estimates", iso3) %>%
+    dplyr::filter(kp == "PWID", indicator == "pse_prop")
 
   pwid_pse <- pse %>%
-    dplyr::filter(iso3 == options$area_scope, indicator == "pse_prop") %>%
     dplyr::rename(prop_pwid = median) %>%
     dplyr::select(-indicator,-lower,-upper)
 
@@ -299,8 +301,9 @@ agyw_disaggregate_pwid <- function(outputs,
     dplyr::mutate(total_pwid = population * prop_pwid) %>%
     dplyr::select(iso3, area_id, total_pwid, age_group, area_level)
 
-  #' Assumption form literature that 9% of PWID are female and remove them from
-  #' the male calculation
+  #' Assumption from literature that 9% of PWID are female so remove them from
+  #' the male denominator
+
   pwid$total_pwid <- pwid$total_pwid * 0.91
 
   #' PWID age distribution parameters in ZAF from Thembisa
@@ -352,10 +355,10 @@ agyw_disaggregate_msm <- function(outputs,
 
   #' Extract country specific national MSM PSEs
   iso3 <- options$area_scope
-  pse <- naomi.resources::load_agyw_exdata("msm_pse", iso3)
+  pse <- naomi.resources::load_agyw_exdata("kp_estimates", iso3) %>%
+    dplyr::filter(kp == "PWID", indicator == "pse_prop")
 
   msm_pse <- pse %>%
-    dplyr::filter(iso3 == options$area_scope, indicator == "pse_prop") %>%
     dplyr::rename(prop_msm = median) %>%
     dplyr::select(-indicator,-lower,-upper)
 
@@ -418,8 +421,8 @@ agyw_disaggregate_msm <- function(outputs,
 #'
 #' * `nosex12m`:
 #' * `sexcohab`:
-#' * `sexonregplus`:
-#' * `sexonreg`:
+#' * `sexnonregplus`:
+#' * `sexnonreg`:
 #' * `sexpaid12m`:
 #' * `nosex12m`:
 #'
@@ -447,7 +450,6 @@ agyw_adjust_sexbehav_fsw <- function(outputs,
   fsw_df <- fsw_est %>% dplyr::select(age_group, fsw_match_area = area_id, fsw_prop)
 
   #' Load female SRB proportions
-
   female_srb <- naomi.resources::load_agyw_exdata("srb_female", options$area_scope)
 
   adj_female_srb <- female_srb %>%
@@ -520,7 +522,6 @@ agyw_adjust_sexbehav_msm_pwid <- function(outputs,
   pwid_df <- pwid_est %>% dplyr::select(age_group, kp_match_area = area_id, pwid_prop)
 
   #' Load male SRB proportions
-
   male_srb <- naomi.resources::load_agyw_exdata("srb_male", options$area_scope)
 
   adj_male_srb <- male_srb %>%
@@ -595,9 +596,8 @@ agyw_calculate_prevalence_female <- function(naomi_output,
 
   #' Extract country specific national FSW prevalence
   iso3 <- options$area_scope
-  pse <- naomi.resources::load_agyw_exdata("fsw_pse", iso3)
-
-  fsw_prev <- pse %>% dplyr::filter(indicator=="prev")
+   fsw_prev <- naomi.resources::load_agyw_exdata("kp_estimates", iso3) %>%
+    dplyr::filter(kp == "FSW", indicator == "prevalence")
 
   kp_prev <- fsw_prev %>%
     dplyr::select(iso3,area_id,median) %>%
@@ -724,17 +724,11 @@ agyw_calculate_prevalence_male <- function(naomi_output,
   #' Extract country specific national MSM + PWID prevalence
   iso3 <- options$area_scope
 
-  msm <- naomi.resources::load_agyw_exdata("msm_pse", iso3)
-  pwid <- naomi.resources::load_agyw_exdata("pwid_pse", iso3)
-
-  msm_prev <- msm %>% dplyr::filter(indicator=="prev") %>%
-    dplyr::mutate(kp = "MSM")
-  pwid_prev <- pwid %>% dplyr::filter(indicator=="prev") %>%
-    dplyr::mutate(kp = "PWID")
-
+  msm_pwid_prev <- naomi.resources::load_agyw_exdata("kp_estimates", iso3) %>%
+    dplyr::filter(indicator == "prevalence", kp %in% c("MSM", "PWID"))
 
-  # KP population prevalence
-  kp_prev <- dplyr::bind_rows(msm_prev, pwid_prev) %>%
+# KP population prevalence
+  kp_prev <- msm_pwid_prev %>%
     dplyr::select(-indicator,-lower, - upper) %>%
     dplyr::mutate(median = log(median / (1-median))) %>%
     # Add in Naomi general pop prevalence

From d9ae04e1763ec7f7656ed6be3b700f525ab9b21e Mon Sep 17 00:00:00 2001
From: Robert Ashton <r.ashton@imperial.ac.uk>
Date: Thu, 7 Dec 2023 12:25:14 +0000
Subject: [PATCH 08/53] Use new format of workbook

---
 R/downloads.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/downloads.R b/R/downloads.R
index c8ba6509..d2c00faf 100644
--- a/R/downloads.R
+++ b/R/downloads.R
@@ -113,7 +113,7 @@ hintr_prepare_agyw_download <- function(output, pjnz,
   progress$update_progress("PROGRESS_DOWNLOAD_AGYW")
 
   ## TODO: Remove hardcoding of ISO3
-  template_path <- naomi.resources::get_agyw_workbook_path("BWA")
+  template_path <- naomi.resources::get_agyw_workbook_path()
 
   risk_populations <- agyw_generate_risk_populations(output$model_output_path)
 

From 4f79a952f494fc243a3ec2f9f982f449b959d97c Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Fri, 8 Dec 2023 19:21:03 +0300
Subject: [PATCH 09/53] add labels to aggregated data and patch for
 naomi.resource update

---
 R/agyw-integration.R | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index 70de08ca..12c08470 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -15,6 +15,10 @@ agyw_format_naomi <- function(outputs, options){
                                    "prevalence"),
                   calendar_quarter == options$calendar_quarter_t2)
 
+  naomi_ind_labelled <- naomi_ind %>%
+    dplyr::left_join(outputs$meta_area %>% dplyr::select(area_id, area_name),
+                     by = dplyr::join_by(area_id))
+
 
   summarise_naomi_ind <- function(dat, age_cat) {
 
@@ -23,7 +27,7 @@ agyw_format_naomi <- function(outputs, options){
                                               "Y040_044", "Y045_049")}
 
     dat %>%
-      dplyr::select(area_id, area_name, area_level,calendar_quarter,
+      dplyr::select(area_id, area_name, area_level, calendar_quarter,
                     age_group, sex, indicator, mean) %>%
       tidyr::pivot_wider(names_from = indicator, values_from = mean) %>%
       dplyr::group_by(area_id, area_name, area_level, calendar_quarter, sex) %>%
@@ -42,13 +46,14 @@ agyw_format_naomi <- function(outputs, options){
   }
 
   # Naomi indicators for aggregate age groups
-  df1 <- dplyr::bind_rows(summarise_naomi_ind(naomi_ind, "Y015_024"),
-                          summarise_naomi_ind(naomi_ind, "Y025_049"))
+  df1 <- dplyr::bind_rows(summarise_naomi_ind(naomi_ind_labelled, "Y015_024"),
+                          summarise_naomi_ind(naomi_ind_labelled, "Y025_049"))
 
   # Naomi indicators for 5-year age groups + 15-49
-  df2 <- naomi_ind %>%
+  df2 <- naomi_ind_labelled %>%
     dplyr::filter(age_group %in% c("Y015_019", "Y020_024", "Y025_029", "Y030_034",
                                    "Y035_039", "Y040_044", "Y045_049", "Y015_049")) %>%
+    dplyr::left_join(outputs$meta_age_group, by = dplyr::join_by(age_group)) %>%
     dplyr::select(names(df1)) %>%
     # Add aggregate indicators
     dplyr::bind_rows(df1) %>%
@@ -241,7 +246,7 @@ agyw_disaggregate_fsw <- function(outputs,
   #' Adjusting country specific sexual debut estimates with age distribution of
   #' FSW from Thembisa
   #'Downloaded from: https://www.thembisa.org/content/downloadPage/Thembisa4_3
-  zaf_propensity <- naomi.resources::load_agyw_exdata("zaf_propensity")
+  zaf_propensity <- naomi.resources::load_agyw_exdata("zaf_propensity", iso3 = "ZAF")
 
   fsw_est <- df %>%
     #  Add FSW propensity estimates from ZAF

From b3d5ef7e65e6c55c14c2658f79509931ceca9802 Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Tue, 12 Dec 2023 11:20:28 +0300
Subject: [PATCH 10/53] Update test-downloads.R

---
 tests/testthat/test-downloads.R | 35 ++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/tests/testthat/test-downloads.R b/tests/testthat/test-downloads.R
index 7c319b92..6319bbe9 100644
--- a/tests/testthat/test-downloads.R
+++ b/tests/testthat/test-downloads.R
@@ -162,8 +162,40 @@ test_that("comparison report download can be created", {
                "Generating comparison report")
 })
 
+
 test_that("AGYW download can be created", {
-  out <- hintr_prepare_agyw_download(a_hintr_output_calibrated,
+
+  #' Create naomi outputs with "MWI_demo" iso3 to align with testing data in
+  #' naomi.resources
+  output <- qs::qread(a_hintr_output_calibrated$model_output_path)
+
+  # Create demo datasets
+  # Indicators
+  ind_demo <- output$output_package$indicators %>%
+    dplyr::mutate(area_id = dplyr::if_else(area_id == "MWI", "MWI_demo", area_id))
+  # Options
+  options_demo <- output$output_package$fit$model_options
+  options_demo$area_scope <- "MWI_demo"
+
+  # Areas
+  meta_area_demo <- output$output_package$meta_area %>%
+    dplyr::mutate(area_id = dplyr::if_else(area_id == "MWI", "MWI_demo", area_id))
+
+  # Save out demo output package
+  demo <- output
+  demo$output_package$indicators <- ind_demo
+  demo$output_package$fit$model_options <- options_demo
+  demo$output_package$meta_area <- meta_area_demo
+
+  out_demo <- tempfile(fileext = ".qs")
+  qs::qsave(demo, preset = "fast", out_demo)
+
+  # Add to existing hintr_test data
+  agyw_output_demo <- a_hintr_output_calibrated
+  agyw_output_demo$model_output_path <- out_demo
+
+  # Generate AGYW outputs
+  out <- hintr_prepare_agyw_download(agyw_output_demo,
                                      a_hintr_data$pjnz)
 
   mock_new_simple_progress <- mockery::mock(MockSimpleProgress$new())
@@ -189,6 +221,7 @@ test_that("AGYW download can be created", {
   expect_equal(messages$progress[[1]]$message, "Generating AGYW tool")
 })
 
+
 test_that("output description is translated", {
   text <- build_output_description(a_hintr_options)
   expect_match(

From 34d1de292ff29a19c360c1cedc2bc507a8cc4670 Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Tue, 12 Dec 2023 16:31:08 +0300
Subject: [PATCH 11/53] update test data and fix bugs

---
 R/agyw-integration.R            | 22 +++++++++++++---------
 tests/testthat/test-downloads.R | 10 ++++------
 2 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index 1f0ad9dc..0031f7ed 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -16,7 +16,8 @@ agyw_format_naomi <- function(outputs, options){
                   calendar_quarter == options$calendar_quarter_t2)
 
   naomi_ind_labelled <- naomi_ind %>%
-    dplyr::left_join(outputs$meta_area %>% dplyr::select(area_id, area_name),
+    dplyr::left_join(outputs$meta_area %>% dplyr::select(area_id, area_name,
+                                                         area_level),
                      by = dplyr::join_by(area_id))
 
 
@@ -81,7 +82,7 @@ agyw_format_naomi <- function(outputs, options){
                   indicator = "Incicategory")
 
   # Incidence for all age groups + sexes
-  df5 <- naomi_ind %>%
+  df5 <- naomi_ind_labelled %>%
     dplyr::filter(indicator == "incidence", age_group == "Y000_999", sex == "both",
                   area_level == options$area_level)
 
@@ -635,7 +636,7 @@ agyw_calculate_prevalence_female <- function(naomi_output,
   calculate_prevalence <- function(x){
 
     #' Log odds ratio from SRB group survey prevalence
-    lor <- naomi.resources:::load_agyw_exdata("srb_survey_lor", "BWA") %>%
+    lor <- naomi.resources:::load_agyw_exdata("srb_survey_lor", iso3) %>%
       dplyr::filter(sex == "female")
 
     lor_15to29 <- lor$lor_15to29
@@ -684,6 +685,7 @@ agyw_calculate_prevalence_female <- function(naomi_output,
 
   logit_prev
 
+
 }
 
 #' Calculate prevalence for male SRB groups.
@@ -691,6 +693,7 @@ agyw_calculate_prevalence_female <- function(naomi_output,
 #' @param outputs Naomi output.
 #' @param options Naomi model options.
 #' @param male_srb
+#' @param areas
 #' @param msm_est .
 #' @param survey_year Year of survey to sample estimates.
 #'
@@ -705,6 +708,7 @@ agyw_calculate_prevalence_female <- function(naomi_output,
 #'
 
 agyw_calculate_prevalence_male <- function(naomi_output,
+                                           areas,
                                            options,
                                            msm_est,
                                            male_srb,
@@ -748,7 +752,7 @@ agyw_calculate_prevalence_male <- function(naomi_output,
 
   # Match KP estimates (admin0 or admin1) with SAE estimates
   msm_analysis_level <- paste0("area_id",unique(msm_est$area_level))
-  areas_wide <-  naomi::spread_areas(outputs$meta_area) %>%
+  areas_wide <-  naomi::spread_areas(areas) %>%
     sf::st_drop_geometry()
   map <- dplyr::select(areas_wide, area_id, dplyr::all_of(msm_analysis_level)) %>%
     dplyr::rename(kp_match_area = 2)
@@ -775,7 +779,7 @@ agyw_calculate_prevalence_male <- function(naomi_output,
   calculate_prevalence <- function(x){
 
     #' Log odds ratio from SRB group survey prevalence
-    lor <- naomi.resources:::load_agyw_exdata("srb_survey_lor", "BWA") %>%
+    lor <- naomi.resources:::load_agyw_exdata("srb_survey_lor", iso3) %>%
       dplyr::filter(sex == "male")
 
     lor_15to29 <- lor$lor_15to29
@@ -1300,14 +1304,13 @@ agyw_generate_risk_populations <- function(naomi_output) {
     # Read files if hintr rds provided
     model_object <- read_hintr_output(naomi_output)
     outputs <- model_object$output_package
-    outputs$indicators$area_level <- area_level_from_id(outputs$indicators$area_id)
-    options <- yaml::read_yaml(text = model_object$info$options.yml)
+    options <- outputs$fit$model_options
+
   } else if (grepl("\\.zip$", naomi_output)) {
     # Read files if output zip is provided
     output_zip <- naomi_output
     outputs <- naomi::read_output_package(output_zip)
-    options <- unz(output_zip, "info/options.yml")
-    options <- yaml::read_yaml(options)
+    options <- outputs$fit$model_options
   }
 
   #' Format naomi output
@@ -1336,6 +1339,7 @@ agyw_generate_risk_populations <- function(naomi_output) {
                                                               female_srb)
 
   male_logit_prevalence <- agyw_calculate_prevalence_male(naomi$naomi_long,
+                                                          outputs$meta_area,
                                                           options,
                                                           msm_est,
                                                           male_srb)
diff --git a/tests/testthat/test-downloads.R b/tests/testthat/test-downloads.R
index 6319bbe9..dc275d6b 100644
--- a/tests/testthat/test-downloads.R
+++ b/tests/testthat/test-downloads.R
@@ -179,7 +179,8 @@ test_that("AGYW download can be created", {
 
   # Areas
   meta_area_demo <- output$output_package$meta_area %>%
-    dplyr::mutate(area_id = dplyr::if_else(area_id == "MWI", "MWI_demo", area_id))
+    dplyr::mutate(area_id = dplyr::if_else(area_id == "MWI", "MWI_demo", area_id),
+                  parent_area_id = dplyr::if_else(parent_area_id == "MWI", "MWI_demo", parent_area_id))
 
   # Save out demo output package
   demo <- output
@@ -194,14 +195,11 @@ test_that("AGYW download can be created", {
   agyw_output_demo <- a_hintr_output_calibrated
   agyw_output_demo$model_output_path <- out_demo
 
-  # Generate AGYW outputs
-  out <- hintr_prepare_agyw_download(agyw_output_demo,
-                                     a_hintr_data$pjnz)
-
+  # Test agyw download
   mock_new_simple_progress <- mockery::mock(MockSimpleProgress$new())
   with_mocked_bindings(
     messages <- naomi_evaluate_promise(
-      out <- hintr_prepare_agyw_download(a_hintr_output_calibrated,
+      out <- hintr_prepare_agyw_download(agyw_output_demo,
                                          a_hintr_data$pjnz)),
     new_simple_progress = mock_new_simple_progress)
 

From 0d48f7558405f59dcd8384df8ece1baea0a076d4 Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Wed, 13 Dec 2023 09:01:26 +0300
Subject: [PATCH 12/53] Update to match renaming of columns in ASF data in
 naomi.resources

---
 R/agyw-integration.R | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index 0031f7ed..091a5fd9 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -206,8 +206,7 @@ agyw_disaggregate_fsw <- function(outputs,
   cohort <- 2000
 
   afs <- afs %>%
-    dplyr::filter(yob == cohort, sex == "female", ISO_A3 == options$area_scope) %>%
-    dplyr::mutate(iso3 = ISO_A3, ISO_A3 = NULL) %>%
+    dplyr::filter(yob == cohort, sex == "female", iso3 == options$area_scope) %>%
     dplyr::full_join(dplyr::select(fsw,iso3,area_id), multiple = "all", by = dplyr::join_by(iso3))
 
   df <- data.frame()

From d18a72c4e4ad409fac22dc20177b59075ff7f3f7 Mon Sep 17 00:00:00 2001
From: Katie Risher <krisher@ic.ac.uk>
Date: Wed, 13 Dec 2023 02:28:19 -0500
Subject: [PATCH 13/53] Update agyw integration code

-Adds comments & sources
-Corrects minor issues w/translation of code
-Updates KP calculations based on updated data
-Corrects age distribution of MSM to reflect age at sexual debut by country
---
 R/agyw-integration.R | 263 ++++++++++++++++++++++++++++---------------
 1 file changed, 172 insertions(+), 91 deletions(-)

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index 12c08470..94fb858b 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -15,9 +15,12 @@ agyw_format_naomi <- function(outputs, options){
                                    "prevalence"),
                   calendar_quarter == options$calendar_quarter_t2)
 
-  naomi_ind_labelled <- naomi_ind %>%
-    dplyr::left_join(outputs$meta_area %>% dplyr::select(area_id, area_name),
-                     by = dplyr::join_by(area_id))
+  # In testing this out, area name is already in the naomi_ind file so adding in area_name here causes errors below -
+  # commenting this out for now
+  # naomi_ind_labelled <- naomi_ind %>%
+  #   dplyr::left_join(outputs$meta_area %>% dplyr::select(area_id, area_name),
+  #                    by = dplyr::join_by(area_id))
+  naomi_ind_labelled <- naomi_ind
 
 
   summarise_naomi_ind <- function(dat, age_cat) {
@@ -53,7 +56,9 @@ agyw_format_naomi <- function(outputs, options){
   df2 <- naomi_ind_labelled %>%
     dplyr::filter(age_group %in% c("Y015_019", "Y020_024", "Y025_029", "Y030_034",
                                    "Y035_039", "Y040_044", "Y045_049", "Y015_049")) %>%
-    dplyr::left_join(outputs$meta_age_group, by = dplyr::join_by(age_group)) %>%
+    # age group label already in the naomi_ind_labelled data frame
+    dplyr::left_join(outputs$meta_age_group %>% dplyr::select(-age_group_label),
+                     by = dplyr::join_by(age_group)) %>%
     dplyr::select(names(df1)) %>%
     # Add aggregate indicators
     dplyr::bind_rows(df1) %>%
@@ -246,7 +251,9 @@ agyw_disaggregate_fsw <- function(outputs,
   #' Adjusting country specific sexual debut estimates with age distribution of
   #' FSW from Thembisa
   #'Downloaded from: https://www.thembisa.org/content/downloadPage/Thembisa4_3
-  zaf_propensity <- naomi.resources::load_agyw_exdata("zaf_propensity", iso3 = "ZAF")
+  zaf_propensity <- naomi.resources::load_agyw_exdata("zaf_propensity", iso3 = "ZAF") %>%
+    dplyr::filter(kp=="FSW") %>%
+    dplyr::select(-kp)
 
   fsw_est <- df %>%
     #  Add FSW propensity estimates from ZAF
@@ -311,8 +318,8 @@ agyw_disaggregate_pwid <- function(outputs,
 
   pwid$total_pwid <- pwid$total_pwid * 0.91
 
-  #' PWID age distribution parameters in ZAF from Thembisa
-  #' Downloaded from: https://www.thembisa.org/content/downloadPage/Thembisa4_3
+  #' PWID age distribution
+  #' Review of literature - Hines et al Lancet Global Health 2020
   gamma_mean <- 29.4
   gamma_sd <- 7
   beta <- gamma_mean / gamma_sd^2 #' rate
@@ -380,9 +387,9 @@ agyw_disaggregate_msm <- function(outputs,
 
 
   #' MSM age distribution parameters in ZAF from Thembisa
-  #' Downloaded from: https://www.thembisa.org/content/downloadPage/Thembisa4_3
-  gamma_mean <- 28
-  gamma_sd <- 9
+  #' Downloaded from: https://www.thembisa.org/content/downloadPage/Thembisa4_3report
+  gamma_mean <- 25
+  gamma_sd <- 7
   beta <- gamma_mean / gamma_sd^2 #' rate
   alpha <- gamma_mean * beta #' shape
 
@@ -393,22 +400,80 @@ agyw_disaggregate_msm <- function(outputs,
     dplyr::mutate(dist = dist / sum(dist))
 
 
-  # Naomi population
-  pop <- naomi_pop %>%
-    dplyr::filter(area_id %in% unique(msm$area_id),
-                  age_group %in% age_groups,
-                  sex == "male")
+  pskewlogis <- function(t, scale, shape, skew) {
+    (1 + (scale * t)^-shape)^-skew
+  }
 
-  msm_est <- dplyr::left_join(
-    pop, zaf_gamma,
-    by = dplyr::join_by(age_group)) %>%
+  #' Calculate proportion of sexually active population using Kinh's country specific
+  #' estimates of age at first sex and naomi population
+  afs <- naomi.resources::load_agyw_exdata("afs", iso3)
+
+  #' Select birth cohort from 2000, to turn 15 in 2015
+  cohort <- 2000
+
+  afs <- afs %>%
+    dplyr::filter(yob == cohort, sex == "male", ISO_A3 == options$area_scope) %>%
+    dplyr::mutate(iso3 = ISO_A3, ISO_A3 = NULL) %>%
+    dplyr::full_join(dplyr::select(msm,iso3,area_id), multiple = "all", by = dplyr::join_by(iso3))
+
+  df <- data.frame()
+
+  #' Calculate sexually active population by age and sex for each district
+  for(x in unique(afs$area_id)) {
+    afs_x <- dplyr::filter(afs, area_id == x)
+    ages <- 15:49
+
+    df_x <- data.frame(
+      area_id = x,
+      age = ages,
+      eversex = pskewlogis(
+        ages,
+        scale = afs_x$lambda,
+        skew = afs_x$skew,
+        shape = afs_x$shape
+      ),
+      age_group = rep(age_groups, each = 5)
+    )
+
+    df_x <- df_x %>%
+      dplyr::group_by(area_id, age_group) %>%
+      dplyr::summarise(eversex = mean(eversex), .groups = "drop") %>%
+      dplyr::left_join(
+        naomi_pop %>% dplyr::filter(sex == "male"),
+        by = c("area_id", "age_group")
+      ) %>%
+      dplyr::mutate(
+        eversexpop = eversex * population,
+        eversexpop_prop = eversexpop / sum(eversexpop)
+      )
+
+    df <- dplyr::bind_rows(df, df_x)
+  }
+
+  #' Adjusting country specific sexual debut estimates with age distribution of
+  #' MSM from Thembisa
+  zaf_propensity <- naomi.resources::load_agyw_exdata("zaf_propensity", iso3 = "ZAF") %>%
+    dplyr::filter(kp=="MSM") %>%
+    dplyr::select(-kp)
+
+
+  msm_est <- df %>%
+    #  Add MSM propensity estimates from ZAF
+    dplyr::left_join(zaf_propensity, by = "age_group") %>%
+    # Calculate distribution of MSM
+    dplyr::mutate(dist = eversexpop_prop * propensity) %>%
+    dplyr::group_by(area_id) %>%
+    dplyr::mutate(dist = dist / sum(dist)) %>%
+    dplyr::ungroup() %>%
+    # Add MSM PSEs
     dplyr::full_join(
-      dplyr::select(msm, total_msm, iso3, area_id),
-      by = c("area_id", "iso3")
+      msm %>% dplyr::select(total_msm, iso3, area_id, area_level),
+      by = dplyr::join_by(area_id, iso3, area_level)
     ) %>%
+    # Calculate FSW proportions
     dplyr::mutate(msm = dist * total_msm,
                   msm_prop = msm / population) %>%
-    dplyr::select(-dist, -total_msm)
+    dplyr::select(-eversexpop, -eversexpop_prop, -propensity, - dist, -total_msm)
 
   msm_est
 }
@@ -572,12 +637,13 @@ agyw_adjust_sexbehav_msm_pwid <- function(outputs,
 #'
 #' @return SRB PSEs with logit prevalence estimates.
 #'
-#' TODO: add in more documentation here
-#' Calculation steps:
-#' 1.
-#' 2.
-#' 3.
-#' 4.
+#' To calculate district-age-sex-sexual behaviour-specific HIV prevalence, we maintain
+#' HIV prevalence from Naomi for a district-age-sex, but disaggregate to different
+#' risk behaviours using 1) HIV prevalence ratios from household surveys for
+#' those reporting no sex vs one cohabiting vs non-regular sexual partner(s),
+#' and 2) a linear regression through admin-1 level estimates of the ratio of KP
+#' prevalence to gen-pop prevalence used to predict an age-district-specific FSW
+#' to general population prevalence ratio.
 #'
 agyw_calculate_prevalence_female <- function(naomi_output,
                                              options,
@@ -601,7 +667,8 @@ agyw_calculate_prevalence_female <- function(naomi_output,
 
   #' Extract country specific national FSW prevalence
   iso3 <- options$area_scope
-   fsw_prev <- naomi.resources::load_agyw_exdata("kp_estimates", iso3) %>%
+  #' THIS IS NOW USING SINGLE COUNTRY INSTEAD OF ALL COUNTRIES
+  fsw_prev <- naomi.resources::load_agyw_exdata("kp_estimates", iso3) %>%
     dplyr::filter(kp == "FSW", indicator == "prevalence")
 
   kp_prev <- fsw_prev %>%
@@ -611,6 +678,7 @@ agyw_calculate_prevalence_female <- function(naomi_output,
                   prev_logodds = log(gen_prev / (1-gen_prev)))
 
   #' KP regression: FSW prevalence relative to general prevalence
+  #' ########## THIS REGRESSION SHOULD BE TAKING DATA FROM ALL ADMIN-1 LEVEL
   kp_fit <- lm(prev_fsw_logodds ~ prev_logodds, data = kp_prev)
 
   #' Modelled estimates of proportion in each risk group
@@ -632,10 +700,10 @@ agyw_calculate_prevalence_female <- function(naomi_output,
 
 
   #' Calculate prevalence in each category
-  calculate_prevalence <- function(x){
+  calculate_prevalence <- function(x, iso3){
 
     #' Log odds ratio from SRB group survey prevalence
-    lor <- naomi.resources:::load_agyw_exdata("srb_survey_lor", "BWA") %>%
+    lor <- naomi.resources:::load_agyw_exdata("srb_survey_lor", iso3) %>%
       dplyr::filter(sex == "female")
 
     lor_15to29 <- lor$lor_15to29
@@ -675,7 +743,7 @@ agyw_calculate_prevalence_female <- function(naomi_output,
     ) %>%
     dplyr::filter(behav %in% c("nosex12m", "sexcohab", "sexnonreg", "sexpaid12m")) %>%
     split(~ area_id + age_group) %>%
-    lapply(calculate_prevalence) %>%
+    lapply(calculate_prevalence, iso3) %>%
     dplyr::bind_rows() %>%
     tidyr::unite("indicator", indicator, behav, sep = "_") %>%
     tidyr::pivot_wider( names_from = indicator, values_from = estimate) %>%
@@ -696,12 +764,14 @@ agyw_calculate_prevalence_female <- function(naomi_output,
 #'
 #' @return SRB PSEs with logit prevalence estimates.
 #'
-#' TODO: add in more documentation here
-#' Calculation steps:
-#' 1.
-#' 2.
-#' 3.
-#' 4.
+#' To calculate district-age-sex-sexual behaviour-specific HIV prevalence, we maintain
+#' HIV prevalence from Naomi for a district-age-sex, but disaggregate to different
+#' risk behaviours using 1) HIV prevalence ratios from household surveys for
+#' those reporting no sex vs one cohabiting vs non-regular sexual partner(s),
+#' and 2) admin-1 level estimates of the ratio of KP prevalence to gen-pop prevalence
+#' among 15-24 year olds for MSM (due to the young age distribution of MSM) or
+#' among 15-49 year olds for PWID (due to the older age distribution of PWID)
+#' applied to all age groups among MSM and PWID in districts by admin-1 unit.
 #'
 
 agyw_calculate_prevalence_male <- function(naomi_output,
@@ -721,10 +791,12 @@ agyw_calculate_prevalence_male <- function(naomi_output,
 
   # Naomi general population prevalence
   genpop_prev <- naomi_est %>%
-    dplyr::filter(age_group == "Y015_024") %>%
-    dplyr::select(area_id, area_level, gen_prev) %>%
-    dplyr::mutate(logit_gen_prev = log(gen_prev / (1 - gen_prev))) %>%
-    dplyr::select(area_id, gen_prev, logit_gen_prev, area_level)
+    dplyr::filter(age_group == "Y015_024" | age_group == "Y015_049") %>%
+    dplyr::select(area_id, area_level, age_group, gen_prev) %>%
+    tidyr::pivot_wider(names_from = age_group, values_from = gen_prev) %>%
+    dplyr::mutate(logit_gen_prev_msm = log(Y015_024 / (1 - Y015_024)),
+                  logit_gen_prev_pwid = log(Y015_049 / (1 - Y015_049))) %>%
+    dplyr::select(area_id, logit_gen_prev_msm, logit_gen_prev_pwid, area_level)
 
   #' Extract country specific national MSM + PWID prevalence
   iso3 <- options$area_scope
@@ -738,13 +810,14 @@ agyw_calculate_prevalence_male <- function(naomi_output,
     dplyr::mutate(median = log(median / (1-median))) %>%
     # Add in Naomi general pop prevalence
     dplyr::left_join(genpop_prev, by = dplyr::join_by(area_id)) %>%
-    dplyr::select(kp, iso3, area_id, logit_gen_prev, median, area_level) %>%
+    dplyr::select(kp, iso3, area_id, logit_gen_prev_msm, logit_gen_prev_pwid, median, area_level) %>%
     # Calculate Log-Odds ratio
     tidyr::pivot_wider(names_from = kp,
-                       values_from = c("logit_gen_prev","median")) %>%
-    dplyr::mutate(msm_lor = median_MSM - logit_gen_prev_MSM,
-                  pwid_lor = median_PWID - logit_gen_prev_PWID) %>%
-    dplyr::select(-c("logit_gen_prev_PWID","logit_gen_prev_MSM","median_PWID","median_MSM","area_level"))
+                       names_glue = "{.value}_{kp}",
+                       values_from = c("median")) %>%
+    dplyr::mutate(msm_lor = median_MSM - logit_gen_prev_msm,
+                  pwid_lor = median_PWID - logit_gen_prev_pwid) %>%
+    dplyr::select(-c("logit_gen_prev_pwid","logit_gen_prev_msm","median_PWID","median_MSM","area_level"))
 
   # Match KP estimates (admin0 or admin1) with SAE estimates
   msm_analysis_level <- paste0("area_id",unique(msm_est$area_level))
@@ -772,10 +845,10 @@ agyw_calculate_prevalence_male <- function(naomi_output,
     )
 
   #' Calculate prevalence in each category
-  calculate_prevalence <- function(x){
+  calculate_prevalence <- function(x, iso3){
 
     #' Log odds ratio from SRB group survey prevalence
-    lor <- naomi.resources:::load_agyw_exdata("srb_survey_lor", "BWA") %>%
+    lor <- naomi.resources:::load_agyw_exdata("srb_survey_lor", iso3) %>%
       dplyr::filter(sex == "male")
 
     lor_15to29 <- lor$lor_15to29
@@ -811,7 +884,7 @@ agyw_calculate_prevalence_male <- function(naomi_output,
     tidyr::separate(indicator, into = c("indicator", "behav")) %>%
     dplyr::filter(behav %in% c("nosex12m", "sexcohab", "sexnonreg", "msm", "pwid")) %>%
     split(~ area_id + age_group) %>%
-    lapply(calculate_prevalence) %>%
+    lapply(calculate_prevalence, iso3) %>%
     dplyr::bind_rows() %>%
     tidyr::unite("indicator", indicator, behav, sep = "_") %>%
     tidyr::pivot_wider( names_from = indicator, values_from = estimate) %>%
@@ -875,12 +948,10 @@ logit_scale_prev <- function(lor, N_fine, plhiv) {
 #'
 #' @return Wide format output required for the AGYW workbook.
 #'
-#'#' TODO: add in more documentation here
-#' Calculation steps:
-#' 1.
-#' 2.
-#' 3.
-#' 4.
+#' While maintaining age/sex/district-specific HIV incidence from Naomi, distribute
+#' HIV incidence between our 4 different behavioural groups utilizing IRRs from the
+#' literature
+#'
 
 agyw_calculate_incidence_female <- function(naomi_output,
                                             options,
@@ -912,29 +983,29 @@ agyw_calculate_incidence_female <- function(naomi_output,
 
   #' Risk ratios for people non-regular sex partners relative to those with a
   #' single cohabiting sex partner
-  #' TODO: Add source
+  #' ALPHA Network pooled analysis (Slaymaker et al CROI 2020), Jia et al systematic review, Ssempijja et al JAIDS 2022
   rr_sexcohab <- 1
   rr_sexnonreg_young <- 1.72
   rr_sexnonreg_old <- 2.1
 
   #' Tiered HIV risk ratio for the FSW group depending on district-level HIV
   #' incidence in general population
-  #' TODO: Add source
-  rr_sexpaid12m_vvh <- 3 #' >3%
-  rr_sexpaid12m_vh <- 6 #' 1-3%
-  rr_sexpaid12m_h <- 9 #' 0.3-1%
-  rr_sexpaid12m_m <- 13 #' 0.1-0.3%
-  rr_sexpaid12m_l <- 25 #' <0.1%
-
-  #' x = Incidence levels in the general population
-  #' y = Tiered HIV risk ratios
-  regression_dat <- data.frame(x = c(0.1,0.3,1,3,9), y = c(25,13,9,6,3))
-  rr_reg <- lm(log(y) ~ log(x), data = regression_dat)
-
-  rr_sexpaid12m <- exp(predict(rr_reg,data.frame(x = df$incidence)))
+  #' Jones et al medRxiv "HIV incidence among women engaging in sex work in sub-Saharan Africa: a systematic review and meta-analysis"
+  #' https://www.medrxiv.org/content/10.1101/2023.10.17.23297108v2
+  #' linear relationship between log(FSW incidence) and log(gen pop incidence)
+  #' regression points shared in confidence, y = mx + b slope is 0.604104017 and
+  #' intercept is 0.075090952
+
+  rr_reg_dat <- data.frame(genpop_incidence = df$incidence/100) %>%
+    mutate(log_gen = log(genpop_incidence),
+           log_sexpaid12m = 0.604104017 * log_gen + 0.075090952,
+           sexpaid12m_incidence = exp(log_sexpaid12m),
+           rr_sexpaid12m = sexpaid12m_incidence / genpop_incidence)
+
+  rr_sexpaid12m <- rr_reg_dat$rr_sexpaid12m
   # This gives implausibly high RRs for very low districts (e.g. IRR = 297!)
-  # capping at 25
-  rr_sexpaid12m[rr_sexpaid12m > 25] <- 25
+  # capping at 100
+  rr_sexpaid12m[rr_sexpaid12m > 100] <- 100
 
   #' TODO: Get distributions on these and using a sampling method to get
   #' uncertainty in economic analysis e.g.
@@ -965,14 +1036,14 @@ agyw_calculate_incidence_female <- function(naomi_output,
       susceptible_sexcohab = population_sexcohab - plhiv_sexcohab,
       susceptible_sexnonreg = population_sexnonreg - plhiv_sexnonreg,
       susceptible_sexpaid12m = population_sexpaid12m - plhiv_sexpaid12m,
+      incidence_sexpaid12m = (incidence/100) * rr_sexpaid12m,
+      infections_sexpaid12m = susceptible_sexpaid12m * incidence_sexpaid12m,
       incidence_nosex12m = 0,
-      incidence_sexcohab = infections / (susceptible_sexcohab + rr_sexnonreg * susceptible_sexnonreg + rr_sexpaid12m *susceptible_sexpaid12m),
+      incidence_sexcohab = (infections - infections_sexpaid12m) / (susceptible_sexcohab + rr_sexnonreg * susceptible_sexnonreg),
       incidence_sexnonreg = incidence_sexcohab * rr_sexnonreg,
-      incidence_sexpaid12m = incidence_sexcohab * rr_sexpaid12m,
       infections_nosex12m = 0,
       infections_sexcohab = susceptible_sexcohab * incidence_sexcohab,
-      infections_sexnonreg = susceptible_sexnonreg * incidence_sexnonreg,
-      infections_sexpaid12m = susceptible_sexpaid12m * incidence_sexpaid12m)
+      infections_sexnonreg = susceptible_sexnonreg * incidence_sexnonreg)
 
   #' Calculate risk group incidence for aggregate age groups
 
@@ -1114,7 +1185,7 @@ agyw_calculate_incidence_male <- function(naomi_output,
     dplyr::filter(!is.na(population))
 
 
-  # NOTES/SOURCE??
+  # ALPHA Network pooled analysis (Slaymaker et al CROI 2020), Hoffman et al JAIDS 2022, Ssempijja et al JAIDS 2022
   rr_sexcohab <- 1
   rr_sexnonreg_young <- 1.89
   rr_sexnonreg_old <- 2.1
@@ -1132,8 +1203,9 @@ agyw_calculate_incidence_male <- function(naomi_output,
     dplyr::mutate(
       msm_pr = round(prev_msm/ prevalence, 2),
       pwid_pr = round(prev_pwid / prevalence, 2),
-      # correcting since the reference cat is reg cohabiting not gen pop
-      # need more sustainable fix for this
+      # Setting artificial cutoff of IRR of 2.5 due to Stannah et al
+      # Lancet HIV systematic review of MSM vs gen pop IRR
+      # https://www.thelancet.com/journals/lanhiv/article/PIIS2352-3018(23)00111-X/fulltext
       rr_msm = dplyr::if_else(msm_pr > 2.5, msm_pr, 2.5),
       rr_pwid = dplyr::if_else(pwid_pr > 2.5, pwid_pr, 2.5),
       rr_sexnonreg = dplyr::case_when(
@@ -1155,18 +1227,18 @@ agyw_calculate_incidence_male <- function(naomi_output,
       susceptible_sexnonreg = population_sexnonreg - plhiv_sexnonreg,
       susceptible_msm = population_msm - plhiv_msm,
       susceptible_pwid = population_pwid - plhiv_pwid,
+      incidence_msm = (incidence/100) * rr_msm,
+      incidence_pwid = (incidence/100) * rr_pwid,
+      infections_msm = susceptible_msm * incidence_msm,
+      infections_pwid = susceptible_pwid * incidence_pwid,
       incidence_nosex12m = 0,
-      incidence_sexcohab = infections / (susceptible_sexcohab +
-                                           rr_sexnonreg * susceptible_sexnonreg + rr_msm * susceptible_msm +
-                                           rr_pwid * susceptible_pwid),
+      incidence_sexcohab = (infections - infections_msm - infections_pwid) / (susceptible_sexcohab +
+                                           rr_sexnonreg * susceptible_sexnonreg),
       incidence_sexnonreg = incidence_sexcohab * rr_sexnonreg,
-      incidence_msm = incidence_sexcohab * rr_msm,
-      incidence_pwid = incidence_sexcohab * rr_pwid,
       infections_nosex12m = 0,
       infections_sexcohab = susceptible_sexcohab * incidence_sexcohab,
-      infections_sexnonreg = susceptible_sexnonreg * incidence_sexnonreg,
-      infections_msm = susceptible_msm * incidence_msm,
-      infections_pwid = susceptible_pwid * incidence_pwid
+      infections_sexnonreg = susceptible_sexnonreg * incidence_sexnonreg
+
     )
 
   #' Calculate risk group incidence for aggregate age groups
@@ -1290,10 +1362,15 @@ agyw_calculate_incidence_male <- function(naomi_output,
 #' @return Wide format output required for the AGYW workbook
 #'
 #' @export
+#'
+#' Survey year should be updated to most current household survey in the country -
+#' for countries without recent household surveys, leave at 2018 - the spatiotemporal
+#' model of sexual behaviour fitted to all countries has the most data for in roughly 2018
 
 
 
-agyw_generate_risk_populations <- function(naomi_output) {
+agyw_generate_risk_populations <- function(naomi_output,
+                                           survey_year = 2018) {
 
 
   # Read in naomi outputs
@@ -1334,23 +1411,27 @@ agyw_generate_risk_populations <- function(naomi_output) {
   female_logit_prevalence <- agyw_calculate_prevalence_female(naomi$naomi_long,
                                                               options,
                                                               fsw_est,
-                                                              female_srb)
+                                                              female_srb,
+                                                              survey_year)
 
   male_logit_prevalence <- agyw_calculate_prevalence_male(naomi$naomi_long,
                                                           options,
                                                           msm_est,
-                                                          male_srb)
+                                                          male_srb,
+                                                          survey_year)
 
   #' Calculate risk group incidence
   female_incidence <- agyw_calculate_incidence_female(naomi$naomi_long,
                                                       options,
                                                       female_srb,
-                                                      female_logit_prevalence)
+                                                      female_logit_prevalence,
+                                                      survey_year)
 
   male_incidence <- agyw_calculate_incidence_male(naomi$naomi_long,
                                                   options,
                                                   male_srb,
-                                                  male_logit_prevalence)
+                                                  male_logit_prevalence,
+                                                  survey_year)
 
 
 

From 97cb4873fef05f590798fded5eaf4123fa33ea94 Mon Sep 17 00:00:00 2001
From: Robert Ashton <r.ashton@imperial.ac.uk>
Date: Wed, 13 Dec 2023 17:00:51 +0000
Subject: [PATCH 14/53] Wire up data with AGYW template

---
 DESCRIPTION                     |  2 +-
 R/downloads.R                   |  9 ++++++---
 R/utils.R                       | 19 +++++++++++++++++++
 docker/Dockerfile               |  3 ++-
 tests/testthat/test-downloads.R | 14 ++++++++------
 5 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 66afb408..8c47b0da 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -38,6 +38,7 @@ Imports:
     mvtnorm,
     naomi.resources,
     naomi.options (>= 1.2.0),
+    openxlsx,
     plotly,
     prettyunits,
     qs,
@@ -51,7 +52,6 @@ Imports:
     traduire,
     utils,
     withr,
-    writexl,
     yaml,
     zip,
     zoo
diff --git a/R/downloads.R b/R/downloads.R
index d2c00faf..4aab096d 100644
--- a/R/downloads.R
+++ b/R/downloads.R
@@ -112,13 +112,16 @@ hintr_prepare_agyw_download <- function(output, pjnz,
   progress <- new_simple_progress()
   progress$update_progress("PROGRESS_DOWNLOAD_AGYW")
 
-  ## TODO: Remove hardcoding of ISO3
   template_path <- naomi.resources::get_agyw_workbook_path()
 
   risk_populations <- agyw_generate_risk_populations(output$model_output_path)
 
-  dummy_data <- data.frame(x = c(1, 2, 3), y = c(3, 4, 5))
-  writexl::write_xlsx(list(sheet = dummy_data), path = path)
+  sheets <- list(
+    "All outputs - F" = risk_populations$female_incidence,
+    "All outputs - M" = risk_populations$male_incidence,
+    "NAOMI outputs" = risk_populations$naomi_output
+  )
+  write_xlsx_sheets(template_path, sheets, path = path)
 
   model_output <- read_hintr_output(output$model_output_path)
   options <- yaml::read_yaml(text = model_output$info$options.yml)
diff --git a/R/utils.R b/R/utils.R
index a96258a1..1dcac24d 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -123,3 +123,22 @@ area_level_from_id <- function(area_ids) {
     level
   })
 }
+
+#' Write list of data frames into an xlsx file
+#'
+#' @param template Path to xlsx file with empty sheets
+#' @param sheets Named list of data frames to write into template. The names
+#'   must match the destination sheet in the xlsx
+#' @param path Path to output the filled in xlsx
+#'
+#' @return Path to complete xlsx file
+#' @keywords internal
+write_xlsx_sheets <- function(template, sheets, path) {
+  wb <- openxlsx::loadWorkbook(template)
+  for (sheet in names(sheets)) {
+    openxlsx::writeData(wb, sheet, sheets[[sheet]])
+  }
+
+  openxlsx::saveWorkbook(wb, path)
+  path
+}
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 99299213..5336b2d8 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -13,7 +13,8 @@ RUN install_packages --repo=https://mrc-ide.r-universe.dev \
     mockr \
     rvest \
     pkgbuild \
-    testthat.buildkite
+    testthat.buildkite \
+    openxlsx
 
 ## Model run will try to parallelise over as many threads as are available
 ## potentially slowing the application, manually limit threads to 1
diff --git a/tests/testthat/test-downloads.R b/tests/testthat/test-downloads.R
index dc275d6b..1d785609 100644
--- a/tests/testthat/test-downloads.R
+++ b/tests/testthat/test-downloads.R
@@ -167,7 +167,7 @@ test_that("AGYW download can be created", {
 
   #' Create naomi outputs with "MWI_demo" iso3 to align with testing data in
   #' naomi.resources
-  output <- qs::qread(a_hintr_output_calibrated$model_output_path)
+  output <- read_hintr_output(a_hintr_output_calibrated$model_output_path)
 
   # Create demo datasets
   # Indicators
@@ -189,7 +189,7 @@ test_that("AGYW download can be created", {
   demo$output_package$meta_area <- meta_area_demo
 
   out_demo <- tempfile(fileext = ".qs")
-  qs::qsave(demo, preset = "fast", out_demo)
+  hintr_save(demo, out_demo)
 
   # Add to existing hintr_test data
   agyw_output_demo <- a_hintr_output_calibrated
@@ -209,10 +209,12 @@ test_that("AGYW download can be created", {
   expect_length(out$metadata$description, 1)
   expect_equal(out$metadata$areas, "MWI")
 
-  read <- readxl::read_xlsx(out$path)
-  expect_equal(read,
-               data.frame(x = c(1, 2, 3), y = c(3, 4, 5)),
-               ignore_attr = TRUE)
+  outputs_female <- openxlsx::readWorkbook(out$path, sheet = "All outputs - F")
+  expect_true(nrow(outputs_female) > 10)
+  outputs_male <- openxlsx::readWorkbook(out$path, sheet = "All outputs - M")
+  expect_true(nrow(outputs_male) > 10)
+  naomi_outputs <- openxlsx::readWorkbook(out$path, sheet = "NAOMI outputs")
+  expect_true(nrow(naomi_outputs) > 4)
 
   ## Progress messages printed
   expect_length(messages$progress, 1)

From f897636c6af740a3c44d3f14ec32ccb546dd200b Mon Sep 17 00:00:00 2001
From: Robert Ashton <r.ashton@imperial.ac.uk>
Date: Wed, 13 Dec 2023 17:29:52 +0000
Subject: [PATCH 15/53] Fix spdep warning by explicitly setting style for
 mat2listw

---
 R/car.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/car.R b/R/car.R
index e7f121f8..7264cb62 100644
--- a/R/car.R
+++ b/R/car.R
@@ -74,7 +74,7 @@ scale_gmrf_precision <- function(Q,
                                  A = matrix(1, ncol = ncol(Q)),
                                  eps = sqrt(.Machine$double.eps)) {
 
-  nb <- spdep::mat2listw(abs(Q), style = "M")$neighbours
+  nb <- spdep::mat2listw(abs(Q), style = "B")$neighbours
   comp <- spdep::n.comp.nb(nb)
 
   for (k in seq_len(comp$nc)) {

From 1030010360c0e8b63eed27723e42c7a0b70d4d70 Mon Sep 17 00:00:00 2001
From: Rob Ashton <r.ashton@imperial.ac.uk>
Date: Thu, 14 Dec 2023 11:36:02 +0000
Subject: [PATCH 16/53] Suppress single warning

---
 R/car.R | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/R/car.R b/R/car.R
index 7264cb62..122ae8c1 100644
--- a/R/car.R
+++ b/R/car.R
@@ -74,7 +74,17 @@ scale_gmrf_precision <- function(Q,
                                  A = matrix(1, ncol = ncol(Q)),
                                  eps = sqrt(.Machine$double.eps)) {
 
-  nb <- spdep::mat2listw(abs(Q), style = "B")$neighbours
+  # spdep is annoying warning with style = "M" but not clear from docs
+  # if this means style missing or style Matrix. And none of the other
+  # options for style seemingly do the same thing as style = "M"
+  # so just ignoring this warning for now
+  withCallingHandlers({
+    nb <- spdep::mat2listw(abs(Q), style = "M")$neighbours
+  }, warning = function(w) {
+    if (startsWith(conditionMessage(w), "style is M")) {
+      invokeRestart("muffleWarning")
+    }
+  })
   comp <- spdep::n.comp.nb(nb)
 
   for (k in seq_len(comp$nc)) {

From c87824d80bc73d7db4ce6b605e2fdd042fec32d5 Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Wed, 20 Dec 2023 12:49:33 +0000
Subject: [PATCH 17/53] align katie's code and clean up documentation

---
 R/agyw-integration.R | 66 +++++++++++++++++++-------------------------
 1 file changed, 28 insertions(+), 38 deletions(-)

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index 3ed1d915..21c0cfac 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -4,7 +4,7 @@
 #' @param options Naomi model options.
 #'
 #'
-#' @return District level FSW estimates by 5-year age bands for ages 15-49.
+#' @return Naomi indicators formatted for the AGYW workbook.
 #'
 #' @export
 
@@ -16,13 +16,10 @@ agyw_format_naomi <- function(outputs, options){
                   calendar_quarter == options$calendar_quarter_t2)
 
   naomi_ind_labelled <- naomi_ind %>%
-    dplyr::left_join(outputs$meta_area %>% dplyr::select(area_id, area_name,
-                                                         area_level),
+    dplyr::left_join(outputs$meta_area %>% dplyr::select(area_id, area_name,area_level),
                      by = dplyr::join_by(area_id))
 
 
-
-
   summarise_naomi_ind <- function(dat, age_cat) {
 
     if(age_cat == "Y015_024"){age_groups <- c("Y015_019", "Y020_024")}
@@ -57,8 +54,7 @@ agyw_format_naomi <- function(outputs, options){
     dplyr::filter(age_group %in% c("Y015_019", "Y020_024", "Y025_029", "Y030_034",
                                    "Y035_039", "Y040_044", "Y045_049", "Y015_049")) %>%
     # age group label already in the naomi_ind_labelled data frame
-    dplyr::left_join(outputs$meta_age_group %>% dplyr::select(-age_group_label),
-                     by = dplyr::join_by(age_group)) %>%
+    dplyr::left_join(outputs$meta_age_group , by = dplyr::join_by(age_group)) %>%
     dplyr::select(names(df1)) %>%
     # Add aggregate indicators
     dplyr::bind_rows(df1) %>%
@@ -150,7 +146,7 @@ agyw_format_naomi <- function(outputs, options){
 
 #' Dissagreggate admin1 FSW proportions from Oli's KP model to 5-age groups
 #'
-#' @param outputs Naomi output
+#' @param outputs Naomi output.
 #' @param options Naomi model options.
 #' @param naomi_population Naomi population estimates for T2.
 #'
@@ -251,8 +247,7 @@ agyw_disaggregate_fsw <- function(outputs,
   #' FSW from Thembisa
   #'Downloaded from: https://www.thembisa.org/content/downloadPage/Thembisa4_3
   zaf_propensity <- naomi.resources::load_agyw_exdata("zaf_propensity", iso3 = "ZAF") %>%
-    dplyr::filter(kp=="FSW") %>%
-    dplyr::select(-kp)
+    dplyr::filter(kp == "FSW")
 
   fsw_est <- df %>%
     #  Add FSW propensity estimates from ZAF
@@ -281,7 +276,7 @@ agyw_disaggregate_fsw <- function(outputs,
 
 #' Disaggregate admin1 PWID proportions from Oli's KP model to 5-age groups
 #'
-#' @param outputs Naomi output..
+#' @param outputs Naomi output.
 #' @param options Naomi model options.
 #' @param naomi_population Naomi population estimates for T2.
 #'
@@ -353,7 +348,7 @@ agyw_disaggregate_pwid <- function(outputs,
 
 #' Disaggregate admin1 MSM proportions from Oli's KP model to 5-age groups
 #'
-#' @param outputs Naomi output..
+#' @param outputs Naomi output.
 #' @param options Naomi model options.
 #' @param naomi_population Naomi population estimates for T2.
 #'
@@ -411,8 +406,7 @@ agyw_disaggregate_msm <- function(outputs,
   cohort <- 2000
 
   afs <- afs %>%
-    dplyr::filter(yob == cohort, sex == "male", ISO_A3 == options$area_scope) %>%
-    dplyr::mutate(iso3 = ISO_A3, ISO_A3 = NULL) %>%
+    dplyr::filter(yob == cohort, sex == "male", iso3 == options$area_scope) %>%
     dplyr::full_join(dplyr::select(msm,iso3,area_id), multiple = "all", by = dplyr::join_by(iso3))
 
   df <- data.frame()
@@ -452,8 +446,7 @@ agyw_disaggregate_msm <- function(outputs,
   #' Adjusting country specific sexual debut estimates with age distribution of
   #' MSM from Thembisa
   zaf_propensity <- naomi.resources::load_agyw_exdata("zaf_propensity", iso3 = "ZAF") %>%
-    dplyr::filter(kp=="MSM") %>%
-    dplyr::select(-kp)
+    dplyr::filter(kp == "MSM")
 
 
   msm_est <- df %>%
@@ -638,11 +631,12 @@ agyw_adjust_sexbehav_msm_pwid <- function(outputs,
 #'
 #' To calculate district-age-sex-sexual behaviour-specific HIV prevalence, we maintain
 #' HIV prevalence from Naomi for a district-age-sex, but disaggregate to different
-#' risk behaviours using 1) HIV prevalence ratios from household surveys for
-#' those reporting no sex vs one cohabiting vs non-regular sexual partner(s),
-#' and 2) a linear regression through admin-1 level estimates of the ratio of KP
-#' prevalence to gen-pop prevalence used to predict an age-district-specific FSW
-#' to general population prevalence ratio.
+#' risk behaviours using:
+#'  (1) HIV prevalence ratios from household surveys for those reporting no
+#'  sex vs one cohabiting vs non-regular sexual partner(s), and
+#'  (2) a linear regression through admin-1 level estimates of the ratio of KP
+#'  prevalence to gen-pop prevalence used to predict an age-district-specific
+#'  FSW to general population prevalence ratio.
 #'
 agyw_calculate_prevalence_female <- function(naomi_output,
                                              options,
@@ -695,9 +689,6 @@ agyw_calculate_prevalence_female <- function(naomi_output,
       population_sexpaid12m = population * prop_sexpaid12m
     )
 
-
-
-
   #' Calculate prevalence in each category
   calculate_prevalence <- function(x, iso3){
 
@@ -767,13 +758,14 @@ agyw_calculate_prevalence_female <- function(naomi_output,
 #'
 #' To calculate district-age-sex-sexual behaviour-specific HIV prevalence, we maintain
 #' HIV prevalence from Naomi for a district-age-sex, but disaggregate to different
-#' risk behaviours using 1) HIV prevalence ratios from household surveys for
-#' those reporting no sex vs one cohabiting vs non-regular sexual partner(s),
-#' and 2) admin-1 level estimates of the ratio of KP prevalence to gen-pop prevalence
-#' among 15-24 year olds for MSM (due to the young age distribution of MSM) or
-#' among 15-49 year olds for PWID (due to the older age distribution of PWID)
-#' applied to all age groups among MSM and PWID in districts by admin-1 unit.
-#'
+#' risk behaviours using:
+#'  (1) HIV prevalence ratios from household surveys for those reporting no
+#'  sex vs one cohabiting vs non-regular sexual partner(s), and
+#'  (2) admin-1 level estimates of the ratio of KP prevalence to gen-pop prevalence
+#'   among 15-24 year olds for MSM (due to the young age distribution of MSM) or
+#'   among 15-49 year olds for PWID (due to the older age distribution of PWID)
+#'   applied to all age groups among MSM and PWID in districts by admin-1 unit.
+
 
 agyw_calculate_prevalence_male <- function(naomi_output,
                                            areas,
@@ -806,7 +798,7 @@ agyw_calculate_prevalence_male <- function(naomi_output,
   msm_pwid_prev <- naomi.resources::load_agyw_exdata("kp_estimates", iso3) %>%
     dplyr::filter(indicator == "prevalence", kp %in% c("MSM", "PWID"))
 
-# KP population prevalence
+ # KP population prevalence
   kp_prev <- msm_pwid_prev %>%
     dplyr::select(-indicator,-lower, -upper) %>%
     dplyr::mutate(median = log(median / (1 - median))) %>%
@@ -999,10 +991,10 @@ agyw_calculate_incidence_female <- function(naomi_output,
   #' intercept is 0.075090952
 
   rr_reg_dat <- data.frame(genpop_incidence = df$incidence/100) %>%
-    mutate(log_gen = log(genpop_incidence),
-           log_sexpaid12m = 0.604104017 * log_gen + 0.075090952,
-           sexpaid12m_incidence = exp(log_sexpaid12m),
-           rr_sexpaid12m = sexpaid12m_incidence / genpop_incidence)
+    dplyr::mutate(log_gen = log(genpop_incidence),
+                  log_sexpaid12m = 0.604104017 * log_gen + 0.075090952,
+                  sexpaid12m_incidence = exp(log_sexpaid12m),
+                  rr_sexpaid12m = sexpaid12m_incidence / genpop_incidence)
 
   rr_sexpaid12m <- rr_reg_dat$rr_sexpaid12m
   # This gives implausibly high RRs for very low districts (e.g. IRR = 297!)
@@ -1351,8 +1343,6 @@ agyw_calculate_incidence_male <- function(naomi_output,
 }
 
 
-
-
 #' Calculate incidence in high risk male key populations
 #'
 #' @param outputs Naomi output.

From 64843690d02a456664c12f17ceb5691835060fe4 Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Fri, 22 Dec 2023 14:48:11 +0000
Subject: [PATCH 18/53] account for all label configurations

---
 R/agyw-integration.R | 117 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 104 insertions(+), 13 deletions(-)

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index 21c0cfac..8bfd6188 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -15,9 +15,16 @@ agyw_format_naomi <- function(outputs, options){
                                    "prevalence"),
                   calendar_quarter == options$calendar_quarter_t2)
 
+  area_labels <- outputs$meta_area %>%
+    dplyr::select(area_id, area_name,area_level, spectrum_region_code)
+  area_label_cols <- intersect(names(naomi_ind), names(area_labels))
+
+  age_labels <- outputs$meta_age_group
+  age_label_cols <- intersect(names(naomi_ind), names(age_labels))
+
   naomi_ind_labelled <- naomi_ind %>%
-    dplyr::left_join(outputs$meta_area %>% dplyr::select(area_id, area_name,area_level),
-                     by = dplyr::join_by(area_id))
+    dplyr::left_join(area_labels, by = area_label_cols) %>%
+    dplyr::left_join(age_labels, by = age_label_cols)
 
 
   summarise_naomi_ind <- function(dat, age_cat) {
@@ -27,10 +34,11 @@ agyw_format_naomi <- function(outputs, options){
                                               "Y040_044", "Y045_049")}
 
     dat %>%
-      dplyr::select(area_id, area_name, area_level, calendar_quarter,
+      dplyr::select(area_id, area_name, area_level, spectrum_region_code, calendar_quarter,
                     age_group, sex, indicator, mean) %>%
       tidyr::pivot_wider(names_from = indicator, values_from = mean) %>%
-      dplyr::group_by(area_id, area_name, area_level, calendar_quarter, sex) %>%
+      dplyr::group_by(area_id, area_name, area_level,spectrum_region_code,
+                      calendar_quarter, sex) %>%
       dplyr::summarise(
         "population" = sum(population * as.integer(age_group %in% age_groups)),
         "plhiv" = sum(plhiv * as.integer(age_group %in% age_groups)),
@@ -53,8 +61,6 @@ agyw_format_naomi <- function(outputs, options){
   df2 <- naomi_ind_labelled %>%
     dplyr::filter(age_group %in% c("Y015_019", "Y020_024", "Y025_029", "Y030_034",
                                    "Y035_039", "Y040_044", "Y045_049", "Y015_049")) %>%
-    # age group label already in the naomi_ind_labelled data frame
-    dplyr::left_join(outputs$meta_age_group , by = dplyr::join_by(age_group)) %>%
     dplyr::select(names(df1)) %>%
     # Add aggregate indicators
     dplyr::bind_rows(df1) %>%
@@ -179,7 +185,36 @@ agyw_disaggregate_fsw <- function(outputs,
     dplyr::left_join(naomi_pop %>% dplyr::filter(sex == "female"),
                      by = dplyr::join_by(iso3, area_id, age_group)) %>%
     dplyr::mutate(total_fsw = population * prop_fsw) %>%
-    dplyr::select(iso3, area_id, total_fsw, age_group, area_level)
+    dplyr::select(iso3, area_id, total_fsw, age_group, area_level, spectrum_region_code)
+
+  #' Check for consensus estimate of FSW
+  kp_consensus <- extract_kp_workbook(pjnz)
+  fsw_consensus <- kp_consensus[kp_consensus$key_population == "FSW", ]$population_size
+
+  if(!is.na(fsw_consensus)){
+
+    # Check if consensus estimate is larger than age matched population denominator
+    pop <- naomi_pop[naomi_pop$area_level == 0 & naomi_pop$age_group == "Y015_049" & naomi_pop$sex == "female",]$population
+    stopifnot(fsw_consensus < pop)
+
+    # Scale total FSW population to consensus PSE estimate
+    fsw_scaled <- fsw %>%
+      dplyr::mutate(
+        relative_prop = total_fsw/sum(total_fsw),
+        consensus_pse = fsw_consensus,
+        total_fsw = consensus_pse * relative_prop)
+
+    fsw <- fsw_scaled %>% dplyr::select(-consensus_pse, relative_prop)
+
+  }
+
+
+
+
+
+
+
+
 
   #' FSW age distribution parameters in ZAF from Thembisa
   #' Downloaded from: https://www.thembisa.org/content/downloadPage/Thembisa4_3
@@ -265,7 +300,8 @@ agyw_disaggregate_fsw <- function(outputs,
     # Calculate FSW proportions
     dplyr::mutate(
       fsw = dist * total_fsw,
-      fsw_prop = fsw / population
+      fsw_prop = fsw / population,
+      consensus_estimate = fsw_consensus
     ) %>%
     dplyr::select(-eversexpop, -eversexpop_prop, -propensity, -dist, -total_fsw)
 
@@ -307,6 +343,27 @@ agyw_disaggregate_pwid <- function(outputs,
     dplyr::mutate(total_pwid = population * prop_pwid) %>%
     dplyr::select(iso3, area_id, total_pwid, age_group, area_level)
 
+  #' Check for consensus estimate of MSM
+  kp_consensus <- extract_kp_workbook(pjnz)
+  pwid_consensus <- kp_consensus[kp_consensus$key_population == "PWID", ]$population_size
+
+  if(!is.na(pwid_consensus)){
+
+    # Check if consensus estimate is larger than age matched population denominator
+    pop <- naomi_pop[naomi_pop$area_level == 0 & naomi_pop$age_group == "Y015_049" & naomi_pop$sex == "male",]$population
+    stopifnot(pwid_consensus < pop)
+
+    # Scale total PWID population to consensus PSE estimate
+    pwid_scaled <- pwid %>%
+      dplyr::mutate(
+        relative_prop = total_pwid/sum(total_pwid),
+        consensus_pse = pwid_consensus,
+        total_pwid = consensus_pse * relative_prop)
+
+    pwid <- pwid_scaled %>% dplyr::select(-consensus_pse, relative_prop)
+  }
+
+
   #' Assumption from literature that 9% of PWID are female so remove them from
   #' the male denominator
 
@@ -340,7 +397,8 @@ agyw_disaggregate_pwid <- function(outputs,
       by = c("area_id", "iso3")
     ) %>%
     dplyr::mutate(pwid = dist * total_pwid,
-                  pwid_prop = pwid / population) %>%
+                  pwid_prop = pwid / population,
+                  consensus_estimate = pwid_consensus) %>%
     dplyr::select( -dist, -total_pwid, -sex)
 
   pwid_est
@@ -362,7 +420,7 @@ agyw_disaggregate_msm <- function(outputs,
   #' Extract country specific national MSM PSEs
   iso3 <- options$area_scope
   pse <- naomi.resources::load_agyw_exdata("kp_estimates", iso3) %>%
-    dplyr::filter(kp == "PWID", indicator == "pse_prop")
+    dplyr::filter(kp == "MSM", indicator == "pse_prop")
 
   msm_pse <- pse %>%
     dplyr::rename(prop_msm = median) %>%
@@ -378,6 +436,25 @@ agyw_disaggregate_msm <- function(outputs,
     dplyr::mutate(total_msm = population * prop_msm) %>%
     dplyr::select(iso3, area_id, total_msm, age_group, area_level)
 
+  #' Check for consensus estimate of MSM
+  kp_consensus <- extract_kp_workbook(pjnz)
+  msm_consensus <- kp_consensus[kp_consensus$key_population == "MSM", ]$population_size
+
+  if(!is.na(msm_consensus)){
+
+    # Check if consensus estimate is larger than age matched population denominator
+    pop <- naomi_pop[naomi_pop$area_level == 0 & naomi_pop$age_group == "Y015_049" & naomi_pop$sex == "male",]$population
+    stopifnot(msm_consensus < pop)
+
+    # Scale total MSM population to consensus PSE estimate
+    msm_scaled <- msm %>%
+      dplyr::mutate(
+        relative_prop = total_msm/sum(total_msm),
+        consensus_pse = msm_consensus,
+        total_msm = consensus_pse * relative_prop)
+
+    msm <- msm_scaled %>% dplyr::select(-consensus_pse, relative_prop)
+  }
 
 
   #' MSM age distribution parameters in ZAF from Thembisa
@@ -464,7 +541,8 @@ agyw_disaggregate_msm <- function(outputs,
     ) %>%
     # Calculate FSW proportions
     dplyr::mutate(msm = dist * total_msm,
-                  msm_prop = msm / population) %>%
+                  msm_prop = msm / population,
+                  consensus_estimate = msm_consensus) %>%
     dplyr::select(-eversexpop, -eversexpop_prop, -propensity, - dist, -total_msm)
 
   msm_est
@@ -1359,9 +1437,15 @@ agyw_calculate_incidence_male <- function(naomi_output,
 #' for countries without recent household surveys, leave at 2018 - the spatiotemporal
 #' model of sexual behaviour fitted to all countries has the most data for in roughly 2018
 
+naomi_output <-"~/Downloads/MWI 2023 naomi_outputs.zip"
+pjnz <- "~/Downloads/Malawi_2023_National_HIV_estimates_Spectrum_AIM_model.pjnz"
+# #
+ naomi_output <- agyw_output_demo$model_output_path
+ pjnz <- a_hintr_data$pjnz
 
 
 agyw_generate_risk_populations <- function(naomi_output,
+                                           pjnz,
                                            survey_year = 2018) {
 
   # Read in naomi outputs
@@ -1384,7 +1468,8 @@ agyw_generate_risk_populations <- function(naomi_output,
   #' Naomi population
   naomi_pop <- naomi$naomi_long %>%
     dplyr::filter(indicator == "population") %>%
-    dplyr::select(area_id, area_level,sex, age_group, area_level, population = mean)
+    dplyr::select(area_id, area_level,sex, age_group, area_level,
+                  spectrum_region_code, population = mean)
 
   naomi_pop$iso3 <- options$area_scope
 
@@ -1424,11 +1509,17 @@ agyw_generate_risk_populations <- function(naomi_output,
                                                   male_logit_prevalence,
                                                   survey_year)
 
+  meta <- data.frame(kp = c("FSW", "MSM", "PWID"),
+                     consensus_estimate = c(unique(fsw_est$consensus_estimate),
+                                            unique(msm_est$consensus_estimate),
+                                            unique(pwid_est$consensus_estimate)))
+
 
 
   v <- list(female_incidence = female_incidence,
             male_incidence = male_incidence,
-            naomi_output = naomi$naomi_wide)
+            naomi_output = naomi$naomi_wide,
+            meta_consensus = meta)
 
   v
 

From ee61e50b342046fbca337796eb5564991a3f0e0a Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Fri, 22 Dec 2023 14:49:00 +0000
Subject: [PATCH 19/53] scale to spectrum kp workbook consensus estimates

---
 R/downloads.R                                 |   3 +-
 R/inputs-spectrum.R                           |  45 ++++++++++++++++--
 tests/testthat/test-downloads.R               |  28 ++++++++++-
 .../testdata/kp_workbook_spectrum.rds         | Bin 0 -> 376 bytes
 4 files changed, 70 insertions(+), 6 deletions(-)
 create mode 100644 tests/testthat/testdata/kp_workbook_spectrum.rds

diff --git a/R/downloads.R b/R/downloads.R
index 4aab096d..0deb9253 100644
--- a/R/downloads.R
+++ b/R/downloads.R
@@ -114,7 +114,8 @@ hintr_prepare_agyw_download <- function(output, pjnz,
 
   template_path <- naomi.resources::get_agyw_workbook_path()
 
-  risk_populations <- agyw_generate_risk_populations(output$model_output_path)
+  risk_populations <- agyw_generate_risk_populations(output$model_output_path,
+                                                     pjnz)
 
   sheets <- list(
     "All outputs - F" = risk_populations$female_incidence,
diff --git a/R/inputs-spectrum.R b/R/inputs-spectrum.R
index 482b26b2..b1db7c17 100644
--- a/R/inputs-spectrum.R
+++ b/R/inputs-spectrum.R
@@ -127,6 +127,8 @@ extract_pjnz_one <- function(pjnz, extract_shiny90) {
 #' @export
 #'
 extract_pjnz_program_data <- function(pjnz_list) {
+
+
   pjnz_list <- unroll_pjnz(pjnz_list)
 
   region_code <- lapply(pjnz_list, read_spectrum_region_code)
@@ -185,7 +187,7 @@ read_dp_art_dec31 <- function(dp) {
 
   ## In Spectrum 2023, "<NeedARTDec31 MV>" was updated to include children in the totals
   ## -> now need to sum over 5-year age groups for age 15+ to get the adult ART need
-  
+
   male_15plus_needart <- dpsub("<NeedARTDec31 MV>", 4:17*3 + 3, timedat.idx)
   male_15plus_needart <- vapply(lapply(male_15plus_needart, as.numeric), sum, numeric(1))
 
@@ -195,14 +197,14 @@ read_dp_art_dec31 <- function(dp) {
   art15plus_need <- rbind(male_15plus_needart, female_15plus_needart)
   dimnames(art15plus_need) <- list(sex = c("male", "female"), year = proj.years)
 
-  
+
   if (any(art15plus_num[art15plus_isperc == 1] < 0 |
           art15plus_num[art15plus_isperc == 1] > 100)) {
     stop("Invalid percentage on ART entered for adult ART")
   }
 
   ## # Adult on ART adjustment factor
-  ## 
+  ##
   ## * Implemented from around Spectrum 6.2 (a few versions before)
   ## * Allows user to specify scalar to reduce number on ART in each year ("<AdultARTAdjFactor>")
   ## * Enabled / disabled by checkbox flag ("<AdultARTAdjFactorFlag>")
@@ -281,7 +283,7 @@ read_dp_art_dec31 <- function(dp) {
   names(child_art) <- proj.years
 
   ## # Child on ART adjustment factor
-  ## 
+  ##
   ## * Implemented same as adult adjustment factor above
 
   if (exists_dptag("<ChildARTAdjFactorFlag>") &&
@@ -684,11 +686,46 @@ extract_eppasm_pregprev <- function(mod, fp, years = NULL) {
   df
 }
 
+
+
 read_dp <- function(pjnz) {
   dpfile <- grep(".DP$", utils::unzip(pjnz, list = TRUE)$Name, value = TRUE)
   utils::read.csv(unz(pjnz, dpfile), as.is = TRUE)
 }
 
+#' Read key population summary data from PJNZ
+#'
+#' Reads key population summary data from Spectrum PJNZ.
+#'
+#' @param pjnz_list path to PJNZ file or zip of multiple PJNZ files
+#'
+#'
+#' @noRd
+#'
+
+extract_kp_workbook <- function(pjnz_list){
+
+  # Extract spectrum files
+  pjnz_list <- unroll_pjnz(pjnz_list)
+
+  # Extract .DP files
+  dp <- lapply(pjnz_list, read_dp)
+
+  # Extract kp workbook summary
+  kp <- lapply(dp, read_dp_keypop_summary)
+
+  # Filter for spectrum file with consensus estimates
+  kp_out <- kp %>%
+    dplyr::bind_rows() %>%
+    dplyr::filter(!is.na(year))
+
+  # If no consensus estimates present, return empty dataframe
+  if(nrow(kp_out) == 0){kp_out <- kp[[1]]}
+
+  kp_out
+
+}
+
 
 #' Read key population summary data from PJNZ
 #'
diff --git a/tests/testthat/test-downloads.R b/tests/testthat/test-downloads.R
index 1d785609..fb408156 100644
--- a/tests/testthat/test-downloads.R
+++ b/tests/testthat/test-downloads.R
@@ -195,8 +195,9 @@ test_that("AGYW download can be created", {
   agyw_output_demo <- a_hintr_output_calibrated
   agyw_output_demo$model_output_path <- out_demo
 
-  # Test agyw download
+
   mock_new_simple_progress <- mockery::mock(MockSimpleProgress$new())
+
   with_mocked_bindings(
     messages <- naomi_evaluate_promise(
       out <- hintr_prepare_agyw_download(agyw_output_demo,
@@ -219,6 +220,31 @@ test_that("AGYW download can be created", {
   ## Progress messages printed
   expect_length(messages$progress, 1)
   expect_equal(messages$progress[[1]]$message, "Generating AGYW tool")
+
+
+  # Test agyw workbook with no kp workbook saved into spectrum
+  risk_prop <- agyw_generate_risk_populations(agyw_output_demo$model_output_path,
+                                              a_hintr_data$pjnz)
+
+  expect_equal(risk_prop$meta_consensus,
+               data.frame(kp = c("FSW", "MSM", "PWID"),
+                          consensus_estimate = NA))
+
+  # Test agyw workbook with mock workbook saved into spectrum
+
+  mock_extract_kp_workbook <- mockery::mock(readRDS(test_path("testdata/kp_workbook_spectrum.rds")))
+  mock_new_simple_progress <- mockery::mock(MockSimpleProgress$new())
+
+  with_mock(new_simple_progress = mock_new_simple_progress,
+            extract_kp_workbook = mock_extract_kp_workbook, {
+    risk_prop_scaled <- agyw_generate_risk_populations(agyw_output_demo$model_output_path,
+                                                    a_hintr_data$pjnz)
+  })
+
+  expect_equal(risk_prop_scaled$meta_consensus,
+               data.frame(kp = c("FSW", "MSM", "PWID"),
+                          consensus_estimate = c(40000, 35500, 5000)))
+
 })
 
 
diff --git a/tests/testthat/testdata/kp_workbook_spectrum.rds b/tests/testthat/testdata/kp_workbook_spectrum.rds
new file mode 100644
index 0000000000000000000000000000000000000000..2477dd723a7336b63b768230b97fa0f61f249909
GIT binary patch
literal 376
zcmV-;0f+t{iwFP!000001B>8dU|?WoU}0ipU}gm}8CXL@+;lB~V!}WU2M`MYF$)lK
zf)u(1hr^h@!M;!?Q;0j1#S#$i=>nvA!Fm|jpQ2(0K9IP>;|`ET3=R)N8^H8|3<!TS
zL>#8h{{H*9n{@fY><=XQUi!H5kNrjWbs|*{J?syz{QW$q+Xb%ft=x{l*RAsQZ)bwQ
zECx&fv3J@9h&@RUAaoE!(!mt24%x3lC8@<F@#XnN*-82N*?JW@#T8VRfrk@1U||9h
zj8L7dd5O8H#Zbp_Kn3}-Q!C>O@(W6H5=%1k^I*YWnVMJxW%DCs<BKz^Qehf1GRxu%
zic-rGb5iq?VWK>VMJ4gc`DLj^iRmyguFSl&)MSwUVyFl&%=z(YnK>|JBE<zjRYj$_
z@kOcWAj^{TQ&Q2qz*&@Eu7~UmSQxXoB$gyX*^Cjelog?8h+rC_2S;*FVlm7dm>^dQ
W&>X$ABA^vO-ai0@mvA}N0ssIQ5UZ#F

literal 0
HcmV?d00001


From 454424502d2dd1cfb84867d75d48bc56ab88a920 Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Fri, 22 Dec 2023 14:54:43 +0000
Subject: [PATCH 20/53] remove tetsing files :(

---
 R/agyw-integration.R | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index 8bfd6188..4aedfccf 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -209,13 +209,6 @@ agyw_disaggregate_fsw <- function(outputs,
   }
 
 
-
-
-
-
-
-
-
   #' FSW age distribution parameters in ZAF from Thembisa
   #' Downloaded from: https://www.thembisa.org/content/downloadPage/Thembisa4_3
   gamma_mean <- 29
@@ -1437,13 +1430,6 @@ agyw_calculate_incidence_male <- function(naomi_output,
 #' for countries without recent household surveys, leave at 2018 - the spatiotemporal
 #' model of sexual behaviour fitted to all countries has the most data for in roughly 2018
 
-naomi_output <-"~/Downloads/MWI 2023 naomi_outputs.zip"
-pjnz <- "~/Downloads/Malawi_2023_National_HIV_estimates_Spectrum_AIM_model.pjnz"
-# #
- naomi_output <- agyw_output_demo$model_output_path
- pjnz <- a_hintr_data$pjnz
-
-
 agyw_generate_risk_populations <- function(naomi_output,
                                            pjnz,
                                            survey_year = 2018) {

From 3a48a4096486c862171e2907c2639afdfd739c34 Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Fri, 22 Dec 2023 14:57:13 +0000
Subject: [PATCH 21/53] Update NEWS.md

---
 NEWS.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 7fd3f4dc..43e9e6db 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,7 @@
 # naomi 2.9.19
 
-* Add function `xxx()` to extract key population totals from the Spectrum PJNZ file. Data are extracted from summary table saved in AIM Programme Statistics input for key populations.
+* Generate PSE workbook from naomi outputs.
+* Add function `extract_kp_workbook()` to extract key population totals from the Spectrum PJNZ file. Data are extracted from summary table saved in AIM Programme Statistics input for key populations.
 
 
 # naomi 2.9.18

From 4f1358c75f57c802a5bff50183fd6944633ab10a Mon Sep 17 00:00:00 2001
From: Rob Ashton <r.ashton@imperial.ac.uk>
Date: Fri, 22 Dec 2023 17:13:43 +0000
Subject: [PATCH 22/53] Use with_mocked_bindings over with_mock

---
 tests/testthat/test-downloads.R | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/testthat/test-downloads.R b/tests/testthat/test-downloads.R
index fb408156..a530548f 100644
--- a/tests/testthat/test-downloads.R
+++ b/tests/testthat/test-downloads.R
@@ -195,7 +195,6 @@ test_that("AGYW download can be created", {
   agyw_output_demo <- a_hintr_output_calibrated
   agyw_output_demo$model_output_path <- out_demo
 
-
   mock_new_simple_progress <- mockery::mock(MockSimpleProgress$new())
 
   with_mocked_bindings(
@@ -235,11 +234,12 @@ test_that("AGYW download can be created", {
   mock_extract_kp_workbook <- mockery::mock(readRDS(test_path("testdata/kp_workbook_spectrum.rds")))
   mock_new_simple_progress <- mockery::mock(MockSimpleProgress$new())
 
-  with_mock(new_simple_progress = mock_new_simple_progress,
-            extract_kp_workbook = mock_extract_kp_workbook, {
-    risk_prop_scaled <- agyw_generate_risk_populations(agyw_output_demo$model_output_path,
-                                                    a_hintr_data$pjnz)
-  })
+  with_mocked_bindings(
+    risk_prop_scaled <- agyw_generate_risk_populations(
+      agyw_output_demo$model_output_path, a_hintr_data$pjnz),
+    new_simple_progress = mock_new_simple_progress,
+    extract_kp_workbook = mock_extract_kp_workbook
+  )
 
   expect_equal(risk_prop_scaled$meta_consensus,
                data.frame(kp = c("FSW", "MSM", "PWID"),

From b8721eae08ba4e8eccd35c6bc9118ed31d69913a Mon Sep 17 00:00:00 2001
From: Rob Ashton <r.ashton@imperial.ac.uk>
Date: Fri, 22 Dec 2023 17:14:27 +0000
Subject: [PATCH 23/53] Add note about with_mocked_bindings

---
 tests/testthat/helper-testing.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/testthat/helper-testing.R b/tests/testthat/helper-testing.R
index 137fda0b..d06ec7fa 100644
--- a/tests/testthat/helper-testing.R
+++ b/tests/testthat/helper-testing.R
@@ -3,6 +3,8 @@ expect_no_error <- function(object) {
 }
 
 with_mock <- function(..., .parent = parent.frame()) {
+  ## Don't use this, this should be removed, now testthat have
+  ## added with_mocked_bindings that should be preferred
   mockr::with_mock(..., .parent = .parent, .env = "naomi")
 }
 

From dd18af039c8ed5961971bd1197fdfb2ad6d61e01 Mon Sep 17 00:00:00 2001
From: Rob Ashton <r.ashton@imperial.ac.uk>
Date: Fri, 22 Dec 2023 17:27:09 +0000
Subject: [PATCH 24/53] Fix up test, regen docs

---
 .gitignore                              |  1 +
 DESCRIPTION                             |  2 +-
 NAMESPACE                               |  3 +-
 R/agyw-integration.R                    | 22 +++++-----
 man/agyw_adjust_sexbehav_fsw.Rd         | 42 +++++++++++++++++++
 man/agyw_adjust_sexbehav_msm_pwid.Rd    | 45 +++++++++++++++++++++
 man/agyw_calculate_incidence_female.Rd  | 51 +++++++++++++++++++++++
 man/agyw_calculate_incidence_male.Rd    | 31 ++++++++++++++
 man/agyw_calculate_prevalence_female.Rd | 54 +++++++++++++++++++++++++
 man/agyw_calculate_prevalence_male.Rd   | 46 +++++++++++++++++++++
 man/agyw_disaggregate_fsw.Rd            | 23 +++++++++++
 man/agyw_disaggregate_msm.Rd            | 34 ++++++++++++++++
 man/agyw_disaggregate_pwid.Rd           | 30 ++++++++++++++
 man/agyw_format_naomi.Rd                | 19 +++++++++
 man/agyw_generate_risk_populations.Rd   | 25 ++++++++++++
 man/align_inputs_outputs.Rd             |  3 --
 man/calculate_ywkp_pr_lor.Rd            | 19 +++++++++
 man/district_barplot.Rd                 |  3 --
 man/logit_scale_prev.Rd                 | 24 +++++++++++
 man/map_outputs.Rd                      |  3 --
 man/odds.Rd                             | 14 +++++++
 man/pop_pyramid_outputs.Rd              |  3 --
 man/prepare_input_time_series_anc.Rd    |  5 ++-
 man/prepare_input_time_series_art.Rd    |  5 ++-
 man/select_naomi_data.Rd                | 12 +++---
 man/write_navigator_checklist.Rd        |  3 --
 man/write_xlsx_sheets.Rd                | 23 +++++++++++
 tests/testthat/test-downloads.R         |  5 +--
 28 files changed, 510 insertions(+), 40 deletions(-)
 create mode 100644 man/agyw_adjust_sexbehav_fsw.Rd
 create mode 100644 man/agyw_adjust_sexbehav_msm_pwid.Rd
 create mode 100644 man/agyw_calculate_incidence_female.Rd
 create mode 100644 man/agyw_calculate_incidence_male.Rd
 create mode 100644 man/agyw_calculate_prevalence_female.Rd
 create mode 100644 man/agyw_calculate_prevalence_male.Rd
 create mode 100644 man/agyw_disaggregate_fsw.Rd
 create mode 100644 man/agyw_disaggregate_msm.Rd
 create mode 100644 man/agyw_disaggregate_pwid.Rd
 create mode 100644 man/agyw_format_naomi.Rd
 create mode 100644 man/agyw_generate_risk_populations.Rd
 create mode 100644 man/calculate_ywkp_pr_lor.Rd
 create mode 100644 man/logit_scale_prev.Rd
 create mode 100644 man/odds.Rd
 create mode 100644 man/write_xlsx_sheets.Rd

diff --git a/.gitignore b/.gitignore
index 93f9451e..cd724ac7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,3 +20,4 @@ vignettes_src/model-workflow.Rmd
 tests/testthat/testdata/fit.RDS
 .vscode
 .Rprofile
+.idea
diff --git a/DESCRIPTION b/DESCRIPTION
index 17f1f64e..cc9fd6c0 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -13,7 +13,7 @@ License: MIT + file LICENSE
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.1
+RoxygenNote: 7.2.3
 Additional_repositories:
     https://mrc-ide.r-universe.dev
 Imports:
diff --git a/NAMESPACE b/NAMESPACE
index 9fb1b386..39a468f1 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -5,6 +5,7 @@ export(add_output_labels)
 export(age_bar_plotly)
 export(aggregate_anc)
 export(aggregate_art)
+export(agyw_format_naomi)
 export(align_inputs_outputs)
 export(anc_testing_artcov_mf)
 export(anc_testing_clients_mf)
@@ -59,8 +60,6 @@ export(naomi_objective_function_r)
 export(naomi_output_frame)
 export(output_package)
 export(pop_pyramid_outputs)
-export(prepare_input_time_series_anc)
-export(prepare_input_time_series_art)
 export(prepare_tmb_inputs)
 export(quarter_id_to_calendar_quarter)
 export(quarter_labels)
diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index 4aedfccf..7f2f320a 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -155,6 +155,7 @@ agyw_format_naomi <- function(outputs, options){
 #' @param outputs Naomi output.
 #' @param options Naomi model options.
 #' @param naomi_population Naomi population estimates for T2.
+#' @param kp_consensus Key pop consensus estimates.
 #'
 #'
 #' @return District level FSW estimates by 5-year age bands for ages 15-49.
@@ -163,7 +164,8 @@ agyw_format_naomi <- function(outputs, options){
 
 agyw_disaggregate_fsw <- function(outputs,
                                   options,
-                                  naomi_pop)
+                                  naomi_pop,
+                                  kp_consensus)
 {
 
   #' Extract country specific national FSW PSEs
@@ -188,7 +190,6 @@ agyw_disaggregate_fsw <- function(outputs,
     dplyr::select(iso3, area_id, total_fsw, age_group, area_level, spectrum_region_code)
 
   #' Check for consensus estimate of FSW
-  kp_consensus <- extract_kp_workbook(pjnz)
   fsw_consensus <- kp_consensus[kp_consensus$key_population == "FSW", ]$population_size
 
   if(!is.na(fsw_consensus)){
@@ -308,12 +309,14 @@ agyw_disaggregate_fsw <- function(outputs,
 #' @param outputs Naomi output.
 #' @param options Naomi model options.
 #' @param naomi_population Naomi population estimates for T2.
+#' @param kp_consensus Key pop consensus estimates.
 #'
 #' @return District level PWID estimates by 5-year age bands for ages 15-49.
 
 agyw_disaggregate_pwid <- function(outputs,
                                    options,
-                                   naomi_pop)
+                                   naomi_pop,
+                                   kp_consensus)
 {
 
   #' Extract country specific national PWID PSEs
@@ -337,7 +340,6 @@ agyw_disaggregate_pwid <- function(outputs,
     dplyr::select(iso3, area_id, total_pwid, age_group, area_level)
 
   #' Check for consensus estimate of MSM
-  kp_consensus <- extract_kp_workbook(pjnz)
   pwid_consensus <- kp_consensus[kp_consensus$key_population == "PWID", ]$population_size
 
   if(!is.na(pwid_consensus)){
@@ -402,12 +404,14 @@ agyw_disaggregate_pwid <- function(outputs,
 #' @param outputs Naomi output.
 #' @param options Naomi model options.
 #' @param naomi_population Naomi population estimates for T2.
+#' @param kp_consensus Key pop consensus estimates.
 #'
 #' @return District level MSM estimates by 5-year age bands for ages 15-49.
 
 agyw_disaggregate_msm <- function(outputs,
                                   options,
-                                  naomi_pop)
+                                  naomi_pop,
+                                  kp_consensus)
 {
 
   #' Extract country specific national MSM PSEs
@@ -430,7 +434,6 @@ agyw_disaggregate_msm <- function(outputs,
     dplyr::select(iso3, area_id, total_msm, age_group, area_level)
 
   #' Check for consensus estimate of MSM
-  kp_consensus <- extract_kp_workbook(pjnz)
   msm_consensus <- kp_consensus[kp_consensus$key_population == "MSM", ]$population_size
 
   if(!is.na(msm_consensus)){
@@ -1460,9 +1463,10 @@ agyw_generate_risk_populations <- function(naomi_output,
   naomi_pop$iso3 <- options$area_scope
 
   #' Disaggregate KP PSEs from Oli's analysis to 5-year bands
-  fsw_est <- agyw_disaggregate_fsw(outputs, options, naomi_pop)
-  pwid_est <- agyw_disaggregate_pwid(outputs, options, naomi_pop)
-  msm_est <- agyw_disaggregate_msm(outputs, options, naomi_pop)
+  kp_consensus <- extract_kp_workbook(pjnz)
+  fsw_est <- agyw_disaggregate_fsw(outputs, options, naomi_pop, kp_consensus)
+  pwid_est <- agyw_disaggregate_pwid(outputs, options, naomi_pop, kp_consensus)
+  msm_est <- agyw_disaggregate_msm(outputs, options, naomi_pop, kp_consensus)
 
   #' Adjust SAE model output with KP proportions
   female_srb <- agyw_adjust_sexbehav_fsw(outputs, options, fsw_est)
diff --git a/man/agyw_adjust_sexbehav_fsw.Rd b/man/agyw_adjust_sexbehav_fsw.Rd
new file mode 100644
index 00000000..a526a8f9
--- /dev/null
+++ b/man/agyw_adjust_sexbehav_fsw.Rd
@@ -0,0 +1,42 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/agyw-integration.R
+\name{agyw_adjust_sexbehav_fsw}
+\alias{agyw_adjust_sexbehav_fsw}
+\title{Adjust female sexual behavior risk groups by FSW proportions}
+\usage{
+agyw_adjust_sexbehav_fsw(outputs, options, fsw_est)
+}
+\arguments{
+\item{outputs}{Naomi output.}
+
+\item{options}{Naomi model options.}
+
+\item{fsw_est}{5-year estimates of FSW PSEs generated from \code{agyw_disaggregate_fsw()}.}
+
+\item{female_sae_path}{Path to female estimates of sexual behavior risk group.}
+}
+\value{
+District level estimates of female sexual risk behaviour groups
+
+Estimates are generated for the following groups:
+\itemize{
+\item \code{nosex12m}:
+\item \code{sexcohab}:
+\item \code{sexnonregplus}:
+\item \code{sexnonreg}:
+\item \code{sexpaid12m}:
+\item \code{nosex12m}:
+}
+
+Calculation steps:
+\enumerate{
+\item Align admin0/admin1 FSW proportions with SRB SAE estimates.
+\item Subtract the proportion of FSW from total high risk female population.
+Match FSW estimates (admin0 or admin1) with SAE estimates
+Allocate admin1 FSW proportions
+Load female SRB proportions
+}
+}
+\description{
+Adjust female sexual behavior risk groups by FSW proportions
+}
diff --git a/man/agyw_adjust_sexbehav_msm_pwid.Rd b/man/agyw_adjust_sexbehav_msm_pwid.Rd
new file mode 100644
index 00000000..e9bb18d5
--- /dev/null
+++ b/man/agyw_adjust_sexbehav_msm_pwid.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/agyw-integration.R
+\name{agyw_adjust_sexbehav_msm_pwid}
+\alias{agyw_adjust_sexbehav_msm_pwid}
+\title{Adjust male sexual behavior risk groups by MSM + PWID proportions}
+\usage{
+agyw_adjust_sexbehav_msm_pwid(outputs, options, msm_est, pwid_est)
+}
+\arguments{
+\item{outputs}{Naomi output.}
+
+\item{options}{Naomi model options.}
+
+\item{msm_est}{5-year estimates of MSM PSEs generated from \code{agyw__disaggregate_msm()}.}
+
+\item{pwid_est}{5-year estimates of MSM PSEs generated from \code{agyw__disaggregate_pwid()}.}
+
+\item{sae_path}{Path to female estimates of sexual behavior risk group.}
+}
+\value{
+District level estimates of male sexual risk behaviour groups
+
+Estimates are generated for the following groups:
+\itemize{
+\item \code{nosex12m}:
+\item \code{sexcohab}:
+\item \code{sexonregplus}:
+\item \code{sexonreg}:
+\item \code{msm}:
+\item \code{pwid}:
+}
+
+Calculation steps:
+\enumerate{
+\item Align admin0/admin1 MSM and PWID proportions with SRB SAE estimates.
+\item Subtracting MSM and PWID proportionally from all SRB groups.
+}
+
+Load male SRB proportions
+Subtracting MSM and PWID proportionally from all SRB risk groups
+(FSW was just from high-risk females)
+}
+\description{
+Adjust male sexual behavior risk groups by MSM + PWID proportions
+}
diff --git a/man/agyw_calculate_incidence_female.Rd b/man/agyw_calculate_incidence_female.Rd
new file mode 100644
index 00000000..8f789fed
--- /dev/null
+++ b/man/agyw_calculate_incidence_female.Rd
@@ -0,0 +1,51 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/agyw-integration.R
+\name{agyw_calculate_incidence_female}
+\alias{agyw_calculate_incidence_female}
+\title{Calculate incidence for female SRB groups.}
+\usage{
+agyw_calculate_incidence_female(
+  naomi_output,
+  options,
+  female_srb,
+  female_logit_prevalence,
+  survey_year = 2018
+)
+}
+\arguments{
+\item{options}{Naomi options extracted from outputs}
+
+\item{female_srb}{Estimates of female sexual risk groups generated by \code{agyw_adjust_sexbehav_fsw()}}
+
+\item{female_logit_prevalence}{Risk adjusted estimates of female prevalence in sexual risk groups generated by \code{agyw_calculate_prevalence_female()}}
+
+\item{survey_year}{Survey year to sample from the SAE model. Default is 2018.}
+
+\item{outputs}{Naomi output.}
+}
+\value{
+Wide format output required for the AGYW workbook.
+
+While maintaining age/sex/district-specific HIV incidence from Naomi, distribute
+HIV incidence between our 4 different behavioural groups utilizing IRRs from the
+literature
+
+Risk ratios for people non-regular sex partners relative to those with a
+single cohabiting sex partner
+ALPHA Network pooled analysis (Slaymaker et al CROI 2020), Jia et al systematic review, Ssempijja et al JAIDS 2022
+Tiered HIV risk ratio for the FSW group depending on district-level HIV
+incidence in general population
+Jones et al medRxiv "HIV incidence among women engaging in sex work in sub-Saharan Africa: a systematic review and meta-analysis"
+https://www.medrxiv.org/content/10.1101/2023.10.17.23297108v2
+linear relationship between log(FSW incidence) and log(gen pop incidence)
+regression points shared in confidence, y = mx + b slope is 0.604104017 and
+intercept is 0.075090952
+TODO: Get distributions on these and using a sampling method to get
+uncertainty in economic analysis e.g.
+Calculate risk group incidence
+Calculate risk group incidence for aggregate age groups
+Check that sum of disaggregated infections is the same as total infections
+}
+\description{
+Calculate incidence for female SRB groups.
+}
diff --git a/man/agyw_calculate_incidence_male.Rd b/man/agyw_calculate_incidence_male.Rd
new file mode 100644
index 00000000..6a02659b
--- /dev/null
+++ b/man/agyw_calculate_incidence_male.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/agyw-integration.R
+\name{agyw_calculate_incidence_male}
+\alias{agyw_calculate_incidence_male}
+\title{Calculate incidence in high risk male key populations}
+\usage{
+agyw_calculate_incidence_male(
+  naomi_output,
+  options,
+  male_srb,
+  male_logit_prevalence,
+  survey_year = 2018
+)
+}
+\arguments{
+\item{options}{Naomi options extracted from outputs}
+
+\item{male_srb}{Estimates of male sexual risk groups generated by \code{agyw_adjust_sexbehav_msm_pwid()}}
+
+\item{male_logit_prevalence}{Risk adjusted estimates of male prevalence in sexual risk groups generated by \code{agyw_calculate_prevalence_male()}}
+
+\item{survey_year}{NOTES:: add in description + when this should be adjusted. Hardcoded to 2018.}
+
+\item{outputs}{Naomi output.}
+}
+\value{
+Wide format output required for the AGYW workbook
+}
+\description{
+Calculate incidence in high risk male key populations
+}
diff --git a/man/agyw_calculate_prevalence_female.Rd b/man/agyw_calculate_prevalence_female.Rd
new file mode 100644
index 00000000..ea329414
--- /dev/null
+++ b/man/agyw_calculate_prevalence_female.Rd
@@ -0,0 +1,54 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/agyw-integration.R
+\name{agyw_calculate_prevalence_female}
+\alias{agyw_calculate_prevalence_female}
+\title{Calculate prevalence for female SRB groups.}
+\usage{
+agyw_calculate_prevalence_female(
+  naomi_output,
+  options,
+  fsw_est,
+  female_srb,
+  survey_year_sample = 2018
+)
+}
+\arguments{
+\item{options}{Naomi model options.}
+
+\item{fsw_est}{5-year estimates of MSM PSEs generated from \code{agyw_disaggregate_fse()}.}
+
+\item{outputs}{Naomi output.}
+
+\item{female_sexbehav}{KP adjusted estimates of female SRB groups generated by \code{agyw_adjust_sexbehav_fsw()}}
+
+\item{female_hiv_path}{Path to SRB HIV estimates from household surveys (last updated XX-XX-XX).}
+
+\item{pse_path}{Path to KP PSEs last updated (XX-XX-XX).}
+
+\item{survey_year}{Year of survey to sample estimates.}
+}
+\value{
+SRB PSEs with logit prevalence estimates.
+
+To calculate district-age-sex-sexual behaviour-specific HIV prevalence, we maintain
+HIV prevalence from Naomi for a district-age-sex, but disaggregate to different
+risk behaviours using:
+(1) HIV prevalence ratios from household surveys for those reporting no
+sex vs one cohabiting vs non-regular sexual partner(s), and
+(2) a linear regression through admin-1 level estimates of the ratio of KP
+prevalence to gen-pop prevalence used to predict an age-district-specific
+FSW to general population prevalence ratio.
+
+Naomi estimates of PLHIV and population by district and age band
+Extract country specific national FSW prevalence
+THIS IS NOW USING SINGLE COUNTRY INSTEAD OF ALL COUNTRIES
+KP regression: FSW prevalence relative to general prevalence
+########## THIS REGRESSION SHOULD BE TAKING DATA FROM ALL ADMIN-1 LEVEL
+Modelled estimates of proportion in each risk group
+Calculate prevalence in each category
+Log odds ratio from SRB group survey prevalence
+Calculate logit prevalence and format
+}
+\description{
+Calculate prevalence for female SRB groups.
+}
diff --git a/man/agyw_calculate_prevalence_male.Rd b/man/agyw_calculate_prevalence_male.Rd
new file mode 100644
index 00000000..cdd8a6d3
--- /dev/null
+++ b/man/agyw_calculate_prevalence_male.Rd
@@ -0,0 +1,46 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/agyw-integration.R
+\name{agyw_calculate_prevalence_male}
+\alias{agyw_calculate_prevalence_male}
+\title{Calculate prevalence for male SRB groups.}
+\usage{
+agyw_calculate_prevalence_male(
+  naomi_output,
+  areas,
+  options,
+  msm_est,
+  male_srb,
+  survey_year_sample = 2018
+)
+}
+\arguments{
+\item{options}{Naomi model options.}
+
+\item{msm_est}{.}
+
+\item{outputs}{Naomi output.}
+
+\item{survey_year}{Year of survey to sample estimates.}
+}
+\value{
+SRB PSEs with logit prevalence estimates.
+
+To calculate district-age-sex-sexual behaviour-specific HIV prevalence, we maintain
+HIV prevalence from Naomi for a district-age-sex, but disaggregate to different
+risk behaviours using:
+(1) HIV prevalence ratios from household surveys for those reporting no
+sex vs one cohabiting vs non-regular sexual partner(s), and
+(2) admin-1 level estimates of the ratio of KP prevalence to gen-pop prevalence
+among 15-24 year olds for MSM (due to the young age distribution of MSM) or
+among 15-49 year olds for PWID (due to the older age distribution of PWID)
+applied to all age groups among MSM and PWID in districts by admin-1 unit.
+Naomi estimates of PLHIV and population by district and age band
+Extract country specific national MSM + PWID prevalence
+Modelled estimates of proportion in each risk group
+Calculate prevalence in each category
+Log odds ratio from SRB group survey prevalence
+Calculate logit prevalence and format
+}
+\description{
+Calculate prevalence for male SRB groups.
+}
diff --git a/man/agyw_disaggregate_fsw.Rd b/man/agyw_disaggregate_fsw.Rd
new file mode 100644
index 00000000..356b054b
--- /dev/null
+++ b/man/agyw_disaggregate_fsw.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/agyw-integration.R
+\name{agyw_disaggregate_fsw}
+\alias{agyw_disaggregate_fsw}
+\title{Dissagreggate admin1 FSW proportions from Oli's KP model to 5-age groups}
+\usage{
+agyw_disaggregate_fsw(outputs, options, naomi_pop, kp_consensus)
+}
+\arguments{
+\item{outputs}{Naomi output.}
+
+\item{options}{Naomi model options.}
+
+\item{kp_consensus}{Key pop consensus estimates.}
+
+\item{naomi_population}{Naomi population estimates for T2.}
+}
+\value{
+District level FSW estimates by 5-year age bands for ages 15-49.
+}
+\description{
+Dissagreggate admin1 FSW proportions from Oli's KP model to 5-age groups
+}
diff --git a/man/agyw_disaggregate_msm.Rd b/man/agyw_disaggregate_msm.Rd
new file mode 100644
index 00000000..e6e905bc
--- /dev/null
+++ b/man/agyw_disaggregate_msm.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/agyw-integration.R
+\name{agyw_disaggregate_msm}
+\alias{agyw_disaggregate_msm}
+\title{Disaggregate admin1 MSM proportions from Oli's KP model to 5-age groups}
+\usage{
+agyw_disaggregate_msm(outputs, options, naomi_pop, kp_consensus)
+}
+\arguments{
+\item{outputs}{Naomi output.}
+
+\item{options}{Naomi model options.}
+
+\item{kp_consensus}{Key pop consensus estimates.}
+
+\item{naomi_population}{Naomi population estimates for T2.}
+}
+\value{
+District level MSM estimates by 5-year age bands for ages 15-49.
+Extract country specific national MSM PSEs
+Check for consensus estimate of MSM
+MSM age distribution parameters in ZAF from Thembisa
+Downloaded from: https://www.thembisa.org/content/downloadPage/Thembisa4_3report
+Distribution function of the gamma
+Calculate proportion of sexually active population using Kinh's country specific
+estimates of age at first sex and naomi population
+Select birth cohort from 2000, to turn 15 in 2015
+Calculate sexually active population by age and sex for each district
+Adjusting country specific sexual debut estimates with age distribution of
+MSM from Thembisa
+}
+\description{
+Disaggregate admin1 MSM proportions from Oli's KP model to 5-age groups
+}
diff --git a/man/agyw_disaggregate_pwid.Rd b/man/agyw_disaggregate_pwid.Rd
new file mode 100644
index 00000000..09f2117b
--- /dev/null
+++ b/man/agyw_disaggregate_pwid.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/agyw-integration.R
+\name{agyw_disaggregate_pwid}
+\alias{agyw_disaggregate_pwid}
+\title{Disaggregate admin1 PWID proportions from Oli's KP model to 5-age groups}
+\usage{
+agyw_disaggregate_pwid(outputs, options, naomi_pop, kp_consensus)
+}
+\arguments{
+\item{outputs}{Naomi output.}
+
+\item{options}{Naomi model options.}
+
+\item{kp_consensus}{Key pop consensus estimates.}
+
+\item{naomi_population}{Naomi population estimates for T2.}
+}
+\value{
+District level PWID estimates by 5-year age bands for ages 15-49.
+Extract country specific national PWID PSEs
+Check for consensus estimate of MSM
+Assumption from literature that 9\% of PWID are female so remove them from
+the male denominator
+PWID age distribution
+Review of literature - Hines et al Lancet Global Health 2020
+Distribution function of the gamma
+}
+\description{
+Disaggregate admin1 PWID proportions from Oli's KP model to 5-age groups
+}
diff --git a/man/agyw_format_naomi.Rd b/man/agyw_format_naomi.Rd
new file mode 100644
index 00000000..72d406eb
--- /dev/null
+++ b/man/agyw_format_naomi.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/agyw-integration.R
+\name{agyw_format_naomi}
+\alias{agyw_format_naomi}
+\title{Format naomi outputs for PSE tool}
+\usage{
+agyw_format_naomi(outputs, options)
+}
+\arguments{
+\item{outputs}{Naomi output}
+
+\item{options}{Naomi model options.}
+}
+\value{
+Naomi indicators formatted for the AGYW workbook.
+}
+\description{
+Format naomi outputs for PSE tool
+}
diff --git a/man/agyw_generate_risk_populations.Rd b/man/agyw_generate_risk_populations.Rd
new file mode 100644
index 00000000..8e7001d0
--- /dev/null
+++ b/man/agyw_generate_risk_populations.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/agyw-integration.R
+\name{agyw_generate_risk_populations}
+\alias{agyw_generate_risk_populations}
+\title{Calculate incidence in high risk male key populations}
+\usage{
+agyw_generate_risk_populations(naomi_output, pjnz, survey_year = 2018)
+}
+\arguments{
+\item{survey_year}{NOTES:: add in description + when this should be adjusted. Hardcoded to 2018.}
+
+\item{outputs}{Naomi output.}
+
+\item{options}{Naomi options extracted from outputs}
+
+\item{male_srb}{Estimates of male sexual risk groups generated by \code{agyw_adjust_sexbehav_msm_pwid()}}
+
+\item{male_logit_prevalence}{Risk adjusted estimates of male prevalence in sexual risk groups generated by \code{agyw_calculate_prevalence_male()}}
+}
+\value{
+Wide format output required for the AGYW workbook
+}
+\description{
+Calculate incidence in high risk male key populations
+}
diff --git a/man/align_inputs_outputs.Rd b/man/align_inputs_outputs.Rd
index 45b259d9..02cb7e0b 100644
--- a/man/align_inputs_outputs.Rd
+++ b/man/align_inputs_outputs.Rd
@@ -16,6 +16,3 @@ align_inputs_outputs(naomi_data, indicators, meta_area)
 \description{
 Align model data inputs and model estimates
 }
-\details{
-
-}
diff --git a/man/calculate_ywkp_pr_lor.Rd b/man/calculate_ywkp_pr_lor.Rd
new file mode 100644
index 00000000..1982bd8b
--- /dev/null
+++ b/man/calculate_ywkp_pr_lor.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/agyw-integration.R
+\name{calculate_ywkp_pr_lor}
+\alias{calculate_ywkp_pr_lor}
+\title{Calculate YWKP prevalence ratio and log odds ratio}
+\usage{
+calculate_ywkp_pr_lor(prev, fit = ywkp_fit)
+}
+\arguments{
+\item{prev}{(General population) prevalence}
+
+\item{fit}{A model relating log-odds prevalence to YWKP log odds prevalence
+Ensure that the LOR is above that of e.g. the sexnonreg risk group
+Prevalence ratio
+Log-odds ratio}
+}
+\description{
+Calculate YWKP prevalence ratio and log odds ratio
+}
diff --git a/man/district_barplot.Rd b/man/district_barplot.Rd
index 609f9fa2..86824f02 100644
--- a/man/district_barplot.Rd
+++ b/man/district_barplot.Rd
@@ -46,9 +46,6 @@ district_barplot(
 \item{above_label}{Label for districts with values higher than national average, default is "Above"}
 
 \item{below_label}{Label for districts with values higher than national average, default is "Below"}
-}
-\value{
-
 }
 \description{
 Create district bar plot
diff --git a/man/logit_scale_prev.Rd b/man/logit_scale_prev.Rd
new file mode 100644
index 00000000..2caa1089
--- /dev/null
+++ b/man/logit_scale_prev.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/agyw-integration.R
+\name{logit_scale_prev}
+\alias{logit_scale_prev}
+\title{Calculate prevalence and PLHIV using logit-scale disaggregation}
+\usage{
+logit_scale_prev(lor, N_fine, plhiv)
+}
+\arguments{
+\item{lor}{Log odds-ratios}
+
+\item{N_fine}{Number of individuals in each group}
+
+\item{plhiv}{Total number of people living with HIV
+theta represents prevalence in baseline risk group
+plogis(lor + theta) is prevalence in each risk group
+plogis(lor + theta) * N_fine is PLHIV in each risk group
+Optimisation for baseline risk group prevalence
+On the logit scale should be more numerically stable
+Return prevalence}
+}
+\description{
+Calculate prevalence and PLHIV using logit-scale disaggregation
+}
diff --git a/man/map_outputs.Rd b/man/map_outputs.Rd
index 628ab4a7..0abb27b5 100644
--- a/man/map_outputs.Rd
+++ b/man/map_outputs.Rd
@@ -40,9 +40,6 @@ map_outputs(
 \item{legend_label}{Legend label}
 
 \item{breaks}{Number of break points to create in scale}
-}
-\value{
-
 }
 \description{
 Create output map plot
diff --git a/man/odds.Rd b/man/odds.Rd
new file mode 100644
index 00000000..2cda6442
--- /dev/null
+++ b/man/odds.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/agyw-integration.R
+\name{odds}
+\alias{odds}
+\title{Calculate the odds}
+\usage{
+odds(p)
+}
+\arguments{
+\item{p}{Probability in \link{0, 1}}
+}
+\description{
+Calculate the odds
+}
diff --git a/man/pop_pyramid_outputs.Rd b/man/pop_pyramid_outputs.Rd
index 09b29896..8052177e 100644
--- a/man/pop_pyramid_outputs.Rd
+++ b/man/pop_pyramid_outputs.Rd
@@ -33,9 +33,6 @@ pop_pyramid_outputs(
 \item{masc_label}{Label for male sex group, default is "Male"}
 
 \item{fem_label}{Label for female sex group, default is "Female"}
-}
-\value{
-
 }
 \description{
 Create population pyramid plot
diff --git a/man/prepare_input_time_series_anc.Rd b/man/prepare_input_time_series_anc.Rd
index f5f9dac3..4fe2f844 100644
--- a/man/prepare_input_time_series_anc.Rd
+++ b/man/prepare_input_time_series_anc.Rd
@@ -13,8 +13,9 @@ prepare_input_time_series_anc(anc, shape)
 }
 \value{
 Data formatted for plotting input ANC time series containing columns
-area_id, area_name, area_level, area_level_label, age_group, time_period,
-time_step, plot and value
+area_id, area_name, area_level, area_level_label, parent_area_id, area_sort_order,
+time_period, year, quarter, calendar_quarter, area_hierarchy, plot, value and
+missing_ids
 }
 \description{
 Take uploaded ANC and shape file paths or objects and format as data which
diff --git a/man/prepare_input_time_series_art.Rd b/man/prepare_input_time_series_art.Rd
index 612f0dc9..81504ce2 100644
--- a/man/prepare_input_time_series_art.Rd
+++ b/man/prepare_input_time_series_art.Rd
@@ -13,8 +13,9 @@ prepare_input_time_series_art(art, shape)
 }
 \value{
 Data formatted for plotting input time series containing columns
-area_id, area_name, area_level, area_level_label, time_period, year,
-quarter, plot and value
+area_id, area_name, area_level, area_level_label, parent_area_id, area_sort_order,
+time_period, year, quarter, calendar_quarter, area_hierarchy, plot, value and
+missing_ids
 }
 \description{
 Take uploaded ART and shape file paths and format as data which
diff --git a/man/select_naomi_data.Rd b/man/select_naomi_data.Rd
index 55a3c28d..94aa869b 100644
--- a/man/select_naomi_data.Rd
+++ b/man/select_naomi_data.Rd
@@ -15,13 +15,13 @@ select_naomi_data(
   vls_survey_ids = NULL,
   artnum_calendar_quarter_t1 = naomi_mf[["calendar_quarter1"]],
   artnum_calendar_quarter_t2 = naomi_mf[["calendar_quarter2"]],
- 
-    anc_clients_year_t2 = year_labels(calendar_quarter_to_quarter_id(naomi_mf[["calendar_quarter2"]])),
+  anc_clients_year_t2 =
+    year_labels(calendar_quarter_to_quarter_id(naomi_mf[["calendar_quarter2"]])),
   anc_clients_year_t2_num_months = 12,
- 
-    anc_prev_year_t1 = year_labels(calendar_quarter_to_quarter_id(naomi_mf[["calendar_quarter1"]])),
- 
-    anc_prev_year_t2 = year_labels(calendar_quarter_to_quarter_id(naomi_mf[["calendar_quarter2"]])),
+  anc_prev_year_t1 =
+    year_labels(calendar_quarter_to_quarter_id(naomi_mf[["calendar_quarter1"]])),
+  anc_prev_year_t2 =
+    year_labels(calendar_quarter_to_quarter_id(naomi_mf[["calendar_quarter2"]])),
   anc_artcov_year_t1 = anc_prev_year_t1,
   anc_artcov_year_t2 = anc_prev_year_t2,
   use_kish_prev = TRUE,
diff --git a/man/write_navigator_checklist.Rd b/man/write_navigator_checklist.Rd
index 3291b892..6747cb36 100644
--- a/man/write_navigator_checklist.Rd
+++ b/man/write_navigator_checklist.Rd
@@ -14,6 +14,3 @@ write_navigator_checklist(naomi_output, path)
 \description{
 Write UNAIDS Estimates Navigator checklist CSV
 }
-\details{
-
-}
diff --git a/man/write_xlsx_sheets.Rd b/man/write_xlsx_sheets.Rd
new file mode 100644
index 00000000..30b50d84
--- /dev/null
+++ b/man/write_xlsx_sheets.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{write_xlsx_sheets}
+\alias{write_xlsx_sheets}
+\title{Write list of data frames into an xlsx file}
+\usage{
+write_xlsx_sheets(template, sheets, path)
+}
+\arguments{
+\item{template}{Path to xlsx file with empty sheets}
+
+\item{sheets}{Named list of data frames to write into template. The names
+must match the destination sheet in the xlsx}
+
+\item{path}{Path to output the filled in xlsx}
+}
+\value{
+Path to complete xlsx file
+}
+\description{
+Write list of data frames into an xlsx file
+}
+\keyword{internal}
diff --git a/tests/testthat/test-downloads.R b/tests/testthat/test-downloads.R
index a530548f..3c1da15f 100644
--- a/tests/testthat/test-downloads.R
+++ b/tests/testthat/test-downloads.R
@@ -220,7 +220,6 @@ test_that("AGYW download can be created", {
   expect_length(messages$progress, 1)
   expect_equal(messages$progress[[1]]$message, "Generating AGYW tool")
 
-
   # Test agyw workbook with no kp workbook saved into spectrum
   risk_prop <- agyw_generate_risk_populations(agyw_output_demo$model_output_path,
                                               a_hintr_data$pjnz)
@@ -230,8 +229,8 @@ test_that("AGYW download can be created", {
                           consensus_estimate = NA))
 
   # Test agyw workbook with mock workbook saved into spectrum
-
-  mock_extract_kp_workbook <- mockery::mock(readRDS(test_path("testdata/kp_workbook_spectrum.rds")))
+  kp_consensus <- readRDS(file.path("testdata/kp_workbook_spectrum.rds"))
+  mock_extract_kp_workbook <- mockery::mock(kp_consensus)
   mock_new_simple_progress <- mockery::mock(MockSimpleProgress$new())
 
   with_mocked_bindings(

From 5b6893116bc1610d2fc4b938f8b747abcddfa00c Mon Sep 17 00:00:00 2001
From: Rob Ashton <r.ashton@imperial.ac.uk>
Date: Fri, 22 Dec 2023 18:05:09 +0000
Subject: [PATCH 25/53] Set min naomi.resources version

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index cc9fd6c0..f7e3cadc 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -36,7 +36,7 @@ Imports:
     knitr,
     magrittr,
     mvtnorm,
-    naomi.resources,
+    naomi.resources (>= 0.0.2),
     naomi.options (>= 1.2.0),
     openxlsx,
     plotly,

From 3e83bfe40c8fa5905b88cca16e96f9c642e51039 Mon Sep 17 00:00:00 2001
From: Rob Ashton <r.ashton@imperial.ac.uk>
Date: Thu, 11 Jan 2024 09:33:13 +0000
Subject: [PATCH 26/53] Move hintr agyw test helper into Naomi

---
 R/test-helpers.R                | 49 +++++++++++++++++++++++++++++++++
 man/make_agyw_testfiles.Rd      | 19 +++++++++++++
 tests/testthat/test-downloads.R | 30 +-------------------
 3 files changed, 69 insertions(+), 29 deletions(-)
 create mode 100644 R/test-helpers.R
 create mode 100644 man/make_agyw_testfiles.Rd

diff --git a/R/test-helpers.R b/R/test-helpers.R
new file mode 100644
index 00000000..042949d4
--- /dev/null
+++ b/R/test-helpers.R
@@ -0,0 +1,49 @@
+## File contains test helpers which we want to use both here and in hintr
+
+#' Build JSON from template and a set of params
+#'
+#' @param naomi_output Calibrated naomi output
+#'
+#' @return Calibrated naomi output matched to MWI test data on
+#'   `naomi.resources` to be used to generate the agyw tool.
+#' @keywords internal
+make_agyw_testfiles <- function(naomi_output) {
+  # Create naomi outputs align with testing data in naomi.resources:
+  #   - Change iso3 to "MWI_demo"
+  #   - Restrict outputs to admin2
+  output <- naomi::read_hintr_output(naomi_output$model_output_path)
+
+  # Areas
+  meta_area_demo <- dplyr::mutate(output$output_package$meta_area,
+                                  area_id = dplyr::if_else(area_id == "MWI", "MWI_demo", area_id),
+                                  parent_area_id = dplyr::if_else(parent_area_id == "MWI", "MWI_demo", parent_area_id))
+
+  meta_area_demo <- dplyr::filter(meta_area_demo, area_level <= 2)
+
+  # Indicators
+  ind_demo <- dplyr::mutate(output$output_package$indicators,
+                            area_id = dplyr::if_else(area_id == "MWI", "MWI_demo", area_id))
+
+  ind_demo <- dplyr::filter(ind_demo, area_id %in% meta_area_demo$area_id)
+
+
+  # Options
+  options_demo <- output$output_package$fit$model_options
+  options_demo$area_scope <- "MWI_demo"
+  options_demo$area_level <- 2
+
+  # Save out demo output package
+  demo <- output
+  demo$output_package$indicators <- ind_demo
+  demo$output_package$fit$model_options <- options_demo
+  demo$output_package$meta_area <- meta_area_demo
+
+  out_demo <- tempfile(fileext = ".qs")
+  naomi:::hintr_save(demo, out_demo)
+
+  # Add to existing hintr_test data
+  agyw_output_demo <- naomi_output
+  agyw_output_demo$model_output_path <- out_demo
+
+  agyw_output_demo
+}
diff --git a/man/make_agyw_testfiles.Rd b/man/make_agyw_testfiles.Rd
new file mode 100644
index 00000000..8a1396b7
--- /dev/null
+++ b/man/make_agyw_testfiles.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/test-helpers.R
+\name{make_agyw_testfiles}
+\alias{make_agyw_testfiles}
+\title{Build JSON from template and a set of params}
+\usage{
+make_agyw_testfiles(naomi_output)
+}
+\arguments{
+\item{naomi_output}{Calibrated naomi output}
+}
+\value{
+Calibrated naomi output matched to MWI test data on
+\code{naomi.resources} to be used to generate the agyw tool.
+}
+\description{
+Build JSON from template and a set of params
+}
+\keyword{internal}
diff --git a/tests/testthat/test-downloads.R b/tests/testthat/test-downloads.R
index 3c1da15f..d0d1099f 100644
--- a/tests/testthat/test-downloads.R
+++ b/tests/testthat/test-downloads.R
@@ -165,35 +165,7 @@ test_that("comparison report download can be created", {
 
 test_that("AGYW download can be created", {
 
-  #' Create naomi outputs with "MWI_demo" iso3 to align with testing data in
-  #' naomi.resources
-  output <- read_hintr_output(a_hintr_output_calibrated$model_output_path)
-
-  # Create demo datasets
-  # Indicators
-  ind_demo <- output$output_package$indicators %>%
-    dplyr::mutate(area_id = dplyr::if_else(area_id == "MWI", "MWI_demo", area_id))
-  # Options
-  options_demo <- output$output_package$fit$model_options
-  options_demo$area_scope <- "MWI_demo"
-
-  # Areas
-  meta_area_demo <- output$output_package$meta_area %>%
-    dplyr::mutate(area_id = dplyr::if_else(area_id == "MWI", "MWI_demo", area_id),
-                  parent_area_id = dplyr::if_else(parent_area_id == "MWI", "MWI_demo", parent_area_id))
-
-  # Save out demo output package
-  demo <- output
-  demo$output_package$indicators <- ind_demo
-  demo$output_package$fit$model_options <- options_demo
-  demo$output_package$meta_area <- meta_area_demo
-
-  out_demo <- tempfile(fileext = ".qs")
-  hintr_save(demo, out_demo)
-
-  # Add to existing hintr_test data
-  agyw_output_demo <- a_hintr_output_calibrated
-  agyw_output_demo$model_output_path <- out_demo
+  agyw_output_demo <- make_agyw_testfiles(a_hintr_output_calibrated)
 
   mock_new_simple_progress <- mockery::mock(MockSimpleProgress$new())
 

From 1dffc41174c9cc1ba8e4c469d0c34fb97908d3ea Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Fri, 12 Jan 2024 12:14:04 +0200
Subject: [PATCH 27/53] Update R/agyw-integration.R

Co-authored-by: Rob <39248272+r-ash@users.noreply.github.com>
---
 R/agyw-integration.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index 7f2f320a..510a8aae 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -848,7 +848,7 @@ agyw_calculate_prevalence_male <- function(naomi_output,
                                            male_srb,
                                            survey_year_sample = 2018) {
 
-  #' Naomi estimates of PLHIV and population by district and age band
+  # Naomi estimates of PLHIV and population by district and age band
   naomi_est <- naomi_output %>%
     dplyr::filter(calendar_quarter == options$calendar_quarter_t2,
                   sex == "male",

From 873a7b18a15c68e1817510b262f79522022e1c85 Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Fri, 12 Jan 2024 12:18:41 +0200
Subject: [PATCH 28/53] fix up documentation

---
 R/agyw-integration.R | 292 ++++++++++++++++++++++---------------------
 1 file changed, 148 insertions(+), 144 deletions(-)

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index 7f2f320a..30017883 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -5,8 +5,8 @@
 #'
 #'
 #' @return Naomi indicators formatted for the AGYW workbook.
-#'
-#' @export
+#' @keywords internal
+
 
 agyw_format_naomi <- function(outputs, options){
 
@@ -154,21 +154,20 @@ agyw_format_naomi <- function(outputs, options){
 #'
 #' @param outputs Naomi output.
 #' @param options Naomi model options.
-#' @param naomi_population Naomi population estimates for T2.
+#' @param naomi_pop Naomi population estimates for T2.
 #' @param kp_consensus Key pop consensus estimates.
 #'
 #'
 #' @return District level FSW estimates by 5-year age bands for ages 15-49.
-#'
-#' @export
+#' @keywords internal
+
 
 agyw_disaggregate_fsw <- function(outputs,
                                   options,
                                   naomi_pop,
-                                  kp_consensus)
-{
+                                  kp_consensus){
 
-  #' Extract country specific national FSW PSEs
+  # Extract country specific national FSW PSEs
   iso3 <- options$area_scope
 
   pse <- naomi.resources::load_agyw_exdata("kp_estimates", iso3) %>%
@@ -181,7 +180,7 @@ agyw_disaggregate_fsw <- function(outputs,
   age_groups <- c("Y015_019", "Y020_024", "Y025_029", "Y030_034",
                   "Y035_039", "Y040_044", "Y045_049")
 
-  #'Calculating FSW proportion of total female population
+  # Calculating FSW proportion of total female population
   fsw <- fsw_pse %>%
     dplyr::mutate(age_group = "Y015_049") %>%
     dplyr::left_join(naomi_pop %>% dplyr::filter(sex == "female"),
@@ -189,7 +188,7 @@ agyw_disaggregate_fsw <- function(outputs,
     dplyr::mutate(total_fsw = population * prop_fsw) %>%
     dplyr::select(iso3, area_id, total_fsw, age_group, area_level, spectrum_region_code)
 
-  #' Check for consensus estimate of FSW
+  # Check for consensus estimate of FSW
   fsw_consensus <- kp_consensus[kp_consensus$key_population == "FSW", ]$population_size
 
   if(!is.na(fsw_consensus)){
@@ -210,14 +209,14 @@ agyw_disaggregate_fsw <- function(outputs,
   }
 
 
-  #' FSW age distribution parameters in ZAF from Thembisa
-  #' Downloaded from: https://www.thembisa.org/content/downloadPage/Thembisa4_3
+  # FSW age distribution parameters in ZAF from Thembisa
+  # Downloaded from: https://www.thembisa.org/content/downloadPage/Thembisa4_3
   gamma_mean <- 29
   gamma_sd <- 9
-  beta <- gamma_mean / gamma_sd^2 #' rate
-  alpha <- gamma_mean * beta #' shape
+  beta <- gamma_mean / gamma_sd^2 # rate
+  alpha <- gamma_mean * beta # shape
 
-  #' Distribution function of the gamma
+  # Distribution function of the gamma
   zaf_gamma <- data.frame(
     dist = diff(pgamma(c(15, 20, 25, 30, 35, 40, 45, 50), shape = alpha, rate = beta)),
     age_group = age_groups) %>%
@@ -227,11 +226,11 @@ agyw_disaggregate_fsw <- function(outputs,
     (1 + (scale * t)^-shape)^-skew
   }
 
-  #' Calculate proportion of sexually active population using Kinh's country specific
-  #' estimates of age at first sex and naomi population
+  # Calculate proportion of sexually active population using Kinh's country specific
+  # estimates of age at first sex and naomi population
   afs <- naomi.resources::load_agyw_exdata("afs", iso3)
 
-  #' Select birth cohort from 2000, to turn 15 in 2015
+  # Select birth cohort from 2000, to turn 15 in 2015
   cohort <- 2000
 
   afs <- afs %>%
@@ -240,7 +239,7 @@ agyw_disaggregate_fsw <- function(outputs,
 
   df <- data.frame()
 
-  #' Calculate sexually active population by age and sex for each district
+  # Calculate sexually active population by age and sex for each district
   for (x in unique(afs$area_id)) {
     afs_x <- dplyr::filter(afs, area_id == x)
     ages <- 15:49
@@ -272,9 +271,9 @@ agyw_disaggregate_fsw <- function(outputs,
     df <- dplyr::bind_rows(df, df_x)
   }
 
-  #' Adjusting country specific sexual debut estimates with age distribution of
-  #' FSW from Thembisa
-  #'Downloaded from: https://www.thembisa.org/content/downloadPage/Thembisa4_3
+  # Adjusting country specific sexual debut estimates with age distribution of
+  # FSW from Thembisa
+  #Downloaded from: https://www.thembisa.org/content/downloadPage/Thembisa4_3
   zaf_propensity <- naomi.resources::load_agyw_exdata("zaf_propensity", iso3 = "ZAF") %>%
     dplyr::filter(kp == "FSW")
 
@@ -308,10 +307,11 @@ agyw_disaggregate_fsw <- function(outputs,
 #'
 #' @param outputs Naomi output.
 #' @param options Naomi model options.
-#' @param naomi_population Naomi population estimates for T2.
+#' @param naomi_pop Naomi population estimates for T2.
 #' @param kp_consensus Key pop consensus estimates.
 #'
 #' @return District level PWID estimates by 5-year age bands for ages 15-49.
+#' @keywords internal
 
 agyw_disaggregate_pwid <- function(outputs,
                                    options,
@@ -319,7 +319,7 @@ agyw_disaggregate_pwid <- function(outputs,
                                    kp_consensus)
 {
 
-  #' Extract country specific national PWID PSEs
+  # Extract country specific national PWID PSEs
   iso3 <- options$area_scope
 
   pse <- naomi.resources::load_agyw_exdata("kp_estimates", iso3) %>%
@@ -339,7 +339,7 @@ agyw_disaggregate_pwid <- function(outputs,
     dplyr::mutate(total_pwid = population * prop_pwid) %>%
     dplyr::select(iso3, area_id, total_pwid, age_group, area_level)
 
-  #' Check for consensus estimate of MSM
+  # Check for consensus estimate of MSM
   pwid_consensus <- kp_consensus[kp_consensus$key_population == "PWID", ]$population_size
 
   if(!is.na(pwid_consensus)){
@@ -359,19 +359,19 @@ agyw_disaggregate_pwid <- function(outputs,
   }
 
 
-  #' Assumption from literature that 9% of PWID are female so remove them from
-  #' the male denominator
+  # Assumption from literature that 9% of PWID are female so remove them from
+  # the male denominator
 
   pwid$total_pwid <- pwid$total_pwid * 0.91
 
-  #' PWID age distribution
-  #' Review of literature - Hines et al Lancet Global Health 2020
+  # PWID age distribution
+  # Review of literature - Hines et al Lancet Global Health 2020
   gamma_mean <- 29.4
   gamma_sd <- 7
-  beta <- gamma_mean / gamma_sd^2 #' rate
-  alpha <- gamma_mean * beta #' shape
+  beta <- gamma_mean / gamma_sd^2 # rate
+  alpha <- gamma_mean * beta # shape
 
-  #' Distribution function of the gamma
+  # Distribution function of the gamma
   zaf_gamma <- data.frame(
     dist = diff(pgamma(c(15, 20, 25, 30, 35, 40, 45, 50), shape = alpha, rate = beta)),
     age_group = age_groups) %>%
@@ -403,10 +403,11 @@ agyw_disaggregate_pwid <- function(outputs,
 #'
 #' @param outputs Naomi output.
 #' @param options Naomi model options.
-#' @param naomi_population Naomi population estimates for T2.
+#' @param naomi_pop Naomi population estimates for T2.
 #' @param kp_consensus Key pop consensus estimates.
 #'
 #' @return District level MSM estimates by 5-year age bands for ages 15-49.
+#' @keywords internal
 
 agyw_disaggregate_msm <- function(outputs,
                                   options,
@@ -414,7 +415,7 @@ agyw_disaggregate_msm <- function(outputs,
                                   kp_consensus)
 {
 
-  #' Extract country specific national MSM PSEs
+  # Extract country specific national MSM PSEs
   iso3 <- options$area_scope
   pse <- naomi.resources::load_agyw_exdata("kp_estimates", iso3) %>%
     dplyr::filter(kp == "MSM", indicator == "pse_prop")
@@ -433,7 +434,7 @@ agyw_disaggregate_msm <- function(outputs,
     dplyr::mutate(total_msm = population * prop_msm) %>%
     dplyr::select(iso3, area_id, total_msm, age_group, area_level)
 
-  #' Check for consensus estimate of MSM
+  # Check for consensus estimate of MSM
   msm_consensus <- kp_consensus[kp_consensus$key_population == "MSM", ]$population_size
 
   if(!is.na(msm_consensus)){
@@ -453,14 +454,14 @@ agyw_disaggregate_msm <- function(outputs,
   }
 
 
-  #' MSM age distribution parameters in ZAF from Thembisa
-  #' Downloaded from: https://www.thembisa.org/content/downloadPage/Thembisa4_3report
+  # MSM age distribution parameters in ZAF from Thembisa
+  # Downloaded from: https://www.thembisa.org/content/downloadPage/Thembisa4_3report
   gamma_mean <- 25
   gamma_sd <- 7
-  beta <- gamma_mean / gamma_sd^2 #' rate
-  alpha <- gamma_mean * beta #' shape
+  beta <- gamma_mean / gamma_sd^2 # rate
+  alpha <- gamma_mean * beta # shape
 
-  #' Distribution function of the gamma
+  # Distribution function of the gamma
   zaf_gamma <- data.frame(
     dist = diff(pgamma(c(15, 20, 25, 30, 35, 40, 45, 50), shape = alpha, rate = beta)),
     age_group = age_groups) %>%
@@ -471,11 +472,11 @@ agyw_disaggregate_msm <- function(outputs,
     (1 + (scale * t)^-shape)^-skew
   }
 
-  #' Calculate proportion of sexually active population using Kinh's country specific
-  #' estimates of age at first sex and naomi population
+  # Calculate proportion of sexually active population using Kinh's country specific
+  # estimates of age at first sex and naomi population
   afs <- naomi.resources::load_agyw_exdata("afs", iso3)
 
-  #' Select birth cohort from 2000, to turn 15 in 2015
+  # Select birth cohort from 2000, to turn 15 in 2015
   cohort <- 2000
 
   afs <- afs %>%
@@ -484,7 +485,7 @@ agyw_disaggregate_msm <- function(outputs,
 
   df <- data.frame()
 
-  #' Calculate sexually active population by age and sex for each district
+  # Calculate sexually active population by age and sex for each district
   for(x in unique(afs$area_id)) {
     afs_x <- dplyr::filter(afs, area_id == x)
     ages <- 15:49
@@ -516,8 +517,8 @@ agyw_disaggregate_msm <- function(outputs,
     df <- dplyr::bind_rows(df, df_x)
   }
 
-  #' Adjusting country specific sexual debut estimates with age distribution of
-  #' MSM from Thembisa
+  # Adjusting country specific sexual debut estimates with age distribution of
+  # MSM from Thembisa
   zaf_propensity <- naomi.resources::load_agyw_exdata("zaf_propensity", iso3 = "ZAF") %>%
     dplyr::filter(kp == "MSM")
 
@@ -549,9 +550,7 @@ agyw_disaggregate_msm <- function(outputs,
 #' @param outputs Naomi output.
 #' @param options Naomi model options.
 #' @param fsw_est 5-year estimates of FSW PSEs generated from `agyw_disaggregate_fsw()`.
-#' @param female_sae_path Path to female estimates of sexual behavior risk group.
 #'
-#' @return District level estimates of female sexual risk behaviour groups
 #'
 #' Estimates are generated for the following groups:
 #'
@@ -565,6 +564,9 @@ agyw_disaggregate_msm <- function(outputs,
 #' Calculation steps:
 #' 1. Align admin0/admin1 FSW proportions with SRB SAE estimates.
 #' 2. Subtract the proportion of FSW from total high risk female population.
+#'
+#' @return District level estimates of female sexual risk behaviour groups
+#' @keywords internal
 
 
 agyw_adjust_sexbehav_fsw <- function(outputs,
@@ -573,7 +575,7 @@ agyw_adjust_sexbehav_fsw <- function(outputs,
 {
 
 
-  #' Match FSW estimates (admin0 or admin1) with SAE estimates
+  # Match FSW estimates (admin0 or admin1) with SAE estimates
   fsw_analysis_level <- paste0("area_id",unique(fsw_est$area_level))
 
   areas_wide <-  naomi::spread_areas(outputs$meta_area) %>%
@@ -582,10 +584,10 @@ agyw_adjust_sexbehav_fsw <- function(outputs,
   map <- dplyr::select(areas_wide, area_id, dplyr::all_of(fsw_analysis_level)) %>%
     dplyr::rename(fsw_match_area = 2)
 
-  #' Allocate admin1 FSW proportions
+  # Allocate admin1 FSW proportions
   fsw_df <- fsw_est %>% dplyr::select(age_group, fsw_match_area = area_id, fsw_prop)
 
-  #' Load female SRB proportions
+  # Load female SRB proportions
   female_srb <- naomi.resources::load_agyw_exdata("srb_female", options$area_scope)
 
   adj_female_srb <- female_srb %>%
@@ -619,9 +621,9 @@ agyw_adjust_sexbehav_fsw <- function(outputs,
 #' @param options Naomi model options.
 #' @param msm_est 5-year estimates of MSM PSEs generated from `agyw__disaggregate_msm()`.
 #' @param pwid_est 5-year estimates of MSM PSEs generated from `agyw__disaggregate_pwid()`.
-#' @param sae_path Path to female estimates of sexual behavior risk group.
 #'
 #' @return District level estimates of male sexual risk behaviour groups
+#' @keywords internal
 #'
 #' Estimates are generated for the following groups:
 #'
@@ -636,7 +638,7 @@ agyw_adjust_sexbehav_fsw <- function(outputs,
 #' Calculation steps:
 #' 1. Align admin0/admin1 MSM and PWID proportions with SRB SAE estimates.
 #' 2. Subtracting MSM and PWID proportionally from all SRB groups.
-#'
+
 
 agyw_adjust_sexbehav_msm_pwid <- function(outputs,
                                           options,
@@ -657,7 +659,7 @@ agyw_adjust_sexbehav_msm_pwid <- function(outputs,
   msm_df <- msm_est %>% dplyr::select(age_group, kp_match_area = area_id, msm_prop)
   pwid_df <- pwid_est %>% dplyr::select(age_group, kp_match_area = area_id, pwid_prop)
 
-  #' Load male SRB proportions
+  # Load male SRB proportions
   male_srb <- naomi.resources::load_agyw_exdata("srb_male", options$area_scope)
 
   adj_male_srb <- male_srb %>%
@@ -671,8 +673,8 @@ agyw_adjust_sexbehav_msm_pwid <- function(outputs,
       names_from = indicator,
       values_from = estimate_smoothed
     ) %>%
-    #' Subtracting MSM and PWID proportionally from all SRB risk groups
-    #' (FSW was just from high-risk females)
+    # Subtracting MSM and PWID proportionally from all SRB risk groups
+    # (FSW was just from high-risk females)
     dplyr::mutate(
       nosex12m = nosex12m * (1 - pwid_prop - msm_prop),
       sexcohab = sexcohab * (1 - pwid_prop - msm_prop),
@@ -693,16 +695,12 @@ agyw_adjust_sexbehav_msm_pwid <- function(outputs,
 
 #' Calculate prevalence for female SRB groups.
 #'
-#' @param outputs Naomi output.
+#' @param naomi_output Naomi output.
 #' @param options Naomi model options.
-#' @param fsw_est 5-year estimates of MSM PSEs generated from `agyw_disaggregate_fse()`.
-#' @param female_sexbehav KP adjusted estimates of female SRB groups generated by `agyw_adjust_sexbehav_fsw()`
-#' @param female_hiv_path Path to SRB HIV estimates from household surveys (last updated XX-XX-XX).
-#' @param pse_path Path to KP PSEs last updated (XX-XX-XX).
+#' @param fsw_est 5-year estimates of FSW PSEs generated from `agyw_disaggregate_fse()`.
+#' @param female_srb FSW adjusted estimates of female SRB groups generated by `agyw_adjust_sexbehav_fsw()`
 #' @param survey_year Year of survey to sample estimates.
 #'
-#' @return SRB PSEs with logit prevalence estimates.
-#'
 #' To calculate district-age-sex-sexual behaviour-specific HIV prevalence, we maintain
 #' HIV prevalence from Naomi for a district-age-sex, but disaggregate to different
 #' risk behaviours using:
@@ -712,13 +710,16 @@ agyw_adjust_sexbehav_msm_pwid <- function(outputs,
 #'  prevalence to gen-pop prevalence used to predict an age-district-specific
 #'  FSW to general population prevalence ratio.
 #'
+#' @return SRB PSEs with logit prevalence estimates.
+#' @keywords internal
+
 agyw_calculate_prevalence_female <- function(naomi_output,
                                              options,
                                              fsw_est,
                                              female_srb,
                                              survey_year_sample = 2018) {
 
-  #' Naomi estimates of PLHIV and population by district and age band
+  # Naomi estimates of PLHIV and population by district and age band
   naomi_est <- naomi_output %>%
     dplyr::filter(calendar_quarter == options$calendar_quarter_t2,
                   sex == "female",
@@ -732,9 +733,9 @@ agyw_calculate_prevalence_female <- function(naomi_output,
     dplyr::filter(age_group == "Y015_049") %>%
     dplyr::select(area_id, gen_prev)
 
-  #' Extract country specific national FSW prevalence
+  # Extract country specific national FSW prevalence
   iso3 <- options$area_scope
-  #' THIS IS NOW USING SINGLE COUNTRY INSTEAD OF ALL COUNTRIES
+  # THIS IS NOW USING SINGLE COUNTRY INSTEAD OF ALL COUNTRIES
   fsw_prev <- naomi.resources::load_agyw_exdata("kp_estimates", iso3) %>%
     dplyr::filter(kp == "FSW", indicator == "prevalence")
 
@@ -744,11 +745,11 @@ agyw_calculate_prevalence_female <- function(naomi_output,
     dplyr::mutate(prev_fsw_logodds = log(median / (1 - median)),
                   prev_logodds = log(gen_prev / (1 - gen_prev)))
 
-  #' KP regression: FSW prevalence relative to general prevalence
-  #' ########## THIS REGRESSION SHOULD BE TAKING DATA FROM ALL ADMIN-1 LEVEL
+  # KP regression: FSW prevalence relative to general prevalence
+  # ########## THIS REGRESSION SHOULD BE TAKING DATA FROM ALL ADMIN-1 LEVEL
   kp_fit <- lm(prev_fsw_logodds ~ prev_logodds, data = kp_prev)
 
-  #' Modelled estimates of proportion in each risk group
+  # Modelled estimates of proportion in each risk group
   risk_group_prop <- female_srb %>%
     dplyr::filter(year == survey_year_sample) %>%
     dplyr::select(area_id, age_group, indicator, estimate_smoothed) %>%
@@ -763,10 +764,10 @@ agyw_calculate_prevalence_female <- function(naomi_output,
       population_sexpaid12m = population * prop_sexpaid12m
     )
 
-  #' Calculate prevalence in each category
+  # Calculate prevalence in each category
   calculate_prevalence <- function(x, iso3){
 
-    #' Log odds ratio from SRB group survey prevalence
+    # Log odds ratio from SRB group survey prevalence
     lor <- naomi.resources:::load_agyw_exdata("srb_survey_lor", iso3) %>%
       dplyr::filter(sex == "female")
 
@@ -792,7 +793,7 @@ agyw_calculate_prevalence_female <- function(naomi_output,
     dplyr::bind_rows(x, y)
   }
 
-  #' Calculate logit prevalence and format
+  # Calculate logit prevalence and format
   logit_prev <- risk_group_prop %>%
     dplyr::mutate(ywkp_lor = calculate_ywkp_pr_lor(gen_prev, fit = kp_fit)$lor) %>%
     dplyr::select(-starts_with("pr_"), -gen_prev) %>%
@@ -821,14 +822,13 @@ agyw_calculate_prevalence_female <- function(naomi_output,
 
 #' Calculate prevalence for male SRB groups.
 #'
-#' @param outputs Naomi output.
+#' @param naomi_output Naomi output.
+#' @param areas Naomi boundary file.
 #' @param options Naomi model options.
-#' @param male_srb
-#' @param areas
-#' @param msm_est .
+#' @param msm_est 5-year estimates of MSM PSEs generated from `agyw__disaggregate_msm()`.
+#' @param male_srb MSM and PWID adjusted estimates of male SRB groups generated by `agyw_adjust_sexbehav_msm_pwid()`.
 #' @param survey_year Year of survey to sample estimates.
 #'
-#' @return SRB PSEs with logit prevalence estimates.
 #'
 #' To calculate district-age-sex-sexual behaviour-specific HIV prevalence, we maintain
 #' HIV prevalence from Naomi for a district-age-sex, but disaggregate to different
@@ -839,6 +839,9 @@ agyw_calculate_prevalence_female <- function(naomi_output,
 #'   among 15-24 year olds for MSM (due to the young age distribution of MSM) or
 #'   among 15-49 year olds for PWID (due to the older age distribution of PWID)
 #'   applied to all age groups among MSM and PWID in districts by admin-1 unit.
+#'
+#' @return SRB PSEs with logit prevalence estimates.
+#' @keywords internal
 
 
 agyw_calculate_prevalence_male <- function(naomi_output,
@@ -848,7 +851,7 @@ agyw_calculate_prevalence_male <- function(naomi_output,
                                            male_srb,
                                            survey_year_sample = 2018) {
 
-  #' Naomi estimates of PLHIV and population by district and age band
+  # Naomi estimates of PLHIV and population by district and age band
   naomi_est <- naomi_output %>%
     dplyr::filter(calendar_quarter == options$calendar_quarter_t2,
                   sex == "male",
@@ -866,7 +869,7 @@ agyw_calculate_prevalence_male <- function(naomi_output,
                   logit_gen_prev_pwid = log(Y015_049 / (1 - Y015_049))) %>%
     dplyr::select(area_id, logit_gen_prev_msm, logit_gen_prev_pwid, area_level)
 
-  #' Extract country specific national MSM + PWID prevalence
+  # Extract country specific national MSM + PWID prevalence
   iso3 <- options$area_scope
 
   msm_pwid_prev <- naomi.resources::load_agyw_exdata("kp_estimates", iso3) %>%
@@ -894,7 +897,7 @@ agyw_calculate_prevalence_male <- function(naomi_output,
   map <- dplyr::select(areas_wide, area_id, dplyr::all_of(msm_analysis_level)) %>%
     dplyr::rename(kp_match_area = 2)
 
-  #' Modelled estimates of proportion in each risk group
+  # Modelled estimates of proportion in each risk group
   risk_group_prop <- male_srb %>%
     dplyr::left_join(map, by = dplyr::join_by(area_id)) %>%
     dplyr::filter(year == survey_year_sample, iso3 == options$area_scope) %>%
@@ -912,10 +915,10 @@ agyw_calculate_prevalence_male <- function(naomi_output,
       population_pwid = population * prop_pwid
     )
 
-  #' Calculate prevalence in each category
+  # Calculate prevalence in each category
   calculate_prevalence <- function(x, iso3){
 
-    #' Log odds ratio from SRB group survey prevalence
+    # Log odds ratio from SRB group survey prevalence
     lor <- naomi.resources:::load_agyw_exdata("srb_survey_lor", iso3) %>%
       dplyr::filter(sex == "male")
 
@@ -942,7 +945,7 @@ agyw_calculate_prevalence_male <- function(naomi_output,
   }
 
 
-  #' Calculate logit prevalence and format
+  # Calculate logit prevalence and format
   logit_prev <- risk_group_prop %>%
     dplyr::left_join(kp_prev, by = c("kp_match_area" = "area_id")) %>%
     tidyr::pivot_longer(
@@ -973,15 +976,17 @@ odds <- function(p) p / (1 - p)
 #'
 #' @param prev (General population) prevalence
 #' @param fit A model relating log-odds prevalence to YWKP log odds prevalence
+#' @keywords internal
+
 calculate_ywkp_pr_lor <- function(prev, fit = ywkp_fit) {
   prev_logodds <- qlogis(prev)
   prev_ywkp_logodds <- predict(fit, data.frame(prev_logodds = prev_logodds))
-  #' Ensure that the LOR is above that of e.g. the sexnonreg risk group
+  # Ensure that the LOR is above that of e.g. the sexnonreg risk group
   prev_ywkp_logodds <- pmax(prev_ywkp_logodds, prev_logodds + 0.25)
   prev_ywkp <- plogis(prev_ywkp_logodds)
-  #' Prevalence ratio
+  # Prevalence ratio
   pr <- prev_ywkp / prev
-  #' Log-odds ratio
+  # Log-odds ratio
   lor <- prev_ywkp_logodds - prev_logodds
   return(list(pr = pr, lor = lor, prev = prev, prev_ywkp = prev_ywkp))
 }
@@ -991,15 +996,18 @@ calculate_ywkp_pr_lor <- function(prev, fit = ywkp_fit) {
 #' @param lor Log odds-ratios
 #' @param N_fine Number of individuals in each group
 #' @param plhiv Total number of people living with HIV
+#'
+#' @keywords internal
+
 logit_scale_prev <- function(lor, N_fine, plhiv) {
-  #' theta represents prevalence in baseline risk group
-  #' plogis(lor + theta) is prevalence in each risk group
-  #' plogis(lor + theta) * N_fine is PLHIV in each risk group
+  # theta represents prevalence in baseline risk group
+  # plogis(lor + theta) is prevalence in each risk group
+  # plogis(lor + theta) * N_fine is PLHIV in each risk group
   optfn <- function(theta) (sum(plogis(lor + theta) * N_fine) - plhiv)^2
-  #' Optimisation for baseline risk group prevalence
-  #' On the logit scale should be more numerically stable
+  # Optimisation for baseline risk group prevalence
+  # On the logit scale should be more numerically stable
   opt <- optimise(optfn, c(-10, 10), tol = .Machine$double.eps^0.5)
-  #' Return prevalence
+  # Return prevalence
   plogis(lor + opt$minimum)
 
 }
@@ -1007,19 +1015,19 @@ logit_scale_prev <- function(lor, N_fine, plhiv) {
 
 #' Calculate incidence for female SRB groups.
 #'
-#' @param outputs Naomi output.
-#' @param options Naomi options extracted from outputs
-#' @param female_srb Estimates of female sexual risk groups generated by `agyw_adjust_sexbehav_fsw()`
-#' @param female_logit_prevalence Risk adjusted estimates of female prevalence in sexual risk groups generated by `agyw_calculate_prevalence_female()`
-#' @param survey_year Survey year to sample from the SAE model. Default is 2018.
-#'
-#'
-#' @return Wide format output required for the AGYW workbook.
-#'
 #' While maintaining age/sex/district-specific HIV incidence from Naomi, distribute
 #' HIV incidence between our 4 different behavioural groups utilizing IRRs from the
 #' literature
 #'
+#' @param naomi_output Naomi output.
+#' @param options Naomi model options.
+#' @param female_srb FSW adjusted estimates of female sexual risk groups generated by `agyw_adjust_sexbehav_fsw()`.
+#' @param female_logit_prevalence Risk adjusted estimates of female prevalence in sexual risk groups generated by `agyw_calculate_prevalence_female()`.
+#' @param survey_year Survey year to sample from the SAE model. Default is 2018. Survey year should be updated to most current household survey in the country - for countries without recent household surveys, leave at 2018 - the spatiotemporal
+#' model of sexual behaviour fitted to all countries has the most data for in roughly 2018.
+#'
+#' @return Wide format output required for the AGYW workbook.
+#' @keywords internal
 
 agyw_calculate_incidence_female <- function(naomi_output,
                                             options,
@@ -1049,20 +1057,20 @@ agyw_calculate_incidence_female <- function(naomi_output,
     dplyr::filter(!is.na(population))
 
 
-  #' Risk ratios for people non-regular sex partners relative to those with a
-  #' single cohabiting sex partner
-  #' ALPHA Network pooled analysis (Slaymaker et al CROI 2020), Jia et al systematic review, Ssempijja et al JAIDS 2022
+  # Risk ratios for people non-regular sex partners relative to those with a
+  # single cohabiting sex partner
+  # ALPHA Network pooled analysis (Slaymaker et al CROI 2020), Jia et al systematic review, Ssempijja et al JAIDS 2022
   rr_sexcohab <- 1
   rr_sexnonreg_young <- 1.72
   rr_sexnonreg_old <- 2.1
 
-  #' Tiered HIV risk ratio for the FSW group depending on district-level HIV
-  #' incidence in general population
-  #' Jones et al medRxiv "HIV incidence among women engaging in sex work in sub-Saharan Africa: a systematic review and meta-analysis"
-  #' https://www.medrxiv.org/content/10.1101/2023.10.17.23297108v2
-  #' linear relationship between log(FSW incidence) and log(gen pop incidence)
-  #' regression points shared in confidence, y = mx + b slope is 0.604104017 and
-  #' intercept is 0.075090952
+  # Tiered HIV risk ratio for the FSW group depending on district-level HIV
+  # incidence in general population
+  # Jones et al medRxiv "HIV incidence among women engaging in sex work in sub-Saharan Africa: a systematic review and meta-analysis"
+  # https://www.medrxiv.org/content/10.1101/2023.10.17.23297108v2
+  # linear relationship between log(FSW incidence) and log(gen pop incidence)
+  # regression points shared in confidence, y = mx + b slope is 0.604104017 and
+  # intercept is 0.075090952
 
   rr_reg_dat <- data.frame(genpop_incidence = df$incidence/100) %>%
     dplyr::mutate(log_gen = log(genpop_incidence),
@@ -1075,13 +1083,13 @@ agyw_calculate_incidence_female <- function(naomi_output,
   # capping at 100
   rr_sexpaid12m[rr_sexpaid12m > 100] <- 100
 
-  #' TODO: Get distributions on these and using a sampling method to get
-  #' uncertainty in economic analysis e.g.
+  # TODO: Get distributions on these and using a sampling method to get
+  # uncertainty in economic analysis e.g.
   rr_sexnonreg_se <- 0.2
   rr_sexnonreg_se <- 1
 
 
-  #' Calculate risk group incidence
+  # Calculate risk group incidence
   Y015_024 <-  c("Y015_019", "Y020_024")
   Y025_049 <-  c("Y025_029","Y030_034","Y035_039","Y040_044", "Y045_049")
 
@@ -1113,7 +1121,7 @@ agyw_calculate_incidence_female <- function(naomi_output,
       infections_sexcohab = susceptible_sexcohab * incidence_sexcohab,
       infections_sexnonreg = susceptible_sexnonreg * incidence_sexnonreg)
 
-  #' Calculate risk group incidence for aggregate age groups
+  # Calculate risk group incidence for aggregate age groups
 
   summarise_age_cat_female <- function(dat, age_cat) {
 
@@ -1178,7 +1186,7 @@ agyw_calculate_incidence_female <- function(naomi_output,
                                       labels = c("Low", "Moderate", "High", "Very High"),
                                       include.lowest = TRUE, right = TRUE))
 
-  #' Check that sum of disaggregated infections is the same as total infections
+  # Check that sum of disaggregated infections is the same as total infections
   sum_infections <- df3$infections_nosex12m + df3$infections_sexcohab + df3$infections_sexnonreg + df3$infections_sexpaid12m
 
   # TO DO: Flag this to add in warning - stop please contact support (usually an issue with mapping boundaries)
@@ -1213,14 +1221,13 @@ agyw_calculate_incidence_female <- function(naomi_output,
 #' Calculate incidence in high risk male key populations
 #'
 #' @param outputs Naomi output.
-#' @param options Naomi options extracted from outputs
-#' @param male_srb Estimates of male sexual risk groups generated by `agyw_adjust_sexbehav_msm_pwid()`
-#' @param male_logit_prevalence Risk adjusted estimates of male prevalence in sexual risk groups generated by `agyw_calculate_prevalence_male()`
-#' @param survey_year NOTES:: add in description + when this should be adjusted. Hardcoded to 2018.
-#'
+#' @param options Naomi model options.
+#' @param male_srb MSM and PWId adjusted estimated of male SRB groups generated by `agyw_adjust_sexbehav_msm_pwid()`.
+#' @param male_logit_prevalence Risk adjusted estimates of male HIV prevalence in sexual risk groups generated by `agyw_calculate_prevalence_male()`.
+#' @param survey_year Survey year to sample from the SAE model. Default is 2018. Survey year should be updated to most current household survey in the country - for countries without recent household surveys, leave at 2018 - the spatiotemporal
+#' model of sexual behaviour fitted to all countries has the most data for in roughly 2018.
+
 #' @return Wide format output required for the AGYW workbook
-#'
-#' @export
 
 agyw_calculate_incidence_male <- function(naomi_output,
                                           options,
@@ -1258,12 +1265,12 @@ agyw_calculate_incidence_male <- function(naomi_output,
   rr_sexnonreg_young <- 1.89
   rr_sexnonreg_old <- 2.1
 
-  #' TODO: Get distributions on these and using a sampling method to get uncertainty in economic analysis e.g.
+  # TODO: Get distributions on these and using a sampling method to get uncertainty in economic analysis e.g.
   rr_sexnonreg_se <- 0.2
   rr_sexnonreg_se <- 1
 
 
-  #' Calculate risk group incidence
+  # Calculate risk group incidence
   Y015_024 <-  c("Y015_019", "Y020_024")
   Y025_049 <-  c("Y025_029","Y030_034","Y035_039","Y040_044", "Y045_049")
 
@@ -1309,7 +1316,7 @@ agyw_calculate_incidence_male <- function(naomi_output,
 
     )
 
-  #' Calculate risk group incidence for aggregate age groups
+  # Calculate risk group incidence for aggregate age groups
 
   summarise_age_cat_male <- function(dat, age_cat) {
 
@@ -1383,7 +1390,7 @@ agyw_calculate_incidence_male <- function(naomi_output,
 
 
 
-  #' Check that sum of disaggregated infections is the same as total infections
+  # Check that sum of disaggregated infections is the same as total infections
   #  TO DO: add warning for sum not matching - contact admin
   sum_infections <- df3$infections_nosex12m + df3$infections_sexcohab + df3$infections_sexnonreg + df3$infections_msm + df3$infections_pwid
 
@@ -1417,21 +1424,18 @@ agyw_calculate_incidence_male <- function(naomi_output,
 }
 
 
-#' Calculate incidence in high risk male key populations
+#' Generate outputs to update AGYW tool.
 #'
-#' @param outputs Naomi output.
-#' @param options Naomi options extracted from outputs
+#' @param naomi_output Path to naomi output (zip file or hintr object).
+#' @param pjnz Path to spectrum file.
 #' @param male_srb Estimates of male sexual risk groups generated by `agyw_adjust_sexbehav_msm_pwid()`
 #' @param male_logit_prevalence Risk adjusted estimates of male prevalence in sexual risk groups generated by `agyw_calculate_prevalence_male()`
-#' @param survey_year NOTES:: add in description + when this should be adjusted. Hardcoded to 2018.
+#' @param survey_year Survey year to sample from the SAE model. Default is 2018. Survey year should be updated to most current household survey in the country - for countries without recent household surveys, leave at 2018 - the spatiotemporal
+#' model of sexual behaviour fitted to all countries has the most data for in roughly 2018.
 #'
-#' @return Wide format output required for the AGYW workbook
-#'
-#' @export
-#'
-#' Survey year should be updated to most current household survey in the country -
-#' for countries without recent household surveys, leave at 2018 - the spatiotemporal
-#' model of sexual behaviour fitted to all countries has the most data for in roughly 2018
+#' @return Output files to update AGYW excel workbook.
+#' @keywords internal
+
 
 agyw_generate_risk_populations <- function(naomi_output,
                                            pjnz,
@@ -1451,10 +1455,10 @@ agyw_generate_risk_populations <- function(naomi_output,
     options <- outputs$fit$model_options
   }
 
-  #' Format naomi output
+  # Format naomi output
   naomi <- agyw_format_naomi(outputs, options)
 
-  #' Naomi population
+  # Naomi population
   naomi_pop <- naomi$naomi_long %>%
     dplyr::filter(indicator == "population") %>%
     dplyr::select(area_id, area_level,sex, age_group, area_level,
@@ -1462,17 +1466,17 @@ agyw_generate_risk_populations <- function(naomi_output,
 
   naomi_pop$iso3 <- options$area_scope
 
-  #' Disaggregate KP PSEs from Oli's analysis to 5-year bands
+  # Disaggregate KP PSEs from Oli's analysis to 5-year bands
   kp_consensus <- extract_kp_workbook(pjnz)
   fsw_est <- agyw_disaggregate_fsw(outputs, options, naomi_pop, kp_consensus)
   pwid_est <- agyw_disaggregate_pwid(outputs, options, naomi_pop, kp_consensus)
   msm_est <- agyw_disaggregate_msm(outputs, options, naomi_pop, kp_consensus)
 
-  #' Adjust SAE model output with KP proportions
+  # Adjust SAE model output with KP proportions
   female_srb <- agyw_adjust_sexbehav_fsw(outputs, options, fsw_est)
   male_srb <- agyw_adjust_sexbehav_msm_pwid(outputs, options, msm_est, pwid_est)
 
-  #' Calculate risk group prevalence
+  # Calculate risk group prevalence
   female_logit_prevalence <- agyw_calculate_prevalence_female(naomi$naomi_long,
                                                               options,
                                                               fsw_est,
@@ -1486,7 +1490,7 @@ agyw_generate_risk_populations <- function(naomi_output,
                                                           male_srb,
                                                           survey_year)
 
-  #' Calculate risk group incidence
+  # Calculate risk group incidence
   female_incidence <- agyw_calculate_incidence_female(naomi$naomi_long,
                                                       options,
                                                       female_srb,

From d4fc44f4f5ec2e95025d523923c30e1705739c93 Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Fri, 12 Jan 2024 17:22:28 +0200
Subject: [PATCH 29/53] add checks for alignment between agyw resources and
 naomi estimates

---
 R/agyw-integration.R              | 72 ++++++++++++++++++++++++++++---
 inst/traduire/en-translation.json | 11 ++++-
 inst/traduire/fr-translation.json |  7 ++-
 inst/traduire/pt-translation.json |  7 ++-
 tests/testthat/test-downloads.R   | 13 ++++++
 5 files changed, 102 insertions(+), 8 deletions(-)

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index 30017883..9755bccd 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -1189,10 +1189,9 @@ agyw_calculate_incidence_female <- function(naomi_output,
   # Check that sum of disaggregated infections is the same as total infections
   sum_infections <- df3$infections_nosex12m + df3$infections_sexcohab + df3$infections_sexnonreg + df3$infections_sexpaid12m
 
-  # TO DO: Flag this to add in warning - stop please contact support (usually an issue with mapping boundaries)
-  # ADD IN WARNIING HERE
-  stopifnot(max(df3$infections - sum_infections) < 10^{-9})
-
+  if(max(df3$infections - sum_infections) > 10^{-9}){
+    stop("Risk group proportions do not sum correctly. Please contact suppport.")
+  }
 
   df3 %>%
     dplyr::mutate(concat = paste0(area_id, age_group), iso3 = options$area_scope) %>%
@@ -1394,7 +1393,9 @@ agyw_calculate_incidence_male <- function(naomi_output,
   #  TO DO: add warning for sum not matching - contact admin
   sum_infections <- df3$infections_nosex12m + df3$infections_sexcohab + df3$infections_sexnonreg + df3$infections_msm + df3$infections_pwid
 
-  stopifnot(max(df3$infections - sum_infections) < 10^{-9})
+  if(max(df3$infections - sum_infections) > 10^{-9}){
+    stop("Risk group proportions do not sum correctly. Please contact suppport.")
+  }
 
 
   df3 %>%
@@ -1455,6 +1456,10 @@ agyw_generate_risk_populations <- function(naomi_output,
     options <- outputs$fit$model_options
   }
 
+  # Check for concordence between area_ids in agyw resources from `naomi.resources`
+  #  and Naomi estimates
+  assert_agyw_resource_hierarchy(outputs, options)
+
   # Format naomi output
   naomi <- agyw_format_naomi(outputs, options)
 
@@ -1519,3 +1524,60 @@ agyw_generate_risk_populations <- function(naomi_output,
 
 }
 
+#' Throw warning when area hierarchy in external AGYW resources read in from
+#'  `naomi.resources` do not match Naomi outputs used to update AGYW estimates.
+#'
+#'  @param naomi_output Naomi outputs.
+#'  @param options Naomi options.
+
+
+assert_agyw_resource_hierarchy <- function(naomi_output,
+                                           options){
+
+  # iso3 from model options
+  iso3 <- options$area_scope
+
+  # KP PSE's
+  pse <- naomi.resources::load_agyw_exdata("kp_estimates", iso3)
+
+  # SRB SAE model estimates
+  female_srb <- naomi.resources::load_agyw_exdata("srb_female", options$area_scope)
+  male_srb <- naomi.resources::load_agyw_exdata("srb_male", options$area_scope)
+
+  # Naomi area_ids at lowest admin-level
+  naomi_hierarchy <- outputs$meta_area %>% dplyr::filter(area_level == options$area_level)
+  naomi_ids <- unique(naomi_hierarchy$area_id)
+
+  pse_diff <- setdiff(unique(pse$area_id), unique(outputs$meta_area$area_id))
+  female_srb_diff <- setdiff(unique(female_srb$area_id), naomi_ids)
+  male_srb_diff <- setdiff(unique(male_srb$area_id), naomi_ids)
+
+  if(length(pse_diff) != 0 ){
+
+    stop(paste0(t_("AGYW_ERROR_KP_PREFIX"),
+             paste0(unique(pse$area_id), collapse = "; "),
+             t_("AGYW_ERROR_NAOMI_MISMATCH"),
+             paste0(unique(naomi_ids), collapse = "; "),
+             t_("AGYW_ERROR_CONTACT_SUPPORT")))
+  }
+
+  if(length(female_srb_diff) != 0 ){
+
+    stop(paste0(t_("AGYW_ERROR_FSRB_PREFIX"),
+                paste0(unique(pse$area_id), collapse = "; "),
+                t_("AGYW_ERROR_NAOMI_MISMATCH"),
+                paste0(unique(naomi_ids), collapse = "; "),
+                t_("AGYW_ERROR_CONTACT_SUPPORT")))
+  }
+
+  if(length(male_srb_diff) != 0 ){
+
+    stop(paste0(t_("AGYW_ERROR_MSRB_PREFIX"),
+                paste0(unique(pse$area_id), collapse = "; "),
+                t_("AGYW_ERROR_NAOMI_MISMATCH"),
+                paste0(unique(naomi_ids), collapse = "; "),
+                t_("AGYW_ERROR_CONTACT_SUPPORT")))
+  }
+
+}
+
diff --git a/inst/traduire/en-translation.json b/inst/traduire/en-translation.json
index e33afc4b..d8135952 100644
--- a/inst/traduire/en-translation.json
+++ b/inst/traduire/en-translation.json
@@ -275,5 +275,14 @@
     "DOWNLOAD_OUTPUT_DESCRIPTION": "Naomi output uploaded from Naomi web app",
     "DOWNLOAD_SUMMARY_DESCRIPTION": "Naomi summary report uploaded from Naomi web app",
     "DOWNLOAD_COMPARISON_DESCRIPTION": "Naomi comparison report uploaded from Naomi web app",
-    "DOWNLOAD_AGYW_DESCRIPTION": "Naomi AGYW tool uploaded from Naomi web app"
+    "DOWNLOAD_AGYW_DESCRIPTION": "Naomi AGYW tool uploaded from Naomi web app",
+    "AGYW_ERROR_KP_PREFIX": "Available KP PSE estimates for: \n",
+    "AGYW_ERROR_FSRB_PREFIX": " Available female sexual risk behaviour survey estimates for: \n",
+    "AGYW_ERROR_MSRB_PREFIX": " Available male sexual risk behaviour survey estimates for: \n",
+    "AGYW_ERROR_NAOMI_MISMATCH": "\n\n Do not match Naomi estimates for: \n",
+    "AGYW_ERROR_CONTACT_SUPPORT": "\n\nTo update estimates, please contact Naomi support."
+
+
+
+
 }
diff --git a/inst/traduire/fr-translation.json b/inst/traduire/fr-translation.json
index 7bc5a32b..285ef767 100644
--- a/inst/traduire/fr-translation.json
+++ b/inst/traduire/fr-translation.json
@@ -273,5 +273,10 @@
     "BIRTHS_FACILITY_DESC": "Nombre de naissances dans les établissements de santé",
     "DOWNLOAD_OUTPUT_DESCRIPTION": "Paquet Naomi téléchargée depuis l'application web Naomi",
     "DOWNLOAD_SUMMARY_DESCRIPTION": "Rapport de synthèse Naomi téléchargé depuis l'application web Naomi",
-    "DOWNLOAD_COMPARISON_DESCRIPTION": "Rapport de comparaison Naomi téléchargé à partir de l'application web Naomi"
+    "DOWNLOAD_COMPARISON_DESCRIPTION": "Rapport de comparaison Naomi téléchargé à partir de l'application web Naomi",
+    "AGYW_ERROR_KP_PREFIX": " Available KP PSE estimates for: \n",
+    "AGYW_ERROR_FSRB_PREFIX": " Available female sexual risk behaviour survey estimates for: \n",
+    "AGYW_ERROR_MSRB_PREFIX": " Available male sexual risk behaviour survey estimates for: \n",
+    "AGYW_ERROR_NAOMI_MISMATCH": "\n\n Do not match Naomi estimates for: \n",
+    "AGYW_ERROR_CONTACT_SUPPORT": "\n\nTo update estimates, please contact Naomi support."
 }
diff --git a/inst/traduire/pt-translation.json b/inst/traduire/pt-translation.json
index 07ed65c2..f9157f91 100644
--- a/inst/traduire/pt-translation.json
+++ b/inst/traduire/pt-translation.json
@@ -273,5 +273,10 @@
     "BIRTHS_FACILITY_DESC": "Número de nascimentos em estabelecimentos de saúde",
     "DOWNLOAD_OUTPUT_DESCRIPTION": "Pacote Naomi descarregado a partir da aplicação web Naomi",
     "DOWNLOAD_SUMMARY_DESCRIPTION": "Relatório de síntese da Naomi carregado da aplicação web Naomi",
-    "DOWNLOAD_COMPARISON_DESCRIPTION": "Relatório de comparação Naomi carregado a partir da aplicação web Naomi"
+    "DOWNLOAD_COMPARISON_DESCRIPTION": "Relatório de comparação Naomi carregado a partir da aplicação web Naomi",
+    "AGYW_ERROR_KP_PREFIX": " Available KP PSE estimates for: \n",
+    "AGYW_ERROR_FSRB_PREFIX": " Available female sexual risk behaviour survey estimates for: \n",
+    "AGYW_ERROR_MSRB_PREFIX": " Available male sexual risk behaviour survey estimates for: \n",
+    "AGYW_ERROR_NAOMI_MISMATCH": "\n\n Do not match Naomi estimates for: \n",
+    "AGYW_ERROR_CONTACT_SUPPORT": "\n\nTo update estimates, please contact Naomi support."
 }
diff --git a/tests/testthat/test-downloads.R b/tests/testthat/test-downloads.R
index d0d1099f..f35ecba0 100644
--- a/tests/testthat/test-downloads.R
+++ b/tests/testthat/test-downloads.R
@@ -218,6 +218,19 @@ test_that("AGYW download can be created", {
 
 })
 
+test_that("Error thrown when AGYW resources are out of date", {
+
+  kp_error <- paste0("Available KP PSE estimates for: \n",
+                     "MWI_1_1; MWI_1_2; MWI_1_3",
+                     "\n\n Do not match Naomi estimates for: \n",
+                     "MWI_2_1_demo; MWI_2_2_demo; MWI_2_3_demo; MWI_2_4_demo; MWI_2_5_demo",
+                     "\n\nTo update estimates, please contact Naomi support.")
+
+ expect_error(hintr_prepare_agyw_download(a_hintr_output_calibrated,
+                                          a_hintr_data$pjnz), kp_error)
+
+})
+
 
 test_that("output description is translated", {
   text <- build_output_description(a_hintr_options)

From bd34f085d381f51904524efd34a04021b76f64c3 Mon Sep 17 00:00:00 2001
From: Robert Ashton <r.ashton@imperial.ac.uk>
Date: Tue, 16 Jan 2024 17:30:03 +0000
Subject: [PATCH 30/53] Regen docs

---
 man/write_datapack_csv.Rd | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/man/write_datapack_csv.Rd b/man/write_datapack_csv.Rd
index bcc48a3e..6bcc3622 100644
--- a/man/write_datapack_csv.Rd
+++ b/man/write_datapack_csv.Rd
@@ -4,7 +4,7 @@
 \alias{write_datapack_csv}
 \title{Export naomi outputs to PEPFAR Data Pack format}
 \usage{
-write_datapack_csv(naomi_output, path, psnu_level = NULL)
+write_datapack_csv(naomi_output, path, psnu_level = NULL, dmppt2_output = NULL)
 }
 \arguments{
 \item{naomi_output}{a naomi_output object.}
@@ -14,6 +14,9 @@ write_datapack_csv(naomi_output, path, psnu_level = NULL)
 \item{psnu_level}{area_level for PEPFAR PSNU to export. If NULL,
 first looks in lookup table for the correct area_level, and
 if not defaults to the highest level of the area hierarchy.}
+
+\item{dmppt2_output}{data frame containing the \emph{Datapack inputs}
+sheet of DMPPT2 output file.}
 }
 \description{
 Export naomi outputs to PEPFAR Data Pack format

From ba7d6a26169e78b7e3ded91548b5648f5d23a91d Mon Sep 17 00:00:00 2001
From: Robert Ashton <r.ashton@imperial.ac.uk>
Date: Tue, 16 Jan 2024 17:39:44 +0000
Subject: [PATCH 31/53] Fix issue with incorrect function arg

---
 R/agyw-integration.R | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index 9755bccd..5d046141 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -1527,11 +1527,13 @@ agyw_generate_risk_populations <- function(naomi_output,
 #' Throw warning when area hierarchy in external AGYW resources read in from
 #'  `naomi.resources` do not match Naomi outputs used to update AGYW estimates.
 #'
-#'  @param naomi_output Naomi outputs.
+#'  @param outputs Naomi outputs.
 #'  @param options Naomi options.
+#'  @return Resource hierarchy
+#'  @keywords internal
 
 
-assert_agyw_resource_hierarchy <- function(naomi_output,
+assert_agyw_resource_hierarchy <- function(outputs,
                                            options){
 
   # iso3 from model options

From eaa0d05b189629ce9cfc9ec3b7eafdb527188a94 Mon Sep 17 00:00:00 2001
From: Robert Ashton <r.ashton@imperial.ac.uk>
Date: Wed, 17 Jan 2024 08:44:51 +0000
Subject: [PATCH 32/53] Remove unused helper function

---
 R/utils.R                         | 15 ---------------
 inst/traduire/en-translation.json |  4 ----
 tests/testthat/test-utils.R       |  6 ------
 3 files changed, 25 deletions(-)

diff --git a/R/utils.R b/R/utils.R
index 1dcac24d..06d2b468 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -109,21 +109,6 @@ is_empty <- function(x) {
   length(x) == 0 || is.null(x) || is.na(x) || !nzchar(x)
 }
 
-area_level_from_id <- function(area_ids) {
-  ## Area ids are of format <iso3>_<area_level>_<area_num>
-  ## so we can split and return the 2nd
-  split_ids <- strsplit(area_ids, "_")
-  vnapply(split_ids, function(id)  {
-    if (length(id) == 1) {
-      ## This is our top level ID i.e. level = 0
-      level = 0
-    } else {
-      level = as.numeric(id[2])
-    }
-    level
-  })
-}
-
 #' Write list of data frames into an xlsx file
 #'
 #' @param template Path to xlsx file with empty sheets
diff --git a/inst/traduire/en-translation.json b/inst/traduire/en-translation.json
index d8135952..8f1c8f35 100644
--- a/inst/traduire/en-translation.json
+++ b/inst/traduire/en-translation.json
@@ -281,8 +281,4 @@
     "AGYW_ERROR_MSRB_PREFIX": " Available male sexual risk behaviour survey estimates for: \n",
     "AGYW_ERROR_NAOMI_MISMATCH": "\n\n Do not match Naomi estimates for: \n",
     "AGYW_ERROR_CONTACT_SUPPORT": "\n\nTo update estimates, please contact Naomi support."
-
-
-
-
 }
diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
index 793ede42..96af513f 100644
--- a/tests/testthat/test-utils.R
+++ b/tests/testthat/test-utils.R
@@ -26,9 +26,3 @@ test_that("is_empty", {
   expect_true(is_empty(c()))
   expect_false(is_empty(c("things")))
 })
-
-test_that("can get area level from area id", {
-  expect_equal(area_level_from_id("MWI_2_3"), 2)
-  expect_equal(area_level_from_id("MWI"), 0)
-  expect_equal(area_level_from_id(c("MWI_4_3", "MWI_23_25")), c(4, 23))
-})

From d7aee399912768e5381096627d1023d34121dfe9 Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Mon, 22 Jan 2024 13:38:23 +0200
Subject: [PATCH 33/53] add agyw vignette

---
 R/agyw-integration.R            |   2 +-
 vignettes/hiv-prev-workflow.Rmd | 198 ++++++++++++++++++++++++++++++++
 2 files changed, 199 insertions(+), 1 deletion(-)
 create mode 100644 vignettes/hiv-prev-workflow.Rmd

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index 5d046141..69580f94 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -1221,7 +1221,7 @@ agyw_calculate_incidence_female <- function(naomi_output,
 #'
 #' @param outputs Naomi output.
 #' @param options Naomi model options.
-#' @param male_srb MSM and PWId adjusted estimated of male SRB groups generated by `agyw_adjust_sexbehav_msm_pwid()`.
+#' @param male_srb MSM and PWID adjusted estimated of male SRB groups generated by `agyw_adjust_sexbehav_msm_pwid()`.
 #' @param male_logit_prevalence Risk adjusted estimates of male HIV prevalence in sexual risk groups generated by `agyw_calculate_prevalence_male()`.
 #' @param survey_year Survey year to sample from the SAE model. Default is 2018. Survey year should be updated to most current household survey in the country - for countries without recent household surveys, leave at 2018 - the spatiotemporal
 #' model of sexual behaviour fitted to all countries has the most data for in roughly 2018.
diff --git a/vignettes/hiv-prev-workflow.Rmd b/vignettes/hiv-prev-workflow.Rmd
new file mode 100644
index 00000000..0fd09ecf
--- /dev/null
+++ b/vignettes/hiv-prev-workflow.Rmd
@@ -0,0 +1,198 @@
+---
+title: "HIV prevention prioritization tool"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{HIV prevention prioritization tool workflow}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, echo = FALSE, warning = FALSE, results = 'asis'}
+library(tibble)
+library(gt)
+library(naomi)
+```
+
+## Background
+
+Many HIV prevention programmes aim to reduce new infections but is not feasible to target all individuals living in locations with HIV incidence. In different geographic settings, with different background incidence, sexual behaviours confer different levels of risk of HIV infection. 
+
+Recent HIV programming guidance introduces thresholds for prioritization considering both HIV incidence and sexual behaviours to reach the largest population at risk of HIV. This tool provides the “denominator” for HIV prevention categorised by sex, age, geography, and sexual behaviour.
+
+## Data inputs
+
+These categorizations are calculated using subnational estimates of PLHIV burden by age and sex produced by the Naomi model. [Naomi](https://github.com/mrc-ide/naomi) is a small-area estimation model for estimating HIV prevalence and PLHIV, ART coverage, and new HIV infections at district level by sex and five-year age group. The model combines district-level data about multiple outcomes from several sources in a Bayesian statistical model to produce robust indicators of subnational HIV burden. Naomi is used to annual update estimates of subnational HIV burden as part of the [UNAIDS HIV estimates process](https://www.unaids.org/en/dataanalysis/knowyourresponse/HIVdata_estimates).
+
+The tool synthesises the most recent estimates of subnational PLHIV with outputs from additional mathematical models 
+describing subnational variation in sexual risk behaviour and estimates of key populations at an elevated risk of HIV infection:
+
+* **[Howes et al.](https://journals.plos.org/globalpublichealth/article?id=10.1371/journal.pgph.0001731)** model estimating subnational prevalence of HIV risk behaviours and associated HIV incidence. 
+* **[Stevens et al.](https://www.medrxiv.org/content/10.1101/2022.07.27.22278071v2)** model for population size estimates (PSEs) of key population in sub-Saharan Africa, including female sex workers (FSW), men-who-have-sex-with-men (MSM) and people who inject drugs (PWID).
+* **[Nguyen et al.](https://bmcpublichealth.biomedcentral.com/articles/10.1186/s12889-022-13451-y)**: model for age of sexual debut sub-Saharan Africa.
+
+These outputs of these models are stored in an external repository, [naomi-resources](http://github/mrc-ide/naomi.resources) and are updated regularly to incorporate newly released survey data and to align with changes in geographic changes in country specific administrative boundaries required for planning.
+
+In addition to estimates produced by subnational models, the tool incorporates consensus estimates of KP population size that are developed by national HIV estimates teams as part of the annual UNAIDS HIV estimates process. For more information on this exercise please see [14G Key Population Workbook](https://hivtools.unaids.org/hiv-estimates-training-material-en/).
+
+## Tool workflow
+
+This tool is now integrated into the [Naomi web application](https://naomi.unaids.org/login) and is generated after completing Naomi model as described in [22G Naomi sub-national estimates: Creating subnational HIV estimates](https://hivtools.unaids.org/hiv-estimates-training-material-en/).
+
+If you are running a Naomi model fit with updated administrative boundaries, you may receive an error that the external database containing the SRB or KP PSE model is out of date:
+
+```{r, echo = FALSE, results = 'asis', warning = FALSE}
+
+cat("Error: Available KP PSE estimates for: \n",
+                     "MWI_1_1; MWI_1_2; MWI_1_3",
+                     "\n\n Do not match Naomi estimates for: \n",
+                     "MWI_1_1xc; MWI_2_25d; MWI_2_3cv",
+                     "\n\nTo update estimates, please contact Naomi support.")
+```
+
+## HIV prevention need calculation
+
+**1.Calculate key population sizes by district and age**: 
+
+Regional KP proportion estimates from [Stevens et al.](https://www.medrxiv.org/content/10.1101/2022.07.27.22278071v2) are disaggregated by age and district. 
+
+  * _Calculating sexually active population by age and sex_: Country specific estimates of age of first sexual debut are applied to district level population estimates from Naomi:
+    * For age distribution of MSM and women who sell sex, we assume a combination of:
+      * Age at first sex from [Nguyen et al.](https://bmcpublichealth.biomedcentral.com/articles/10.1186/s12889-022-13451-y)
+      * Age-specific MSM and FSW propensity estimates from [Thembisa](https://www.thembisa.org/content/downloadPage/Thembisa4_3) of:
+        * MSM: Mean age of 25, SD 7
+        * FSW: Mean age of 29, SD 9
+    * For age distribution of PWID literature estimate of age distribution from [Hines et al](web)
+        * PWID: Mean age 29.4, SD 7
+  * _Calculating total KP population by age_: Country specific regional KP proportions from the _Stevens et al_ model are applied to district level population estimates from Naomi and adjusted by age distribution of sexually active population calculated above.
+    * This includes an assumption that a nominal number of PWID (~9%) are female and as a result these are removed from the denominator and PWID are assumed as male from this point onwards.
+    
+  
+**2. Separate general population sexual risk behaviour groups removing KP populations calculated in (1)**:
+
+```{r, echo = FALSE, results = 'asis', warning = FALSE}
+
+tibble::tribble(
+  ~"SRB category", ~ "HIV related risk", 
+  "Low: No sex", "Not sexually active", 
+  "Mid: One regular", "Sexually active, one cohabiting partner", 
+  "High: Non-regular", "Non-regular sexual partner(s)", 
+  "KPs", "FSW, MSM and PWID"
+) %>%
+  gt() %>%
+  tab_header("HIV prevention priority groups") %>%
+  gt::tab_options(
+    table.align = "left",
+    heading.align = "left",
+    column_labels.font.size = "small",
+    column_labels.background.color = "grey",
+    table.font.size = "smaller",
+    data_row.padding = gt::px(3),
+    row_group.background.color = "lightgrey"
+  )
+
+```
+
+Subtract the proportion of KPs from low, mid and high sexual risk behaviour groups estimated in _Risher et al_ model:
+
+  * FSWs only subtracted from high risk SRB group of females aged 15-49.
+  * MSM and PWID subtracted proportionally from all male SRB groups.
+
+**3. Calculate risk group logit prevalence**
+
+To calculate HIV prevalence for specific sexual risk behaviour groups by district and age, we maintain
+HIV prevalence from Naomi for a district-age-sex, but disaggregate to different
+risk behaviours using HIV prevalence ratios from household surveys for those reporting no
+sex vs one cohabiting vs non-regular sexual partner(s).
+
+* Prevalence ratios by behaviour group are used to distribute PLHIV between behavioural risk groups.
+* Household survey data is used to estimate HIV prevalence in the no sex, one regular and non-regular partner(s) groups to calculate log odds-ratios for each behavioural category.
+* HIV prevalence ratio for KPs is based on the ratio of KP HIV prevalence1 to HIV prevalence among all women (FSW) or men (MSM and PWID).
+* HIV prevalence by behaviour is not explicitly presented – it is used to subtract off population sizes to present population susceptible to HIV (HIV-negative) in the previous caclculation step.
+
+
+_For females, this is adjusted by:_
+
+  * A linear regression through admin-1 level estimates of the ratio of KP prevalence to gen-pop prevalence used to predict an age-district-specific FSW to general population prevalence ratio.
+  
+_For males this this is adjusted by:_
+
+  * Admin-1 level estimates of the ratio of KP prevalence to gen-pop prevalence among 15-24 year olds for MSM (due to the young age distribution of MSM) or among 15-49 year olds for PWID (due to the older age distribution of PWID) applied to all age groups among MSM and PWID in districts by admin-1 unit.
+  
+**3. Calculate risk group incidence and new infections**
+
+While maintaining age/sex/district-specific HIV incidence from Naomi, distribute HIV incidence between our 4 different behavioural groups utilizing IRRs from the literature:
+
+* Risk ratios for non-regular sex partners relative to those with a single cohabiting sex partner for females <sup>_1,2,3_</sup> and males <sup>_1,2,4_</sup>. 
+* For FSW, ratio of HIV incidence among women in key populations vs single cohabiting partner women derived based on HIV incidence category in district and tiered risk ratio <sup> 5 </sup>. 
+* For MSM and PWID, using ddmin 1 KP prevalence estimates relative to general population prevalence <sup>6</sup> and estimates from systematic review & meta-regression <sup> 7 </sup>.
+
+
+Number of new infections by risk group is derived by multiplying these estimated incidence rates by risk behaviour times the population sizes of HIV-negative individuals by risk behaviour, in each of the 5-year age groups
+
+```{r, echo = FALSE, results = 'asis', warning = FALSE}
+
+tibble::tribble(
+  ~"SRB category", ~ "Females",  ~"Males" , 
+  "Low: No sex", "0", "0", 
+  "Mid: One regular", "1 (reference category)", "1 (reference category)", 
+  "High: Non-regular",
+  "_Aged 15-24_: 1.72 <br> Aged 25-49: 2.1 <sup>_1,2,3_</sup>", 
+  "_Aged 15-24_: 1.89 <br> Aged 25-49: 2.1 <sup>_1,2,4_</sup>", 
+  "KPs", 
+  "**FSW**: <br> Very high: 3 <br> High: 6 <br>Moderate: 9 <br>Low: 13 <br>Very low: 25 <sup>_5_</sup>",
+  "**MSM**: 2.5-250<sup>_6,7_</sup> <br> **PWID** 2.5-55 <sup>_6_</sup>"
+) %>%
+  gt() %>%
+    gt::tab_options(
+    table.align = "left",
+    heading.align = "left",
+    column_labels.font.size = "small",
+    column_labels.background.color = "grey",
+    table.font.size = "smaller",
+    data_row.padding = gt::px(3),
+    row_group.background.color = "lightgrey"
+  ) %>%
+  fmt_markdown(columns = everything())
+
+```
+
+
+
+
+
+<sup> 1 </sup> ALPHA Network pooled analysis (Slaymaker et al. CROI 2020)
+
+<sup> 2 </sup> [Jia et al. 2022](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8743366/)
+
+<sup> 3 </sup> [Ssempijja et al. 2022](https://journals.lww.com/jaids/fulltext/2022/07010/high_rates_of_pre_exposure_prophylaxis_eligibility.7.aspx).
+
+<sup> 4 </sup> [Hoffman et al. 2022](https://journals.lww.com/jaids/Fulltext/2022/06001/Implementing_PrEP_Services_in_Diverse_Health_Care.15.aspx)
+
+<sup> 5 </sup> [Jones et al. 2023](https://www.medrxiv.org/content/10.1101/2023.10.17.23297108v2)
+
+<sup> 6 </sup> [Stevens et al. 2023](https://www.medrxiv.org/content/10.1101/2022.07.27.22278071v2)
+
+<sup> 7 </sup> [Stannah et al. 2022](https://www.medrxiv.org/content/10.1101/2022.11.14.22282329v1)
+
+
+
+## Limitations of tool
+
+Risk behaviour population size estimates at a district level have a high degree of uncertainty, which is not captured in the current version of the tool
+
+Uncertainty in:
+
+* KP sizes and their age and geographical disaggregation
+* Small area estimates based on survey data
+* Behavioural reports in surveys
+* Incidence rate ratios by behaviour
+* Naomi estimates
+
+PSEs should be considered indicative rather than exact 
+
+
+
+## Usage of tool outputs
+
+Usage of tool output for prioritising groups for HIV prevention can be found [on the UNAIDS website](https://hivtools.unaids.org/pse/).
+

From 31162e8f815e20ee4b2f9acde169372a578ed7dd Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Mon, 22 Jan 2024 16:04:05 +0200
Subject: [PATCH 34/53] Squashed commit of the following:

commit 551ada7fa85e8753da155062d0faf12824dd4eb7
Author: Katie Risher <krisher@ic.ac.uk>
Date:   Fri Jan 19 16:53:00 2024 -0500

    Additional edits to the agyw integration functions

    -New infections instead of incidence for the Naomi outputs tab
    -Naming the countries to match the tool
    -some edits so the area names are formatted more nicely
    -max RR for FSW capped at 35 - if over 35 leads to impossible incidence estimates in some districts

commit 666c504621671089d8deadb32b93dc22989738ce
Author: Katie Risher <krisher@ic.ac.uk>
Date:   Sun Dec 17 22:08:52 2023 -0500

    Add outputs argument to agyw_calculate_prevalence_male

    Add outputs argument to agyw_calculate_prevalence_male

commit 0d3529618cea12c668335816512785f50a3f9078
Author: Katie Risher <krisher@ic.ac.uk>
Date:   Thu Dec 14 23:05:36 2023 -0500

    Remove rr_sexnonreg

    Remove rr_sexnonreg from female output
---
 .gitignore           |  1 +
 R/agyw-integration.R | 72 +++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 65 insertions(+), 8 deletions(-)

diff --git a/.gitignore b/.gitignore
index cd724ac7..9b09966a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,4 @@ tests/testthat/testdata/fit.RDS
 .vscode
 .Rprofile
 .idea
+
diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index 69580f94..de7f76f4 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -87,12 +87,57 @@ agyw_format_naomi <- function(outputs, options){
                                           TRUE ~ NA_character_),
                   indicator = "Incicategory")
 
-  # Incidence for all age groups + sexes
-  df5 <- naomi_ind_labelled %>%
-    dplyr::filter(indicator == "incidence", age_group == "Y000_999", sex == "both",
+  # New infections for all age groups + sexes
+  df5 <- naomi_ind %>%
+    dplyr::filter(indicator == "infections", age_group == "Y000_999", sex == "both",
                   area_level == options$area_level)
 
-  country <- outputs$meta_area$area_name[outputs$meta_area$area_id == options$area_scope]
+  # Not all country names in meta_area match the country names in the spreadsheet - need
+  # to match to populate tabs
+  # country <- outputs$meta_area$area_name[outputs$meta_area$area_id == options$area_scope]
+  country <- dplyr::mutate(data.frame(iso3 = options$area_scope),
+                           country = fct_recode(iso3,
+                                             "Botswana" = "BWA",
+                                             "Cameroon" = "CMR",
+                                             "Kenya" = "KEN",
+                                             "Lesotho" = "LSO",
+                                             "Mozambique" = "MOZ",
+                                             "Malawi" = "MWI",
+                                             "Namibia" = "NAM",
+                                             "Eswatini" = "SWZ",
+                                             "Tanzania" = "TZA",
+                                             "Uganda" = "UGA",
+                                             "South Africa" = "ZAF",
+                                             "Zambia" = "ZMB",
+                                             "Zimbabwe" = "ZWE",
+                                             "Angola" = "AGO",
+                                             "Burundi" = "BDI",
+                                             "Democratic Republic of the Congo" = "COD",
+                                             "Gabon" = "GAB",
+                                             "Rwanda" = "RWA",
+                                             "Ethiopia" = "ETH",
+                                             "Haiti" = "HTI",
+                                             "Chad" = "TCD",
+                                             "Cote D'Ivoire" = "CIV",
+                                             "Ghana" = "GHA",
+                                             "Guinea" = "GIN",
+                                             "Liberia" = "LBR",
+                                             "Mali" = "MLI",
+                                             "Niger" = "NER",
+                                             "Sierra Leone" = "SLE",
+                                             "Togo" = "TGO",
+                                             "Burkina Faso" = "BFA",
+                                             "Congo" = "COG",
+                                             "Benin" = "BEN",
+                                             "Central African Republic" = "CAF",
+                                             "The Gambia" = "GMB",
+                                             "Guinea bissau" = "GNB",
+                                             "Equatorial Guinea" = "GNQ",
+                                             "Niger" = "NER",
+                                             "Nigeria" = "NGA",
+                                             "Senegal" = "SEN"))
+
+
 
 
   # Format
@@ -101,7 +146,7 @@ agyw_format_naomi <- function(outputs, options){
   tidyr::pivot_wider(id_cols = c(area_id,area_name),
                      names_from = c(indicator,age_group_label,sex),
                      names_sep = "", values_from = mean) %>%
-    dplyr::mutate(Country = country, newAll = df5$mean) %>%
+    dplyr::mutate(Country = country$country[1], newAll = df5$mean) %>%
     dplyr::select(Country,area_id,area_name,`Pop15-24all`,`Pop15-24f`,`Pop15-24m`,
                   `PLHIV15-24all`,`PLHIV15-24f`,`PLHIV15-24m`,
                   newAll, `new15-24all`,`new15-24f`,`new15-24m`,
@@ -143,6 +188,17 @@ agyw_format_naomi <- function(outputs, options){
                   `new15-49all`,`new15-49f`,`new15-49m`,
                   `Inci15-49f`,`Incicategory15-49f`,`Inci15-49m`,`Incicategory15-49m`)
 
+  ## Clean up area names and Country names
+  if(options$area_scope=="AGO") {
+    naomi_wide$area_name <- stringr::str_to_title(naomi_wide$area_name)
+  }
+  if(options$area_scope=="COD") {
+    naomi_wide$Country <- "Democratic Republic of the Congo"
+  }
+  if(options$area_scope %in% c("TCD","GIN")) {
+    naomi_wide$area_name <- iconv(naomi_wide$area_name, from="UTF-8",to="LATIN1")
+  }
+
   v <- list(naomi_long = df2,
             naomi_wide = naomi_wide)
 
@@ -1080,8 +1136,8 @@ agyw_calculate_incidence_female <- function(naomi_output,
 
   rr_sexpaid12m <- rr_reg_dat$rr_sexpaid12m
   # This gives implausibly high RRs for very low districts (e.g. IRR = 297!)
-  # capping at 100
-  rr_sexpaid12m[rr_sexpaid12m > 100] <- 100
+  # capping at 35
+  rr_sexpaid12m[rr_sexpaid12m > 35] <- 35
 
   # TODO: Get distributions on these and using a sampling method to get
   # uncertainty in economic analysis e.g.
@@ -1200,7 +1256,7 @@ agyw_calculate_incidence_female <- function(naomi_output,
                   iso3, area_level,
                   population, plhiv, infections, incidence, incidence_cat,
                   prev_nosex12m, prev_sexcohab, prev_sexnonreg, prev_sexpaid12m,
-                  rr_sexpaid12m, rr_sexnonreg,
+                  rr_sexpaid12m, # rr_sexnonreg,
                   population_nosex12m, population_sexcohab,
                   population_sexnonreg, population_sexpaid12m,
                   plhiv_nosex12m, plhiv_sexcohab,

From 04e7e40f49d84007ec53620b039077c57439697e Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Mon, 22 Jan 2024 16:50:03 +0200
Subject: [PATCH 35/53] update code to clean up country names for workbook

---
 R/agyw-integration.R | 95 +++++++++++++++++++++-----------------------
 1 file changed, 46 insertions(+), 49 deletions(-)

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index de7f76f4..8fc15492 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -88,57 +88,56 @@ agyw_format_naomi <- function(outputs, options){
                   indicator = "Incicategory")
 
   # New infections for all age groups + sexes
-  df5 <- naomi_ind %>%
+  df5 <- naomi_ind_labelled %>%
     dplyr::filter(indicator == "infections", age_group == "Y000_999", sex == "both",
                   area_level == options$area_level)
 
   # Not all country names in meta_area match the country names in the spreadsheet - need
   # to match to populate tabs
-  # country <- outputs$meta_area$area_name[outputs$meta_area$area_id == options$area_scope]
-  country <- dplyr::mutate(data.frame(iso3 = options$area_scope),
-                           country = fct_recode(iso3,
-                                             "Botswana" = "BWA",
-                                             "Cameroon" = "CMR",
-                                             "Kenya" = "KEN",
-                                             "Lesotho" = "LSO",
-                                             "Mozambique" = "MOZ",
-                                             "Malawi" = "MWI",
-                                             "Namibia" = "NAM",
-                                             "Eswatini" = "SWZ",
-                                             "Tanzania" = "TZA",
-                                             "Uganda" = "UGA",
-                                             "South Africa" = "ZAF",
-                                             "Zambia" = "ZMB",
-                                             "Zimbabwe" = "ZWE",
-                                             "Angola" = "AGO",
-                                             "Burundi" = "BDI",
-                                             "Democratic Republic of the Congo" = "COD",
-                                             "Gabon" = "GAB",
-                                             "Rwanda" = "RWA",
-                                             "Ethiopia" = "ETH",
-                                             "Haiti" = "HTI",
-                                             "Chad" = "TCD",
-                                             "Cote D'Ivoire" = "CIV",
-                                             "Ghana" = "GHA",
-                                             "Guinea" = "GIN",
-                                             "Liberia" = "LBR",
-                                             "Mali" = "MLI",
-                                             "Niger" = "NER",
-                                             "Sierra Leone" = "SLE",
-                                             "Togo" = "TGO",
-                                             "Burkina Faso" = "BFA",
-                                             "Congo" = "COG",
-                                             "Benin" = "BEN",
-                                             "Central African Republic" = "CAF",
-                                             "The Gambia" = "GMB",
-                                             "Guinea bissau" = "GNB",
-                                             "Equatorial Guinea" = "GNQ",
-                                             "Niger" = "NER",
-                                             "Nigeria" = "NGA",
-                                             "Senegal" = "SEN"))
-
-
 
+  country_name_db <- tibble::tribble(~country, ~iso3,
+                                     "Botswana", "BWA",
+                                     "Cameroon", "CMR",
+                                     "Kenya", "KEN",
+                                     "Lesotho", "LSO",
+                                     "Mozambique", "MOZ",
+                                     "Malawi", "MWI_demo",
+                                     "Malawi", "MWI",
+                                     "Namibia", "NAM",
+                                     "Eswatini", "SWZ",
+                                     "Tanzania", "TZA",
+                                     "Uganda", "UGA",
+                                     "South Africa", "ZAF",
+                                     "Zambia", "ZMB",
+                                     "Zimbabwe", "ZWE",
+                                     "Angola", "AGO",
+                                     "Burundi", "BDI",
+                                     "Democratic Republic of the Congo", "COD",
+                                     "Gabon", "GAB",
+                                     "Rwanda", "RWA",
+                                     "Ethiopia", "ETH",
+                                     "Haiti", "HTI",
+                                     "Chad", "TCD",
+                                     "Cote D'Ivoire", "CIV",
+                                     "Ghana", "GHA",
+                                     "Guinea", "GIN",
+                                     "Liberia", "LBR",
+                                     "Mali", "MLI",
+                                     "Niger", "NER",
+                                     "Sierra Leone", "SLE",
+                                     "Togo", "TGO",
+                                     "Burkina Faso", "BFA",
+                                     "Congo", "COG",
+                                     "Benin", "BEN",
+                                     "Central African Republic", "CAF",
+                                     "The Gambia", "GMB",
+                                     "Guinea bissau", "GNB",
+                                     "Equatorial Guinea", "GNQ",
+                                     "Niger", "NER",
+                                     "Nigeria", "NGA",
+                                     "Senegal", "SEN")
+
+  country_name <- country_name_db[country_name_db$iso3 == options$area_scope,]$country
 
   # Format
   naomi_wide <- dplyr::bind_rows(df3, df4) %>%
@@ -146,7 +145,7 @@ agyw_format_naomi <- function(outputs, options){
   tidyr::pivot_wider(id_cols = c(area_id,area_name),
                      names_from = c(indicator,age_group_label,sex),
                      names_sep = "", values_from = mean) %>%
-    dplyr::mutate(Country = country$country[1], newAll = df5$mean) %>%
+    dplyr::mutate(Country = country_name, newAll = df5$mean) %>%
     dplyr::select(Country,area_id,area_name,`Pop15-24all`,`Pop15-24f`,`Pop15-24m`,
                   `PLHIV15-24all`,`PLHIV15-24f`,`PLHIV15-24m`,
                   newAll, `new15-24all`,`new15-24f`,`new15-24m`,
@@ -192,9 +191,7 @@ agyw_format_naomi <- function(outputs, options){
   if(options$area_scope=="AGO") {
     naomi_wide$area_name <- stringr::str_to_title(naomi_wide$area_name)
   }
-  if(options$area_scope=="COD") {
-    naomi_wide$Country <- "Democratic Republic of the Congo"
-  }
+
   if(options$area_scope %in% c("TCD","GIN")) {
     naomi_wide$area_name <- iconv(naomi_wide$area_name, from="UTF-8",to="LATIN1")
   }

From e923fceed2f0c943e3a3a6423fb6e9407bdd7d73 Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Tue, 23 Jan 2024 13:50:06 +0200
Subject: [PATCH 36/53] add DOI for Hines 2020 paper

---
 R/agyw-integration.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index 8fc15492..3c002628 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -418,7 +418,8 @@ agyw_disaggregate_pwid <- function(outputs,
   pwid$total_pwid <- pwid$total_pwid * 0.91
 
   # PWID age distribution
-  # Review of literature - Hines et al Lancet Global Health 2020
+  # Review of literature - Hines et al Lancet Global Health 2020:
+  # DOI:https://doi.org/10.1016/S2214-109X(19)30462-0
   gamma_mean <- 29.4
   gamma_sd <- 7
   beta <- gamma_mean / gamma_sd^2 # rate

From 611c9fd3645f2e890454255ec182af0aa95cc115 Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Tue, 23 Jan 2024 15:20:35 +0200
Subject: [PATCH 37/53] add warning for bad KP workbook consensus estimates

---
 R/agyw-integration.R                          |  68 +++++++++++-------
 inst/traduire/en-translation.json             |   5 +-
 inst/traduire/fr-translation.json             |   5 +-
 inst/traduire/pt-translation.json             |   5 +-
 tests/testthat/test-downloads.R               |  11 +++
 .../testdata/kp_workbook_spectrum_bad.rds     | Bin 0 -> 367 bytes
 6 files changed, 67 insertions(+), 27 deletions(-)
 create mode 100644 tests/testthat/testdata/kp_workbook_spectrum_bad.rds

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index 3c002628..fa5c62ef 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -248,16 +248,21 @@ agyw_disaggregate_fsw <- function(outputs,
 
     # Check if consensus estimate is larger than age matched population denominator
     pop <- naomi_pop[naomi_pop$area_level == 0 & naomi_pop$age_group == "Y015_049" & naomi_pop$sex == "female",]$population
-    stopifnot(fsw_consensus < pop)
+    prop_pop <- fsw_consensus / pop
 
-    # Scale total FSW population to consensus PSE estimate
-    fsw_scaled <- fsw %>%
-      dplyr::mutate(
-        relative_prop = total_fsw/sum(total_fsw),
-        consensus_pse = fsw_consensus,
-        total_fsw = consensus_pse * relative_prop)
+    if(prop_pop >= 0.05) {
+      t_("FSW_CONSENSUS_WARNING")
+    } else {
 
-    fsw <- fsw_scaled %>% dplyr::select(-consensus_pse, relative_prop)
+      # Scale total FSW population to consensus PSE estimate
+      fsw_scaled <- fsw %>%
+        dplyr::mutate(
+          relative_prop = total_fsw/sum(total_fsw),
+          consensus_pse = fsw_consensus,
+          total_fsw = consensus_pse * relative_prop)
+
+      fsw <- fsw_scaled %>% dplyr::select(-consensus_pse, relative_prop)
+    }
 
   }
 
@@ -396,22 +401,28 @@ agyw_disaggregate_pwid <- function(outputs,
   pwid_consensus <- kp_consensus[kp_consensus$key_population == "PWID", ]$population_size
 
   if(!is.na(pwid_consensus)){
-
     # Check if consensus estimate is larger than age matched population denominator
     pop <- naomi_pop[naomi_pop$area_level == 0 & naomi_pop$age_group == "Y015_049" & naomi_pop$sex == "male",]$population
-    stopifnot(pwid_consensus < pop)
+    prop_pop <- pwid_consensus / pop
 
-    # Scale total PWID population to consensus PSE estimate
-    pwid_scaled <- pwid %>%
-      dplyr::mutate(
-        relative_prop = total_pwid/sum(total_pwid),
-        consensus_pse = pwid_consensus,
-        total_pwid = consensus_pse * relative_prop)
+    if(prop_pop >= 0.05) {
+      t_("PWID_CONSENSUS_WARNING")
+    } else {
+
+      # Scale total PWID population to consensus PSE estimate
+      pwid_scaled <- pwid %>%
+        dplyr::mutate(
+          relative_prop = total_pwid/sum(total_pwid),
+          consensus_pse = pwid_consensus,
+          total_pwid = consensus_pse * relative_prop)
+
+      pwid <- pwid_scaled %>% dplyr::select(-consensus_pse, relative_prop)
+    }
 
-    pwid <- pwid_scaled %>% dplyr::select(-consensus_pse, relative_prop)
   }
 
 
+
   # Assumption from literature that 9% of PWID are female so remove them from
   # the male denominator
 
@@ -497,17 +508,26 @@ agyw_disaggregate_msm <- function(outputs,
     pop <- naomi_pop[naomi_pop$area_level == 0 & naomi_pop$age_group == "Y015_049" & naomi_pop$sex == "male",]$population
     stopifnot(msm_consensus < pop)
 
-    # Scale total MSM population to consensus PSE estimate
-    msm_scaled <- msm %>%
-      dplyr::mutate(
-        relative_prop = total_msm/sum(total_msm),
-        consensus_pse = msm_consensus,
-        total_msm = consensus_pse * relative_prop)
+    if(prop_pop >= 0.05) {
+      t_("MSM_CONSENSUS_WARNING")
+    } else {
+
+      # Scale total MSM population to consensus PSE estimate
+      msm_scaled <- msm %>%
+        dplyr::mutate(
+          relative_prop = total_msm/sum(total_msm),
+          consensus_pse = msm_consensus,
+          total_msm = consensus_pse * relative_prop)
+
+      msm <- msm_scaled %>% dplyr::select(-consensus_pse, relative_prop)
+    }
 
-    msm <- msm_scaled %>% dplyr::select(-consensus_pse, relative_prop)
   }
 
 
+
+
+
   # MSM age distribution parameters in ZAF from Thembisa
   # Downloaded from: https://www.thembisa.org/content/downloadPage/Thembisa4_3report
   gamma_mean <- 25
diff --git a/inst/traduire/en-translation.json b/inst/traduire/en-translation.json
index 8f1c8f35..3421000b 100644
--- a/inst/traduire/en-translation.json
+++ b/inst/traduire/en-translation.json
@@ -280,5 +280,8 @@
     "AGYW_ERROR_FSRB_PREFIX": " Available female sexual risk behaviour survey estimates for: \n",
     "AGYW_ERROR_MSRB_PREFIX": " Available male sexual risk behaviour survey estimates for: \n",
     "AGYW_ERROR_NAOMI_MISMATCH": "\n\n Do not match Naomi estimates for: \n",
-    "AGYW_ERROR_CONTACT_SUPPORT": "\n\nTo update estimates, please contact Naomi support."
+    "AGYW_ERROR_CONTACT_SUPPORT": "\n\nTo update estimates, please contact Naomi support.",
+    "FSW_CONSENSUS_WARNING": "Consensus estimate for FSW is larger than 5% of total females aged 15-59. Modelled estimates for FWS PSEs will be used.",
+    "MSM_CONSENSUS_WARNING": "Consensus estimate for MSM is larger than 5% of total males aged 15-59. Modelled estimates for FWS PSEs will be used.",
+    "PWID_CONSENSUS_WARNING": "Consensus estimate for PWID is larger than 5% of total males aged 15-59. Modelled estimates for PWID PSEs will be used."
 }
diff --git a/inst/traduire/fr-translation.json b/inst/traduire/fr-translation.json
index 285ef767..869df166 100644
--- a/inst/traduire/fr-translation.json
+++ b/inst/traduire/fr-translation.json
@@ -278,5 +278,8 @@
     "AGYW_ERROR_FSRB_PREFIX": " Available female sexual risk behaviour survey estimates for: \n",
     "AGYW_ERROR_MSRB_PREFIX": " Available male sexual risk behaviour survey estimates for: \n",
     "AGYW_ERROR_NAOMI_MISMATCH": "\n\n Do not match Naomi estimates for: \n",
-    "AGYW_ERROR_CONTACT_SUPPORT": "\n\nTo update estimates, please contact Naomi support."
+    "AGYW_ERROR_CONTACT_SUPPORT": "\n\nTo update estimates, please contact Naomi support.",
+    "FSW_CONSENSUS_WARNING": "Consensus estimate for FSW is larger than 5% of total females aged 15-59. Modelled estimates for FWS PSEs will be used.",
+    "MSM_CONSENSUS_WARNING": "Consensus estimate for MSM is larger than 5% of total males aged 15-59. Modelled estimates for FWS PSEs will be used.",
+    "PWID_CONSENSUS_WARNING": "Consensus estimate for PWID is larger than 5% of total males aged 15-59. Modelled estimates for PWID PSEs will be used."
 }
diff --git a/inst/traduire/pt-translation.json b/inst/traduire/pt-translation.json
index f9157f91..d20583b8 100644
--- a/inst/traduire/pt-translation.json
+++ b/inst/traduire/pt-translation.json
@@ -278,5 +278,8 @@
     "AGYW_ERROR_FSRB_PREFIX": " Available female sexual risk behaviour survey estimates for: \n",
     "AGYW_ERROR_MSRB_PREFIX": " Available male sexual risk behaviour survey estimates for: \n",
     "AGYW_ERROR_NAOMI_MISMATCH": "\n\n Do not match Naomi estimates for: \n",
-    "AGYW_ERROR_CONTACT_SUPPORT": "\n\nTo update estimates, please contact Naomi support."
+    "AGYW_ERROR_CONTACT_SUPPORT": "\n\nTo update estimates, please contact Naomi support.",
+    "FSW_CONSENSUS_WARNING": "Consensus estimate for FSW is larger than 5% of total females aged 15-59. Modelled estimates for FWS PSEs will be used.",
+    "MSM_CONSENSUS_WARNING": "Consensus estimate for MSM is larger than 5% of total males aged 15-59. Modelled estimates for FWS PSEs will be used.",
+    "PWID_CONSENSUS_WARNING": "Consensus estimate for PWID is larger than 5% of total males aged 15-59. Modelled estimates for PWID PSEs will be used."
 }
diff --git a/tests/testthat/test-downloads.R b/tests/testthat/test-downloads.R
index c9e2248a..ca985cd3 100644
--- a/tests/testthat/test-downloads.R
+++ b/tests/testthat/test-downloads.R
@@ -246,6 +246,17 @@ test_that("AGYW download can be created", {
                data.frame(kp = c("FSW", "MSM", "PWID"),
                           consensus_estimate = c(40000, 35500, 5000)))
 
+  # Test for warning when KP workbook has PSE consensus estimates that
+  # are >5% of the age matched population
+  kp_consensus_bad <- readRDS(file.path("testdata/kp_workbook_spectrum_bad.rds"))
+  mock_extract_kp_workbook <- mockery::mock(kp_consensus_bad)
+  mock_new_simple_progress <- mockery::mock(MockSimpleProgress$new())
+
+
+  expect_equal(risk_prop_scaled$meta_consensus,
+               data.frame(kp = c("FSW", "MSM", "PWID"),
+                          consensus_estimate = c(260000, 260000, 260000)))
+
 })
 
 test_that("Error thrown when AGYW resources are out of date", {
diff --git a/tests/testthat/testdata/kp_workbook_spectrum_bad.rds b/tests/testthat/testdata/kp_workbook_spectrum_bad.rds
new file mode 100644
index 0000000000000000000000000000000000000000..871fd1f0bf51a3f2c8a6034ee2f3ce93f06912ac
GIT binary patch
literal 367
zcmV-#0g(P5iwFP!000001B>8dU|?WoU}0ipU}gm}8CXL@+;lB~V!}WU2M`MYF$)lK
zf)u(1hr^h@!M;!?Q;0j1#S#$i=>nvA!Fm|jpQ2(0K9IO0|6Y(q47g~RI{W+Y=Wf#F
z3$s6v<a_Dk%0KoO-Peg!J@l|YxbpY&oNgDmy0>yW0$;bv+rOO&0<#z}0a%^Gv<qO4
zL(&5X9R!hdFoml__N!1yYH>+?d45rLQht86UPVrE1(jvs;e-xYn1BQ$R3~d*Vs2_N
z)Nve8LB8zN%J_o(g3_GClFa-(SnyY-CKf^2{0Q0j;>@a4n8u9EviO3c)Uw2!)VySv
zC{JQhNqlmCS!z*YI!uf!GcPSQ8Kl1$D#8nMetcSH4osOyaRE?OQE6^`QEEEKvgG`f
zR5UMe7Uh@gA$tQB#w;$0C5ccrV+1T^Md%qKm`3Qqk(`rQ3^NBN$dv*#M=z}iXa$h>
N4**=_3l-G@00581p?d%T

literal 0
HcmV?d00001


From f84c756f2b114330fc7ad601ee1e4ada5c0ea52a Mon Sep 17 00:00:00 2001
From: Robert Ashton <r.ashton@imperial.ac.uk>
Date: Wed, 24 Jan 2024 15:22:42 +0000
Subject: [PATCH 38/53] Bump required version of naomi.resource and update
 branch pin

---
 DESCRIPTION | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 8b2f0efc..c5595fba 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -36,7 +36,7 @@ Imports:
     knitr,
     magrittr,
     mvtnorm,
-    naomi.resources (>= 0.0.2),
+    naomi.resources (>= 0.0.3),
     naomi.options (>= 1.2.0),
     openxlsx,
     plotly,
@@ -78,6 +78,7 @@ Remotes:
     first90=mrc-ide/first90release,
     reside-ic/traduire,
     mrc-ide/naomi.options,
+    mrc-ide/naomi.resources@update-workbook,
     mrc-ide/mockr,
     mrc-ide/testthat.buildkite,
     duckdb/duckdb-r@v0.9.1

From a4c6fe0d2a28063055d0334ba4d6010ec7ee213a Mon Sep 17 00:00:00 2001
From: Robert Ashton <r.ashton@imperial.ac.uk>
Date: Wed, 24 Jan 2024 15:23:34 +0000
Subject: [PATCH 39/53] Update write sheet to work with file which has existing
 column headers

---
 R/utils.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/utils.R b/R/utils.R
index 06d2b468..1b7b7ead 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -121,7 +121,9 @@ is_empty <- function(x) {
 write_xlsx_sheets <- function(template, sheets, path) {
   wb <- openxlsx::loadWorkbook(template)
   for (sheet in names(sheets)) {
-    openxlsx::writeData(wb, sheet, sheets[[sheet]])
+    openxlsx::writeData(wb, sheet, sheets[[sheet]],
+                        startRow = 2,
+                        colNames = FALSE)
   }
 
   openxlsx::saveWorkbook(wb, path)

From a10bfcd0a8f60407c69acf400e402f42736afb25 Mon Sep 17 00:00:00 2001
From: Robert Ashton <r.ashton@imperial.ac.uk>
Date: Wed, 24 Jan 2024 15:45:53 +0000
Subject: [PATCH 40/53] Return error message with context should generation
 fail

---
 R/agyw-integration.R                  | 9 +++++++++
 R/downloads.R                         | 4 +---
 R/utils.R                             | 5 ++++-
 inst/traduire/en-translation.json     | 1 +
 inst/traduire/fr-translation.json     | 1 +
 inst/traduire/pt-translation.json     | 1 +
 man/agyw_calculate_incidence_male.Rd  | 2 +-
 man/assert_agyw_resource_hierarchy.Rd | 6 ++++--
 man/write_xlsx_sheets.Rd              | 5 +++--
 tests/testthat/test-downloads.R       | 7 +++++++
 10 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index fa5c62ef..6b6b27b6 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -1657,3 +1657,12 @@ assert_agyw_resource_hierarchy <- function(outputs,
 
 }
 
+write_agyw_workbook <- function(sheets, dest) {
+  template_path <- naomi.resources::get_agyw_workbook_path()
+  withCallingHandlers(
+    write_xlsx_sheets(template_path, sheets, dest),
+    error = function(e) {
+      e$message <- t_("AGYW_ERROR_WRITE", list(message = e$message))
+      stop(e)
+    })
+}
diff --git a/R/downloads.R b/R/downloads.R
index d67924dd..302d05fc 100644
--- a/R/downloads.R
+++ b/R/downloads.R
@@ -116,8 +116,6 @@ hintr_prepare_agyw_download <- function(output, pjnz,
   progress <- new_simple_progress()
   progress$update_progress("PROGRESS_DOWNLOAD_AGYW")
 
-  template_path <- naomi.resources::get_agyw_workbook_path()
-
   risk_populations <- agyw_generate_risk_populations(output$model_output_path,
                                                      pjnz)
 
@@ -126,7 +124,7 @@ hintr_prepare_agyw_download <- function(output, pjnz,
     "All outputs - M" = risk_populations$male_incidence,
     "NAOMI outputs" = risk_populations$naomi_output
   )
-  write_xlsx_sheets(template_path, sheets, path = path)
+  write_agyw_workbook(sheets, dest = path)
 
   model_output <- read_hintr_output(output$model_output_path)
   options <- yaml::read_yaml(text = model_output$info$options.yml)
diff --git a/R/utils.R b/R/utils.R
index 1b7b7ead..94f2f598 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -111,7 +111,10 @@ is_empty <- function(x) {
 
 #' Write list of data frames into an xlsx file
 #'
-#' @param template Path to xlsx file with empty sheets
+#' This doesn't write colmn headers into the workbook, it expects
+#' that these already exist.
+#'
+#' @param template Path to xlsx file with sheets
 #' @param sheets Named list of data frames to write into template. The names
 #'   must match the destination sheet in the xlsx
 #' @param path Path to output the filled in xlsx
diff --git a/inst/traduire/en-translation.json b/inst/traduire/en-translation.json
index 3421000b..0186d3d8 100644
--- a/inst/traduire/en-translation.json
+++ b/inst/traduire/en-translation.json
@@ -281,6 +281,7 @@
     "AGYW_ERROR_MSRB_PREFIX": " Available male sexual risk behaviour survey estimates for: \n",
     "AGYW_ERROR_NAOMI_MISMATCH": "\n\n Do not match Naomi estimates for: \n",
     "AGYW_ERROR_CONTACT_SUPPORT": "\n\nTo update estimates, please contact Naomi support.",
+    "AGYW_ERROR_WRITE": "Failed to build workbook, please contact support: {{message}}",
     "FSW_CONSENSUS_WARNING": "Consensus estimate for FSW is larger than 5% of total females aged 15-59. Modelled estimates for FWS PSEs will be used.",
     "MSM_CONSENSUS_WARNING": "Consensus estimate for MSM is larger than 5% of total males aged 15-59. Modelled estimates for FWS PSEs will be used.",
     "PWID_CONSENSUS_WARNING": "Consensus estimate for PWID is larger than 5% of total males aged 15-59. Modelled estimates for PWID PSEs will be used."
diff --git a/inst/traduire/fr-translation.json b/inst/traduire/fr-translation.json
index 869df166..02f7abb7 100644
--- a/inst/traduire/fr-translation.json
+++ b/inst/traduire/fr-translation.json
@@ -279,6 +279,7 @@
     "AGYW_ERROR_MSRB_PREFIX": " Available male sexual risk behaviour survey estimates for: \n",
     "AGYW_ERROR_NAOMI_MISMATCH": "\n\n Do not match Naomi estimates for: \n",
     "AGYW_ERROR_CONTACT_SUPPORT": "\n\nTo update estimates, please contact Naomi support.",
+    "AGYW_ERROR_WRITE": "Échec de la création du classeur, veuillez contacter l'assistance: {{message}}",
     "FSW_CONSENSUS_WARNING": "Consensus estimate for FSW is larger than 5% of total females aged 15-59. Modelled estimates for FWS PSEs will be used.",
     "MSM_CONSENSUS_WARNING": "Consensus estimate for MSM is larger than 5% of total males aged 15-59. Modelled estimates for FWS PSEs will be used.",
     "PWID_CONSENSUS_WARNING": "Consensus estimate for PWID is larger than 5% of total males aged 15-59. Modelled estimates for PWID PSEs will be used."
diff --git a/inst/traduire/pt-translation.json b/inst/traduire/pt-translation.json
index d20583b8..2490040e 100644
--- a/inst/traduire/pt-translation.json
+++ b/inst/traduire/pt-translation.json
@@ -279,6 +279,7 @@
     "AGYW_ERROR_MSRB_PREFIX": " Available male sexual risk behaviour survey estimates for: \n",
     "AGYW_ERROR_NAOMI_MISMATCH": "\n\n Do not match Naomi estimates for: \n",
     "AGYW_ERROR_CONTACT_SUPPORT": "\n\nTo update estimates, please contact Naomi support.",
+    "AGYW_ERROR_WRITE": "Falha ao criar a pasta de trabalho, entre em contato com o suporte: {{message}}",
     "FSW_CONSENSUS_WARNING": "Consensus estimate for FSW is larger than 5% of total females aged 15-59. Modelled estimates for FWS PSEs will be used.",
     "MSM_CONSENSUS_WARNING": "Consensus estimate for MSM is larger than 5% of total males aged 15-59. Modelled estimates for FWS PSEs will be used.",
     "PWID_CONSENSUS_WARNING": "Consensus estimate for PWID is larger than 5% of total males aged 15-59. Modelled estimates for PWID PSEs will be used."
diff --git a/man/agyw_calculate_incidence_male.Rd b/man/agyw_calculate_incidence_male.Rd
index eeca94ab..1b7c4f05 100644
--- a/man/agyw_calculate_incidence_male.Rd
+++ b/man/agyw_calculate_incidence_male.Rd
@@ -15,7 +15,7 @@ agyw_calculate_incidence_male(
 \arguments{
 \item{options}{Naomi model options.}
 
-\item{male_srb}{MSM and PWId adjusted estimated of male SRB groups generated by \code{agyw_adjust_sexbehav_msm_pwid()}.}
+\item{male_srb}{MSM and PWID adjusted estimated of male SRB groups generated by \code{agyw_adjust_sexbehav_msm_pwid()}.}
 
 \item{male_logit_prevalence}{Risk adjusted estimates of male HIV prevalence in sexual risk groups generated by \code{agyw_calculate_prevalence_male()}.}
 
diff --git a/man/assert_agyw_resource_hierarchy.Rd b/man/assert_agyw_resource_hierarchy.Rd
index 75cc39cf..77dd977e 100644
--- a/man/assert_agyw_resource_hierarchy.Rd
+++ b/man/assert_agyw_resource_hierarchy.Rd
@@ -5,9 +5,11 @@
 \title{Throw warning when area hierarchy in external AGYW resources read in from
 \code{naomi.resources} do not match Naomi outputs used to update AGYW estimates.}
 \usage{
-assert_agyw_resource_hierarchy(naomi_output, options)
+assert_agyw_resource_hierarchy(outputs, options)
 }
 \description{
-@param naomi_output Naomi outputs.
+@param outputs Naomi outputs.
 @param options Naomi options.
+@return Resource hierarchy
+@keywords internal
 }
diff --git a/man/write_xlsx_sheets.Rd b/man/write_xlsx_sheets.Rd
index 30b50d84..f8dafe85 100644
--- a/man/write_xlsx_sheets.Rd
+++ b/man/write_xlsx_sheets.Rd
@@ -7,7 +7,7 @@
 write_xlsx_sheets(template, sheets, path)
 }
 \arguments{
-\item{template}{Path to xlsx file with empty sheets}
+\item{template}{Path to xlsx file with sheets}
 
 \item{sheets}{Named list of data frames to write into template. The names
 must match the destination sheet in the xlsx}
@@ -18,6 +18,7 @@ must match the destination sheet in the xlsx}
 Path to complete xlsx file
 }
 \description{
-Write list of data frames into an xlsx file
+This doesn't write colmn headers into the workbook, it expects
+that these already exist.
 }
 \keyword{internal}
diff --git a/tests/testthat/test-downloads.R b/tests/testthat/test-downloads.R
index ca985cd3..86ad34da 100644
--- a/tests/testthat/test-downloads.R
+++ b/tests/testthat/test-downloads.R
@@ -285,3 +285,10 @@ test_that("output description is translated", {
   expect_match(text, paste0("Paquet Naomi téléchargée depuis l'application ",
                             "web Naomi\\n\\nPérimètre de zone - MWI\\n.+"))
 })
+
+test_that("failing to write data into xlsx sheet gives a useful error", {
+  sheets_to_write <- list(x = data.frame(x = c(1, 2, 3)))
+  dest <- tempfile()
+  expect_error(write_agyw_workbook(sheets_to_write, dest),
+               "Failed to build workbook, please contact support: Sheet 'x' does not exist")
+})

From dff5bf02b8ae05ab9321a1e867dd96f1be12a888 Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Thu, 25 Jan 2024 10:43:42 +0200
Subject: [PATCH 41/53] fix failing test for scaling to kp workbook consensus
 estimate

---
 R/agyw-integration.R            |  2 +-
 tests/testthat/test-downloads.R | 58 +++++++++++++++++++++++++++++++--
 2 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index 6b6b27b6..f6f853cd 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -506,7 +506,7 @@ agyw_disaggregate_msm <- function(outputs,
 
     # Check if consensus estimate is larger than age matched population denominator
     pop <- naomi_pop[naomi_pop$area_level == 0 & naomi_pop$age_group == "Y015_049" & naomi_pop$sex == "male",]$population
-    stopifnot(msm_consensus < pop)
+    prop_pop <- msm_consensus / pop
 
     if(prop_pop >= 0.05) {
       t_("MSM_CONSENSUS_WARNING")
diff --git a/tests/testthat/test-downloads.R b/tests/testthat/test-downloads.R
index 86ad34da..a3a22ed1 100644
--- a/tests/testthat/test-downloads.R
+++ b/tests/testthat/test-downloads.R
@@ -242,23 +242,77 @@ test_that("AGYW download can be created", {
     extract_kp_workbook = mock_extract_kp_workbook
   )
 
+  # Check that consensus estimates extracted and saved out
   expect_equal(risk_prop_scaled$meta_consensus,
                data.frame(kp = c("FSW", "MSM", "PWID"),
                           consensus_estimate = c(40000, 35500, 5000)))
 
-  # Test for warning when KP workbook has PSE consensus estimates that
-  # are >5% of the age matched population
+  # Test that PSE tool adjusted to KP consensus estimates correctly
+  model_object <- read_hintr_output(agyw_output_demo$model_output_path)
+  outputs <- model_object$output_package
+  options <- outputs$fit$model_options
+  naomi <- agyw_format_naomi(outputs, options)
+
+  # Naomi population
+  naomi_pop <- naomi$naomi_long %>%
+    dplyr::filter(indicator == "population") %>%
+    dplyr::select(area_id, area_level,sex, age_group, area_level,
+                  spectrum_region_code, population = mean)
+
+  naomi_pop$iso3 <- options$area_scope
+
+  # KP PSEs adjusted to consensus estimates when consensus estimates are
+  #  < 5% of age matched population  denominator
+  kp_consensus <- readRDS(file.path("testdata/kp_workbook_spectrum.rds"))
+
+  fsw_est <- agyw_disaggregate_fsw(outputs, options, naomi_pop, kp_consensus)
+  pwid_est <- agyw_disaggregate_pwid(outputs, options, naomi_pop, kp_consensus)
+  msm_est <- agyw_disaggregate_msm(outputs, options, naomi_pop, kp_consensus)
+
+  fsw <- sum(fsw_est$fsw)
+  pwid <- sum(pwid_est$pwid)
+  msm <- sum(msm_est$msm)
+
+  # Note that PWID will be 90% of KP workbook consensus estimate due to exclusion
+  # of female PWID
+  expect_equal(c(fsw, pwid, msm), c(40000, 4550, 35500))
+
+
+  # KP PSEs **not** adjusted to consensus estimates when consensus estimates are
+  #  > 5% of age matched population denominator
   kp_consensus_bad <- readRDS(file.path("testdata/kp_workbook_spectrum_bad.rds"))
   mock_extract_kp_workbook <- mockery::mock(kp_consensus_bad)
   mock_new_simple_progress <- mockery::mock(MockSimpleProgress$new())
 
+  with_mocked_bindings(
+    risk_prop_scaled <- agyw_generate_risk_populations(
+      agyw_output_demo$model_output_path, a_hintr_data$pjnz),
+    new_simple_progress = mock_new_simple_progress,
+    extract_kp_workbook = mock_extract_kp_workbook
+  )
 
+  # Check that bad consensus estimates extracted and saved out
   expect_equal(risk_prop_scaled$meta_consensus,
                data.frame(kp = c("FSW", "MSM", "PWID"),
                           consensus_estimate = c(260000, 260000, 260000)))
 
+
+  # KP PSEs use default proportions from Oli's mode when consensus estimates are
+  #  >= 5% of age matched population  denominator
+
+  fsw_est <- agyw_disaggregate_fsw(outputs, options, naomi_pop, kp_consensus_bad)
+  pwid_est <- agyw_disaggregate_pwid(outputs, options, naomi_pop, kp_consensus_bad)
+  msm_est <- agyw_disaggregate_msm(outputs, options, naomi_pop, kp_consensus_bad)
+
+  fsw <- sum(fsw_est$fsw)
+  pwid <- sum(pwid_est$pwid)
+  msm <- sum(msm_est$msm)
+
+  expect_equal(c(fsw, pwid, msm), c(62306.311, 7398.486, 24794.842))
+
 })
 
+
 test_that("Error thrown when AGYW resources are out of date", {
 
   kp_error <- paste0("Available KP PSE estimates for: \n",

From 95da62ca99325566e9cad1a0551f7b428eb9acba Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Thu, 25 Jan 2024 10:50:12 +0200
Subject: [PATCH 42/53] remove warnings that are not output anywhere

---
 R/agyw-integration.R            | 12 +++---------
 tests/testthat/test-downloads.R |  2 --
 2 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index f6f853cd..7c8fb75c 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -250,9 +250,7 @@ agyw_disaggregate_fsw <- function(outputs,
     pop <- naomi_pop[naomi_pop$area_level == 0 & naomi_pop$age_group == "Y015_049" & naomi_pop$sex == "female",]$population
     prop_pop <- fsw_consensus / pop
 
-    if(prop_pop >= 0.05) {
-      t_("FSW_CONSENSUS_WARNING")
-    } else {
+    if(prop_pop < 0.05) {
 
       # Scale total FSW population to consensus PSE estimate
       fsw_scaled <- fsw %>%
@@ -405,9 +403,7 @@ agyw_disaggregate_pwid <- function(outputs,
     pop <- naomi_pop[naomi_pop$area_level == 0 & naomi_pop$age_group == "Y015_049" & naomi_pop$sex == "male",]$population
     prop_pop <- pwid_consensus / pop
 
-    if(prop_pop >= 0.05) {
-      t_("PWID_CONSENSUS_WARNING")
-    } else {
+    if(prop_pop < 0.05) {
 
       # Scale total PWID population to consensus PSE estimate
       pwid_scaled <- pwid %>%
@@ -508,9 +504,7 @@ agyw_disaggregate_msm <- function(outputs,
     pop <- naomi_pop[naomi_pop$area_level == 0 & naomi_pop$age_group == "Y015_049" & naomi_pop$sex == "male",]$population
     prop_pop <- msm_consensus / pop
 
-    if(prop_pop >= 0.05) {
-      t_("MSM_CONSENSUS_WARNING")
-    } else {
+    if(prop_pop < 0.05) {
 
       # Scale total MSM population to consensus PSE estimate
       msm_scaled <- msm %>%
diff --git a/tests/testthat/test-downloads.R b/tests/testthat/test-downloads.R
index a3a22ed1..4ce9d1a0 100644
--- a/tests/testthat/test-downloads.R
+++ b/tests/testthat/test-downloads.R
@@ -263,8 +263,6 @@ test_that("AGYW download can be created", {
 
   # KP PSEs adjusted to consensus estimates when consensus estimates are
   #  < 5% of age matched population  denominator
-  kp_consensus <- readRDS(file.path("testdata/kp_workbook_spectrum.rds"))
-
   fsw_est <- agyw_disaggregate_fsw(outputs, options, naomi_pop, kp_consensus)
   pwid_est <- agyw_disaggregate_pwid(outputs, options, naomi_pop, kp_consensus)
   msm_est <- agyw_disaggregate_msm(outputs, options, naomi_pop, kp_consensus)

From 69359ff051e6ad4b05e1bec6bfa2bbe18ccd4d76 Mon Sep 17 00:00:00 2001
From: Rob Ashton <r.ashton@imperial.ac.uk>
Date: Thu, 25 Jan 2024 09:08:28 +0000
Subject: [PATCH 43/53] Remove branch pin, set duckdb name in DESCRIPTION

---
 DESCRIPTION | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index c5595fba..56036ccb 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -78,9 +78,9 @@ Remotes:
     first90=mrc-ide/first90release,
     reside-ic/traduire,
     mrc-ide/naomi.options,
-    mrc-ide/naomi.resources@update-workbook,
+    mrc-ide/naomi.resources,
     mrc-ide/mockr,
     mrc-ide/testthat.buildkite,
-    duckdb/duckdb-r@v0.9.1
+    duckdb=duckdb/duckdb-r@v0.9.1
 Config/testthat/edition: 3
 Config/testthat/parallel: true

From 7c8d87a871a758415afcc21b0dd36115a6d17a24 Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Thu, 25 Jan 2024 12:34:18 +0200
Subject: [PATCH 44/53] Update agyw-integration.R

add placeholder for incidence adjustment code
---
 R/agyw-integration.R | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index 7c8fb75c..aec4eaa0 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -1101,7 +1101,8 @@ agyw_calculate_incidence_female <- function(naomi_output,
                                             options,
                                             female_srb,
                                             female_logit_prevalence,
-                                            survey_year = 2018) {
+                                            survey_year = 2018,
+                                            kp_consensus) {
 
   naomi_indicators <- naomi_output  %>%
     dplyr::filter(indicator %in% c("population", "plhiv","prevalence","infections", "incidence"),
@@ -1124,7 +1125,6 @@ agyw_calculate_incidence_female <- function(naomi_output,
     dplyr::left_join(risk_group_prevalence, by = dplyr::join_by(area_id, age_group)) %>%
     dplyr::filter(!is.na(population))
 
-
   # Risk ratios for people non-regular sex partners relative to those with a
   # single cohabiting sex partner
   # ALPHA Network pooled analysis (Slaymaker et al CROI 2020), Jia et al systematic review, Ssempijja et al JAIDS 2022
@@ -1283,6 +1283,17 @@ agyw_calculate_incidence_female <- function(naomi_output,
     dplyr::mutate_if(is.numeric, as.numeric) %>%
     dplyr::mutate_if(is.factor, as.character)
 
+
+
+  # Check for consensus estimate of FSW infections
+  fsw_consensus <- kp_consensus[kp_consensus$key_population == "FSW", ]$infections
+
+  if(!is.na(fsw_consensus)){
+
+    print("Add code here to scale new infections")
+
+    }
+
 }
 
 #' Calculate incidence in high risk male key populations
@@ -1300,7 +1311,8 @@ agyw_calculate_incidence_male <- function(naomi_output,
                                           options,
                                           male_srb,
                                           male_logit_prevalence,
-                                          survey_year = 2018) {
+                                          survey_year = 2018,
+                                          kp_consensus) {
 
 
   naomi_indicators <- naomi_output  %>%
@@ -1489,6 +1501,17 @@ agyw_calculate_incidence_male <- function(naomi_output,
     dplyr::mutate_if(is.factor, as.character)
 
 
+  # Check for consensus estimate of MSM and PWID infections
+  male_consensus <- kp_consensus[kp_consensus$key_population %in% c("MSM","PWID"),]$infections
+
+
+  if(!anyNA(male_consensus)){
+
+    print("Add code here to scale new infections")
+
+  }
+
+
 
 }
 

From e2b00882dfaa7115416291e7180b1fdff9342f43 Mon Sep 17 00:00:00 2001
From: Katie Risher <krisher@ic.ac.uk>
Date: Tue, 30 Jan 2024 22:24:26 -0500
Subject: [PATCH 45/53] Add KP adjustment for new infections

Added KP adjustment to incidence estimates to match Spectrum KP new infection estimates for women & men - results in updated estimates of incidence for other behavioural groups as well
---
 .gitignore           |   1 +
 R/agyw-integration.R | 134 ++++++++++++++++++++++++++++++++++++-------
 2 files changed, 113 insertions(+), 22 deletions(-)

diff --git a/.gitignore b/.gitignore
index 9b09966a..842374bb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,4 @@ tests/testthat/testdata/fit.RDS
 .Rprofile
 .idea
 
+.DS_Store
diff --git a/R/agyw-integration.R b/R/agyw-integration.R
index aec4eaa0..716cb8a9 100644
--- a/R/agyw-integration.R
+++ b/R/agyw-integration.R
@@ -1189,6 +1189,39 @@ agyw_calculate_incidence_female <- function(naomi_output,
       infections_sexcohab = susceptible_sexcohab * incidence_sexcohab,
       infections_sexnonreg = susceptible_sexnonreg * incidence_sexnonreg)
 
+  # Check for consensus estimate of FSW infections
+  fsw_consensus <- kp_consensus[kp_consensus$key_population == "FSW", ]$infections
+
+  if(!is.na(fsw_consensus)){
+
+    # scale new infections if there's a KP consensus estimate
+    # sum prior count of new infections
+    fsw_sum <- sum(df1$infections_sexpaid12m)
+    # generate a ratio to scale FSW new infections by
+    fsw_ratio <- fsw_consensus / fsw_sum
+    # adjust district-level new infections and incidence for FSW
+    df1 <- df1 %>%
+      dplyr::mutate(
+        infections_sexpaid12m = infections_sexpaid12m * fsw_ratio,
+        incidence_sexpaid12m = infections_sexpaid12m / susceptible_sexpaid12m,
+      )
+    # Error here to catch that the KP adjustment has made the number of new infections
+    # in KPs greater than the estimated population susceptible
+    if(sum(df1$incidence_sexpaid12m > 1) > 0) {
+      stop("KP new infections exceeds susceptible population size. Please contact support.")
+    }
+    # adjust sexcohab and sexnonreg new infections and incidence to scale rest of infections
+    # from the district
+    df1 <- df1 %>%
+      dplyr::mutate(
+        incidence_sexcohab = (infections - infections_sexpaid12m) / (susceptible_sexcohab + rr_sexnonreg * susceptible_sexnonreg),
+        incidence_sexnonreg = incidence_sexcohab * rr_sexnonreg,
+        infections_sexcohab = susceptible_sexcohab * incidence_sexcohab,
+        infections_sexnonreg = susceptible_sexnonreg * incidence_sexnonreg,
+        rr_sexpaid12m = incidence_sexpaid12m / incidence_sexcohab
+      )
+  }
+
   # Calculate risk group incidence for aggregate age groups
 
   summarise_age_cat_female <- function(dat, age_cat) {
@@ -1261,6 +1294,14 @@ agyw_calculate_incidence_female <- function(naomi_output,
     stop("Risk group proportions do not sum correctly. Please contact suppport.")
   }
 
+  # Check that new infections are never negative in any behavioural risk group
+  sexcohab_inf_check <- sum(df3$infections_sexcohab < 0)
+  sexnonreg_inf_check <- sum(df3$infections_sexnonreg < 0)
+  sexpaid12m_inf_check <- sum(df3$infections_sexpaid12m < 0)
+  if(sum(sexcohab_inf_check,sexnonreg_inf_check,sexpaid12m_inf_check)>0) {
+    stop("Number of new infections below 0. Please contact support.")
+  }
+
   df3 %>%
     dplyr::mutate(concat = paste0(area_id, age_group), iso3 = options$area_scope) %>%
     dplyr::select(area_id, age_group, concat,
@@ -1285,15 +1326,6 @@ agyw_calculate_incidence_female <- function(naomi_output,
 
 
 
-  # Check for consensus estimate of FSW infections
-  fsw_consensus <- kp_consensus[kp_consensus$key_population == "FSW", ]$infections
-
-  if(!is.na(fsw_consensus)){
-
-    print("Add code here to scale new infections")
-
-    }
-
 }
 
 #' Calculate incidence in high risk male key populations
@@ -1395,6 +1427,64 @@ agyw_calculate_incidence_male <- function(naomi_output,
 
     )
 
+  # Check for consensus estimate of MSM and PWID infections
+  male_consensus <- kp_consensus[kp_consensus$key_population %in% c("MSM","PWID"),]$infections
+
+
+  if(!anyNA(male_consensus)){
+
+    msm_consensus <- kp_consensus[kp_consensus$key_population %in% c("MSM"),]$infections
+    pwid_consensus <- kp_consensus[kp_consensus$key_population %in% c("PWID"),]$infections
+
+    # scale new infections for MSM if there's an MSM KP consensus estimate
+    if(!is.na(msm_consensus)){
+      # sum prior count of new infections
+      msm_sum <- sum(df1$infections_msm)
+      # generate a ratio to scale MSM new infections by
+      msm_ratio <- msm_consensus / msm_sum
+      # adjust district-level new infections and incidence for MSM
+      df1 <- df1 %>%
+        dplyr::mutate(
+          infections_msm = infections_msm * msm_ratio,
+          incidence_msm = infections_msm / susceptible_msm,
+        )
+    }
+
+    # scale new infections for PWID if there's a PWID KP consensus estimate
+    if(!is.na(pwid_consensus)){
+      # scale consensus that we'll use to account for the 1:10 ratio assumption of
+      # male:female PWID
+      pwid_consensus <- pwid_consensus * 0.91
+      # sum prior count of new infections
+      pwid_sum <- sum(df1$infections_pwid)
+      # generate a ratio to scale PWID new infections by
+      pwid_ratio <- pwid_consensus / pwid_sum
+      # adjust district-level new infections and incidence for MSM
+      df1 <- df1 %>%
+        dplyr::mutate(
+          infections_pwid = infections_pwid * pwid_ratio,
+          incidence_pwid = infections_pwid / susceptible_pwid,
+        )
+    }
+    # Error here to catch that the KP adjustment has made the number of new infections
+    # in KPs greater than the estimated population susceptible
+    if(sum(df1$incidence_msm > 1,df1$incidence_pwid > 1) > 0) {
+      stop("KP new infections exceeds susceptible population size. Please contact support.")
+    }
+    # adjust sexcohab and sexnonreg new infections and incidence to scale rest of infections
+    # from the district
+    df1 <- df1 %>%
+      dplyr::mutate(
+        incidence_sexcohab = (infections - infections_msm - infections_pwid) / (susceptible_sexcohab +
+                                                                                  rr_sexnonreg * susceptible_sexnonreg),
+        incidence_sexnonreg = incidence_sexcohab * rr_sexnonreg,
+        infections_sexcohab = susceptible_sexcohab * incidence_sexcohab,
+        infections_sexnonreg = susceptible_sexnonreg * incidence_sexnonreg,
+        rr_msm = incidence_msm / incidence_sexcohab,
+        rr_pwid = incidence_pwid / incidence_sexcohab
+      )
+  }
+
   # Calculate risk group incidence for aggregate age groups
 
   summarise_age_cat_male <- function(dat, age_cat) {
@@ -1477,6 +1567,15 @@ agyw_calculate_incidence_male <- function(naomi_output,
     stop("Risk group proportions do not sum correctly. Please contact suppport.")
   }
 
+  # Check that new infections are never negative in any behavioural risk group
+  sexcohab_inf_check <- sum(df3$infections_sexcohab < 0)
+  sexnonreg_inf_check <- sum(df3$infections_sexnonreg < 0)
+  msm_inf_check <- sum(df3$infections_msm < 0)
+  pwid_inf_check <- sum(df3$infections_pwid < 0)
+  if(sum(sexcohab_inf_check,sexnonreg_inf_check,msm_inf_check,pwid_inf_check)>0) {
+    stop("Number of new infections below 0. Please contact support.")
+  }
+
 
   df3 %>%
     dplyr::mutate(concat = paste0(area_id, age_group), iso3 = options$area_scope) %>%
@@ -1501,17 +1600,6 @@ agyw_calculate_incidence_male <- function(naomi_output,
     dplyr::mutate_if(is.factor, as.character)
 
 
-  # Check for consensus estimate of MSM and PWID infections
-  male_consensus <- kp_consensus[kp_consensus$key_population %in% c("MSM","PWID"),]$infections
-
-
-  if(!anyNA(male_consensus)){
-
-    print("Add code here to scale new infections")
-
-  }
-
-
 
 }
 
@@ -1591,13 +1679,15 @@ agyw_generate_risk_populations <- function(naomi_output,
                                                       options,
                                                       female_srb,
                                                       female_logit_prevalence,
-                                                      survey_year)
+                                                      survey_year,
+                                                      kp_consensus)
 
   male_incidence <- agyw_calculate_incidence_male(naomi$naomi_long,
                                                   options,
                                                   male_srb,
                                                   male_logit_prevalence,
-                                                  survey_year)
+                                                  survey_year,
+                                                  kp_consensus)
 
   meta <- data.frame(kp = c("FSW", "MSM", "PWID"),
                      consensus_estimate = c(unique(fsw_est$consensus_estimate),

From 3063d173c0679953e81a686d4db5aa276e9ee892 Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Mon, 5 Feb 2024 11:01:18 +0200
Subject: [PATCH 46/53] rename agyw -> shipp

---
 NAMESPACE                                     |   2 +-
 R/downloads.R                                 |  18 +--
 R/{agyw-integration.R => shipp.R}             | 144 +++++++++---------
 R/test-helpers.R                              |  10 +-
 inst/traduire/en-translation.json             |  16 +-
 inst/traduire/fr-translation.json             |  14 +-
 inst/traduire/pt-translation.json             |  14 +-
 man/assert_agyw_resource_hierarchy.Rd         |  15 --
 man/assert_shipp_resource_hierarchy.Rd        |  15 ++
 man/calculate_ywkp_pr_lor.Rd                  |   2 +-
 ...oad.Rd => hintr_prepare_shipp_download.Rd} |  10 +-
 man/logit_scale_prev.Rd                       |   2 +-
 ...w_testfiles.Rd => make_shipp_testfiles.Rd} |   8 +-
 man/odds.Rd                                   |   2 +-
 ...av_fsw.Rd => shipp_adjust_sexbehav_fsw.Rd} |  10 +-
 ...d.Rd => shipp_adjust_sexbehav_msm_pwid.Rd} |  12 +-
 ...Rd => shipp_calculate_incidence_female.Rd} |  17 ++-
 ...e.Rd => shipp_calculate_incidence_male.Rd} |  17 ++-
 ...d => shipp_calculate_prevalence_female.Rd} |  12 +-
 ....Rd => shipp_calculate_prevalence_male.Rd} |  12 +-
 ...egate_fsw.Rd => shipp_disaggregate_fsw.Rd} |   8 +-
 ...egate_msm.Rd => shipp_disaggregate_msm.Rd} |   8 +-
 ...ate_pwid.Rd => shipp_disaggregate_pwid.Rd} |   8 +-
 ..._format_naomi.Rd => shipp_format_naomi.Rd} |  10 +-
 ....Rd => shipp_generate_risk_populations.Rd} |  18 +--
 tests/testthat/test-downloads.R               |  44 +++---
 26 files changed, 227 insertions(+), 221 deletions(-)
 rename R/{agyw-integration.R => shipp.R} (94%)
 delete mode 100644 man/assert_agyw_resource_hierarchy.Rd
 create mode 100644 man/assert_shipp_resource_hierarchy.Rd
 rename man/{hintr_prepare_agyw_download.Rd => hintr_prepare_shipp_download.Rd} (57%)
 rename man/{make_agyw_testfiles.Rd => make_shipp_testfiles.Rd} (69%)
 rename man/{agyw_adjust_sexbehav_fsw.Rd => shipp_adjust_sexbehav_fsw.Rd} (80%)
 rename man/{agyw_adjust_sexbehav_msm_pwid.Rd => shipp_adjust_sexbehav_msm_pwid.Rd} (81%)
 rename man/{agyw_calculate_incidence_female.Rd => shipp_calculate_incidence_female.Rd} (70%)
 rename man/{agyw_calculate_incidence_male.Rd => shipp_calculate_incidence_male.Rd} (67%)
 rename man/{agyw_calculate_prevalence_female.Rd => shipp_calculate_prevalence_female.Rd} (82%)
 rename man/{agyw_calculate_prevalence_male.Rd => shipp_calculate_prevalence_male.Rd} (83%)
 rename man/{agyw_disaggregate_fsw.Rd => shipp_disaggregate_fsw.Rd} (74%)
 rename man/{agyw_disaggregate_msm.Rd => shipp_disaggregate_msm.Rd} (74%)
 rename man/{agyw_disaggregate_pwid.Rd => shipp_disaggregate_pwid.Rd} (73%)
 rename man/{agyw_format_naomi.Rd => shipp_format_naomi.Rd} (57%)
 rename man/{agyw_generate_risk_populations.Rd => shipp_generate_risk_populations.Rd} (60%)

diff --git a/NAMESPACE b/NAMESPACE
index 9fb1b386..eaf7a342 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -43,9 +43,9 @@ export(get_plotting_metadata)
 export(hintr_calibrate)
 export(hintr_calibrate_plot)
 export(hintr_comparison_plot)
-export(hintr_prepare_agyw_download)
 export(hintr_prepare_coarse_age_group_download)
 export(hintr_prepare_comparison_report_download)
+export(hintr_prepare_shipp_download)
 export(hintr_prepare_spectrum_download)
 export(hintr_prepare_summary_report_download)
 export(hintr_run_model)
diff --git a/R/downloads.R b/R/downloads.R
index 302d05fc..5018cada 100644
--- a/R/downloads.R
+++ b/R/downloads.R
@@ -101,7 +101,7 @@ hintr_prepare_comparison_report_download <- function(output,
   )
 }
 
-#' Prepare AGYW tool download
+#' Prepare SHIPP tool download
 #'
 #' @param hintr_output object
 #' @param path Path to save output file
@@ -109,14 +109,14 @@ hintr_prepare_comparison_report_download <- function(output,
 #'
 #' @return Path to output file and metadata for file
 #' @export
-hintr_prepare_agyw_download <- function(output, pjnz,
+hintr_prepare_shipp_download <- function(output, pjnz,
                                         path = tempfile(fileext = ".xlsx")) {
   ## TODO: Do we need a version restriction on this?
   assert_model_output_version(output, "2.7.16")
   progress <- new_simple_progress()
-  progress$update_progress("PROGRESS_DOWNLOAD_AGYW")
+  progress$update_progress("PROGRESS_DOWNLOAD_SHIPP")
 
-  risk_populations <- agyw_generate_risk_populations(output$model_output_path,
+  risk_populations <- shipp_generate_risk_populations(output$model_output_path,
                                                      pjnz)
 
   sheets <- list(
@@ -124,16 +124,16 @@ hintr_prepare_agyw_download <- function(output, pjnz,
     "All outputs - M" = risk_populations$male_incidence,
     "NAOMI outputs" = risk_populations$naomi_output
   )
-  write_agyw_workbook(sheets, dest = path)
+  write_shipp_workbook(sheets, dest = path)
 
   model_output <- read_hintr_output(output$model_output_path)
   options <- yaml::read_yaml(text = model_output$info$options.yml)
   list(
     path = path,
     metadata = list(
-      description = build_agyw_tool_description(options),
+      description = build_shipp_tool_description(options),
       areas = options$area_scope,
-      type = "agyw"
+      type = "shipp"
     )
   )
 }
@@ -150,8 +150,8 @@ build_comparison_report_description <- function(options) {
   build_description(t_("DOWNLOAD_COMPARISON_DESCRIPTION"), options)
 }
 
-build_agyw_tool_description <- function(options) {
-  build_description(t_("DOWNLOAD_AGYW_DESCRIPTION"), options)
+build_shipp_tool_description <- function(options) {
+  build_description(t_("DOWNLOAD_SHIPP_DESCRIPTION"), options)
 }
 
 build_description <- function(type_text, options) {
diff --git a/R/agyw-integration.R b/R/shipp.R
similarity index 94%
rename from R/agyw-integration.R
rename to R/shipp.R
index 716cb8a9..b74f1ad8 100644
--- a/R/agyw-integration.R
+++ b/R/shipp.R
@@ -4,11 +4,11 @@
 #' @param options Naomi model options.
 #'
 #'
-#' @return Naomi indicators formatted for the AGYW workbook.
+#' @return Naomi indicators formatted for the SHIPP workbook.
 #' @keywords internal
 
 
-agyw_format_naomi <- function(outputs, options){
+shipp_format_naomi <- function(outputs, options){
 
   naomi_ind <- outputs$indicators %>%
     dplyr::filter(indicator %in% c("population", "plhiv", "infections","incidence",
@@ -215,7 +215,7 @@ agyw_format_naomi <- function(outputs, options){
 #' @keywords internal
 
 
-agyw_disaggregate_fsw <- function(outputs,
+shipp_disaggregate_fsw <- function(outputs,
                                   options,
                                   naomi_pop,
                                   kp_consensus){
@@ -223,7 +223,7 @@ agyw_disaggregate_fsw <- function(outputs,
   # Extract country specific national FSW PSEs
   iso3 <- options$area_scope
 
-  pse <- naomi.resources::load_agyw_exdata("kp_estimates", iso3) %>%
+  pse <- naomi.resources::load_shipp_exdata("kp_estimates", iso3) %>%
     dplyr::filter(kp == "FSW", indicator == "pse_prop")
 
   fsw_pse <- pse %>%
@@ -284,7 +284,7 @@ agyw_disaggregate_fsw <- function(outputs,
 
   # Calculate proportion of sexually active population using Kinh's country specific
   # estimates of age at first sex and naomi population
-  afs <- naomi.resources::load_agyw_exdata("afs", iso3)
+  afs <- naomi.resources::load_shipp_exdata("afs", iso3)
 
   # Select birth cohort from 2000, to turn 15 in 2015
   cohort <- 2000
@@ -330,7 +330,7 @@ agyw_disaggregate_fsw <- function(outputs,
   # Adjusting country specific sexual debut estimates with age distribution of
   # FSW from Thembisa
   #Downloaded from: https://www.thembisa.org/content/downloadPage/Thembisa4_3
-  zaf_propensity <- naomi.resources::load_agyw_exdata("zaf_propensity", iso3 = "ZAF") %>%
+  zaf_propensity <- naomi.resources::load_shipp_exdata("zaf_propensity", iso3 = "ZAF") %>%
     dplyr::filter(kp == "FSW")
 
   fsw_est <- df %>%
@@ -369,7 +369,7 @@ agyw_disaggregate_fsw <- function(outputs,
 #' @return District level PWID estimates by 5-year age bands for ages 15-49.
 #' @keywords internal
 
-agyw_disaggregate_pwid <- function(outputs,
+shipp_disaggregate_pwid <- function(outputs,
                                    options,
                                    naomi_pop,
                                    kp_consensus)
@@ -378,7 +378,7 @@ agyw_disaggregate_pwid <- function(outputs,
   # Extract country specific national PWID PSEs
   iso3 <- options$area_scope
 
-  pse <- naomi.resources::load_agyw_exdata("kp_estimates", iso3) %>%
+  pse <- naomi.resources::load_shipp_exdata("kp_estimates", iso3) %>%
     dplyr::filter(kp == "PWID", indicator == "pse_prop")
 
   pwid_pse <- pse %>%
@@ -470,7 +470,7 @@ agyw_disaggregate_pwid <- function(outputs,
 #' @return District level MSM estimates by 5-year age bands for ages 15-49.
 #' @keywords internal
 
-agyw_disaggregate_msm <- function(outputs,
+shipp_disaggregate_msm <- function(outputs,
                                   options,
                                   naomi_pop,
                                   kp_consensus)
@@ -478,7 +478,7 @@ agyw_disaggregate_msm <- function(outputs,
 
   # Extract country specific national MSM PSEs
   iso3 <- options$area_scope
-  pse <- naomi.resources::load_agyw_exdata("kp_estimates", iso3) %>%
+  pse <- naomi.resources::load_shipp_exdata("kp_estimates", iso3) %>%
     dplyr::filter(kp == "MSM", indicator == "pse_prop")
 
   msm_pse <- pse %>%
@@ -542,7 +542,7 @@ agyw_disaggregate_msm <- function(outputs,
 
   # Calculate proportion of sexually active population using Kinh's country specific
   # estimates of age at first sex and naomi population
-  afs <- naomi.resources::load_agyw_exdata("afs", iso3)
+  afs <- naomi.resources::load_shipp_exdata("afs", iso3)
 
   # Select birth cohort from 2000, to turn 15 in 2015
   cohort <- 2000
@@ -587,7 +587,7 @@ agyw_disaggregate_msm <- function(outputs,
 
   # Adjusting country specific sexual debut estimates with age distribution of
   # MSM from Thembisa
-  zaf_propensity <- naomi.resources::load_agyw_exdata("zaf_propensity", iso3 = "ZAF") %>%
+  zaf_propensity <- naomi.resources::load_shipp_exdata("zaf_propensity", iso3 = "ZAF") %>%
     dplyr::filter(kp == "MSM")
 
 
@@ -617,7 +617,7 @@ agyw_disaggregate_msm <- function(outputs,
 #'
 #' @param outputs Naomi output.
 #' @param options Naomi model options.
-#' @param fsw_est 5-year estimates of FSW PSEs generated from `agyw_disaggregate_fsw()`.
+#' @param fsw_est 5-year estimates of FSW PSEs generated from `shipp_disaggregate_fsw()`.
 #'
 #'
 #' Estimates are generated for the following groups:
@@ -637,7 +637,7 @@ agyw_disaggregate_msm <- function(outputs,
 #' @keywords internal
 
 
-agyw_adjust_sexbehav_fsw <- function(outputs,
+shipp_adjust_sexbehav_fsw <- function(outputs,
                                      options,
                                      fsw_est)
 {
@@ -656,7 +656,7 @@ agyw_adjust_sexbehav_fsw <- function(outputs,
   fsw_df <- fsw_est %>% dplyr::select(age_group, fsw_match_area = area_id, fsw_prop)
 
   # Load female SRB proportions
-  female_srb <- naomi.resources::load_agyw_exdata("srb_female", options$area_scope)
+  female_srb <- naomi.resources::load_shipp_exdata("srb_female", options$area_scope)
 
   adj_female_srb <- female_srb %>%
     dplyr::filter(iso3 == options$area_scope) %>%
@@ -687,8 +687,8 @@ agyw_adjust_sexbehav_fsw <- function(outputs,
 #'
 #' @param outputs Naomi output.
 #' @param options Naomi model options.
-#' @param msm_est 5-year estimates of MSM PSEs generated from `agyw__disaggregate_msm()`.
-#' @param pwid_est 5-year estimates of MSM PSEs generated from `agyw__disaggregate_pwid()`.
+#' @param msm_est 5-year estimates of MSM PSEs generated from `shipp__disaggregate_msm()`.
+#' @param pwid_est 5-year estimates of MSM PSEs generated from `shipp__disaggregate_pwid()`.
 #'
 #' @return District level estimates of male sexual risk behaviour groups
 #' @keywords internal
@@ -708,7 +708,7 @@ agyw_adjust_sexbehav_fsw <- function(outputs,
 #' 2. Subtracting MSM and PWID proportionally from all SRB groups.
 
 
-agyw_adjust_sexbehav_msm_pwid <- function(outputs,
+shipp_adjust_sexbehav_msm_pwid <- function(outputs,
                                           options,
                                           msm_est,
                                           pwid_est) {
@@ -728,7 +728,7 @@ agyw_adjust_sexbehav_msm_pwid <- function(outputs,
   pwid_df <- pwid_est %>% dplyr::select(age_group, kp_match_area = area_id, pwid_prop)
 
   # Load male SRB proportions
-  male_srb <- naomi.resources::load_agyw_exdata("srb_male", options$area_scope)
+  male_srb <- naomi.resources::load_shipp_exdata("srb_male", options$area_scope)
 
   adj_male_srb <- male_srb %>%
     dplyr::filter(iso3 == options$area_scope) %>%
@@ -765,8 +765,8 @@ agyw_adjust_sexbehav_msm_pwid <- function(outputs,
 #'
 #' @param naomi_output Naomi output.
 #' @param options Naomi model options.
-#' @param fsw_est 5-year estimates of FSW PSEs generated from `agyw_disaggregate_fse()`.
-#' @param female_srb FSW adjusted estimates of female SRB groups generated by `agyw_adjust_sexbehav_fsw()`
+#' @param fsw_est 5-year estimates of FSW PSEs generated from `shipp_disaggregate_fse()`.
+#' @param female_srb FSW adjusted estimates of female SRB groups generated by `shipp_adjust_sexbehav_fsw()`
 #' @param survey_year Year of survey to sample estimates.
 #'
 #' To calculate district-age-sex-sexual behaviour-specific HIV prevalence, we maintain
@@ -781,7 +781,7 @@ agyw_adjust_sexbehav_msm_pwid <- function(outputs,
 #' @return SRB PSEs with logit prevalence estimates.
 #' @keywords internal
 
-agyw_calculate_prevalence_female <- function(naomi_output,
+shipp_calculate_prevalence_female <- function(naomi_output,
                                              options,
                                              fsw_est,
                                              female_srb,
@@ -804,7 +804,7 @@ agyw_calculate_prevalence_female <- function(naomi_output,
   # Extract country specific national FSW prevalence
   iso3 <- options$area_scope
   # THIS IS NOW USING SINGLE COUNTRY INSTEAD OF ALL COUNTRIES
-  fsw_prev <- naomi.resources::load_agyw_exdata("kp_estimates", iso3) %>%
+  fsw_prev <- naomi.resources::load_shipp_exdata("kp_estimates", iso3) %>%
     dplyr::filter(kp == "FSW", indicator == "prevalence")
 
   kp_prev <- fsw_prev %>%
@@ -836,7 +836,7 @@ agyw_calculate_prevalence_female <- function(naomi_output,
   calculate_prevalence <- function(x, iso3){
 
     # Log odds ratio from SRB group survey prevalence
-    lor <- naomi.resources:::load_agyw_exdata("srb_survey_lor", iso3) %>%
+    lor <- naomi.resources:::load_shipp_exdata("srb_survey_lor", iso3) %>%
       dplyr::filter(sex == "female")
 
     lor_15to29 <- lor$lor_15to29
@@ -893,8 +893,8 @@ agyw_calculate_prevalence_female <- function(naomi_output,
 #' @param naomi_output Naomi output.
 #' @param areas Naomi boundary file.
 #' @param options Naomi model options.
-#' @param msm_est 5-year estimates of MSM PSEs generated from `agyw__disaggregate_msm()`.
-#' @param male_srb MSM and PWID adjusted estimates of male SRB groups generated by `agyw_adjust_sexbehav_msm_pwid()`.
+#' @param msm_est 5-year estimates of MSM PSEs generated from `shipp__disaggregate_msm()`.
+#' @param male_srb MSM and PWID adjusted estimates of male SRB groups generated by `shipp_adjust_sexbehav_msm_pwid()`.
 #' @param survey_year Year of survey to sample estimates.
 #'
 #'
@@ -912,7 +912,7 @@ agyw_calculate_prevalence_female <- function(naomi_output,
 #' @keywords internal
 
 
-agyw_calculate_prevalence_male <- function(naomi_output,
+shipp_calculate_prevalence_male <- function(naomi_output,
                                            areas,
                                            options,
                                            msm_est,
@@ -940,7 +940,7 @@ agyw_calculate_prevalence_male <- function(naomi_output,
   # Extract country specific national MSM + PWID prevalence
   iso3 <- options$area_scope
 
-  msm_pwid_prev <- naomi.resources::load_agyw_exdata("kp_estimates", iso3) %>%
+  msm_pwid_prev <- naomi.resources::load_shipp_exdata("kp_estimates", iso3) %>%
     dplyr::filter(indicator == "prevalence", kp %in% c("MSM", "PWID"))
 
  # KP population prevalence
@@ -987,7 +987,7 @@ agyw_calculate_prevalence_male <- function(naomi_output,
   calculate_prevalence <- function(x, iso3){
 
     # Log odds ratio from SRB group survey prevalence
-    lor <- naomi.resources:::load_agyw_exdata("srb_survey_lor", iso3) %>%
+    lor <- naomi.resources:::load_shipp_exdata("srb_survey_lor", iso3) %>%
       dplyr::filter(sex == "male")
 
     lor_15to29 <- lor$lor_15to29
@@ -1089,15 +1089,15 @@ logit_scale_prev <- function(lor, N_fine, plhiv) {
 #'
 #' @param naomi_output Naomi output.
 #' @param options Naomi model options.
-#' @param female_srb FSW adjusted estimates of female sexual risk groups generated by `agyw_adjust_sexbehav_fsw()`.
-#' @param female_logit_prevalence Risk adjusted estimates of female prevalence in sexual risk groups generated by `agyw_calculate_prevalence_female()`.
+#' @param female_srb FSW adjusted estimates of female sexual risk groups generated by `shipp_adjust_sexbehav_fsw()`.
+#' @param female_logit_prevalence Risk adjusted estimates of female prevalence in sexual risk groups generated by `shipp_calculate_prevalence_female()`.
 #' @param survey_year Survey year to sample from the SAE model. Default is 2018. Survey year should be updated to most current household survey in the country - for countries without recent household surveys, leave at 2018 - the spatiotemporal
 #' model of sexual behaviour fitted to all countries has the most data for in roughly 2018.
 #'
-#' @return Wide format output required for the AGYW workbook.
+#' @return Wide format output required for the SHIPP workbook.
 #' @keywords internal
 
-agyw_calculate_incidence_female <- function(naomi_output,
+shipp_calculate_incidence_female <- function(naomi_output,
                                             options,
                                             female_srb,
                                             female_logit_prevalence,
@@ -1332,14 +1332,14 @@ agyw_calculate_incidence_female <- function(naomi_output,
 #'
 #' @param outputs Naomi output.
 #' @param options Naomi model options.
-#' @param male_srb MSM and PWID adjusted estimated of male SRB groups generated by `agyw_adjust_sexbehav_msm_pwid()`.
-#' @param male_logit_prevalence Risk adjusted estimates of male HIV prevalence in sexual risk groups generated by `agyw_calculate_prevalence_male()`.
+#' @param male_srb MSM and PWID adjusted estimated of male SRB groups generated by `shipp_adjust_sexbehav_msm_pwid()`.
+#' @param male_logit_prevalence Risk adjusted estimates of male HIV prevalence in sexual risk groups generated by `shipp_calculate_prevalence_male()`.
 #' @param survey_year Survey year to sample from the SAE model. Default is 2018. Survey year should be updated to most current household survey in the country - for countries without recent household surveys, leave at 2018 - the spatiotemporal
 #' model of sexual behaviour fitted to all countries has the most data for in roughly 2018.
 
-#' @return Wide format output required for the AGYW workbook
+#' @return Wide format output required for the SHIPP workbook
 
-agyw_calculate_incidence_male <- function(naomi_output,
+shipp_calculate_incidence_male <- function(naomi_output,
                                           options,
                                           male_srb,
                                           male_logit_prevalence,
@@ -1604,20 +1604,20 @@ agyw_calculate_incidence_male <- function(naomi_output,
 }
 
 
-#' Generate outputs to update AGYW tool.
+#' Generate outputs to update SHIPP tool.
 #'
 #' @param naomi_output Path to naomi output (zip file or hintr object).
 #' @param pjnz Path to spectrum file.
-#' @param male_srb Estimates of male sexual risk groups generated by `agyw_adjust_sexbehav_msm_pwid()`
-#' @param male_logit_prevalence Risk adjusted estimates of male prevalence in sexual risk groups generated by `agyw_calculate_prevalence_male()`
+#' @param male_srb Estimates of male sexual risk groups generated by `shipp_adjust_sexbehav_msm_pwid()`
+#' @param male_logit_prevalence Risk adjusted estimates of male prevalence in sexual risk groups generated by `shipp_calculate_prevalence_male()`
 #' @param survey_year Survey year to sample from the SAE model. Default is 2018. Survey year should be updated to most current household survey in the country - for countries without recent household surveys, leave at 2018 - the spatiotemporal
 #' model of sexual behaviour fitted to all countries has the most data for in roughly 2018.
 #'
-#' @return Output files to update AGYW excel workbook.
+#' @return Output files to update SHIPP excel workbook.
 #' @keywords internal
 
 
-agyw_generate_risk_populations <- function(naomi_output,
+shipp_generate_risk_populations <- function(naomi_output,
                                            pjnz,
                                            survey_year = 2018) {
 
@@ -1635,12 +1635,12 @@ agyw_generate_risk_populations <- function(naomi_output,
     options <- outputs$fit$model_options
   }
 
-  # Check for concordence between area_ids in agyw resources from `naomi.resources`
+  # Check for concordence between area_ids in shipp resources from `naomi.resources`
   #  and Naomi estimates
-  assert_agyw_resource_hierarchy(outputs, options)
+  assert_shipp_resource_hierarchy(outputs, options)
 
   # Format naomi output
-  naomi <- agyw_format_naomi(outputs, options)
+  naomi <- shipp_format_naomi(outputs, options)
 
   # Naomi population
   naomi_pop <- naomi$naomi_long %>%
@@ -1652,22 +1652,22 @@ agyw_generate_risk_populations <- function(naomi_output,
 
   # Disaggregate KP PSEs from Oli's analysis to 5-year bands
   kp_consensus <- extract_kp_workbook(pjnz)
-  fsw_est <- agyw_disaggregate_fsw(outputs, options, naomi_pop, kp_consensus)
-  pwid_est <- agyw_disaggregate_pwid(outputs, options, naomi_pop, kp_consensus)
-  msm_est <- agyw_disaggregate_msm(outputs, options, naomi_pop, kp_consensus)
+  fsw_est <- shipp_disaggregate_fsw(outputs, options, naomi_pop, kp_consensus)
+  pwid_est <- shipp_disaggregate_pwid(outputs, options, naomi_pop, kp_consensus)
+  msm_est <- shipp_disaggregate_msm(outputs, options, naomi_pop, kp_consensus)
 
   # Adjust SAE model output with KP proportions
-  female_srb <- agyw_adjust_sexbehav_fsw(outputs, options, fsw_est)
-  male_srb <- agyw_adjust_sexbehav_msm_pwid(outputs, options, msm_est, pwid_est)
+  female_srb <- shipp_adjust_sexbehav_fsw(outputs, options, fsw_est)
+  male_srb <- shipp_adjust_sexbehav_msm_pwid(outputs, options, msm_est, pwid_est)
 
   # Calculate risk group prevalence
-  female_logit_prevalence <- agyw_calculate_prevalence_female(naomi$naomi_long,
+  female_logit_prevalence <- shipp_calculate_prevalence_female(naomi$naomi_long,
                                                               options,
                                                               fsw_est,
                                                               female_srb,
                                                               survey_year)
 
-  male_logit_prevalence <- agyw_calculate_prevalence_male(naomi$naomi_long,
+  male_logit_prevalence <- shipp_calculate_prevalence_male(naomi$naomi_long,
                                                           outputs$meta_area,
                                                           options,
                                                           msm_est,
@@ -1675,14 +1675,14 @@ agyw_generate_risk_populations <- function(naomi_output,
                                                           survey_year)
 
   # Calculate risk group incidence
-  female_incidence <- agyw_calculate_incidence_female(naomi$naomi_long,
+  female_incidence <- shipp_calculate_incidence_female(naomi$naomi_long,
                                                       options,
                                                       female_srb,
                                                       female_logit_prevalence,
                                                       survey_year,
                                                       kp_consensus)
 
-  male_incidence <- agyw_calculate_incidence_male(naomi$naomi_long,
+  male_incidence <- shipp_calculate_incidence_male(naomi$naomi_long,
                                                   options,
                                                   male_srb,
                                                   male_logit_prevalence,
@@ -1705,8 +1705,8 @@ agyw_generate_risk_populations <- function(naomi_output,
 
 }
 
-#' Throw warning when area hierarchy in external AGYW resources read in from
-#'  `naomi.resources` do not match Naomi outputs used to update AGYW estimates.
+#' Throw warning when area hierarchy in external SHIPP resources read in from
+#'  `naomi.resources` do not match Naomi outputs used to update SHIPP estimates.
 #'
 #'  @param outputs Naomi outputs.
 #'  @param options Naomi options.
@@ -1714,18 +1714,18 @@ agyw_generate_risk_populations <- function(naomi_output,
 #'  @keywords internal
 
 
-assert_agyw_resource_hierarchy <- function(outputs,
+assert_shipp_resource_hierarchy <- function(outputs,
                                            options){
 
   # iso3 from model options
   iso3 <- options$area_scope
 
   # KP PSE's
-  pse <- naomi.resources::load_agyw_exdata("kp_estimates", iso3)
+  pse <- naomi.resources::load_shipp_exdata("kp_estimates", iso3)
 
   # SRB SAE model estimates
-  female_srb <- naomi.resources::load_agyw_exdata("srb_female", options$area_scope)
-  male_srb <- naomi.resources::load_agyw_exdata("srb_male", options$area_scope)
+  female_srb <- naomi.resources::load_shipp_exdata("srb_female", options$area_scope)
+  male_srb <- naomi.resources::load_shipp_exdata("srb_male", options$area_scope)
 
   # Naomi area_ids at lowest admin-level
   naomi_hierarchy <- outputs$meta_area %>% dplyr::filter(area_level == options$area_level)
@@ -1737,39 +1737,39 @@ assert_agyw_resource_hierarchy <- function(outputs,
 
   if(length(pse_diff) != 0 ){
 
-    stop(paste0(t_("AGYW_ERROR_KP_PREFIX"),
+    stop(paste0(t_("SHIPP_ERROR_KP_PREFIX"),
              paste0(unique(pse$area_id), collapse = "; "),
-             t_("AGYW_ERROR_NAOMI_MISMATCH"),
+             t_("SHIPP_ERROR_NAOMI_MISMATCH"),
              paste0(unique(naomi_ids), collapse = "; "),
-             t_("AGYW_ERROR_CONTACT_SUPPORT")))
+             t_("SHIPP_ERROR_CONTACT_SUPPORT")))
   }
 
   if(length(female_srb_diff) != 0 ){
 
-    stop(paste0(t_("AGYW_ERROR_FSRB_PREFIX"),
+    stop(paste0(t_("SHIPP_ERROR_FSRB_PREFIX"),
                 paste0(unique(pse$area_id), collapse = "; "),
-                t_("AGYW_ERROR_NAOMI_MISMATCH"),
+                t_("SHIPP_ERROR_NAOMI_MISMATCH"),
                 paste0(unique(naomi_ids), collapse = "; "),
-                t_("AGYW_ERROR_CONTACT_SUPPORT")))
+                t_("SHIPP_ERROR_CONTACT_SUPPORT")))
   }
 
   if(length(male_srb_diff) != 0 ){
 
-    stop(paste0(t_("AGYW_ERROR_MSRB_PREFIX"),
+    stop(paste0(t_("SHIPP_ERROR_MSRB_PREFIX"),
                 paste0(unique(pse$area_id), collapse = "; "),
-                t_("AGYW_ERROR_NAOMI_MISMATCH"),
+                t_("SHIPP_ERROR_NAOMI_MISMATCH"),
                 paste0(unique(naomi_ids), collapse = "; "),
-                t_("AGYW_ERROR_CONTACT_SUPPORT")))
+                t_("SHIPP_ERROR_CONTACT_SUPPORT")))
   }
 
 }
 
-write_agyw_workbook <- function(sheets, dest) {
-  template_path <- naomi.resources::get_agyw_workbook_path()
+write_shipp_workbook <- function(sheets, dest) {
+  template_path <- naomi.resources::get_shipp_workbook_path()
   withCallingHandlers(
     write_xlsx_sheets(template_path, sheets, dest),
     error = function(e) {
-      e$message <- t_("AGYW_ERROR_WRITE", list(message = e$message))
+      e$message <- t_("SHIPP_ERROR_WRITE", list(message = e$message))
       stop(e)
     })
 }
diff --git a/R/test-helpers.R b/R/test-helpers.R
index 042949d4..acd6e7a9 100644
--- a/R/test-helpers.R
+++ b/R/test-helpers.R
@@ -5,9 +5,9 @@
 #' @param naomi_output Calibrated naomi output
 #'
 #' @return Calibrated naomi output matched to MWI test data on
-#'   `naomi.resources` to be used to generate the agyw tool.
+#'   `naomi.resources` to be used to generate the shipp tool.
 #' @keywords internal
-make_agyw_testfiles <- function(naomi_output) {
+make_shipp_testfiles <- function(naomi_output) {
   # Create naomi outputs align with testing data in naomi.resources:
   #   - Change iso3 to "MWI_demo"
   #   - Restrict outputs to admin2
@@ -42,8 +42,8 @@ make_agyw_testfiles <- function(naomi_output) {
   naomi:::hintr_save(demo, out_demo)
 
   # Add to existing hintr_test data
-  agyw_output_demo <- naomi_output
-  agyw_output_demo$model_output_path <- out_demo
+  shipp_output_demo <- naomi_output
+  shipp_output_demo$model_output_path <- out_demo
 
-  agyw_output_demo
+  shipp_output_demo
 }
diff --git a/inst/traduire/en-translation.json b/inst/traduire/en-translation.json
index 0186d3d8..6ed50714 100644
--- a/inst/traduire/en-translation.json
+++ b/inst/traduire/en-translation.json
@@ -91,7 +91,7 @@
     "PROGRESS_DOWNLOAD_COARSE": "Generating coarse-output download",
     "PROGRESS_DOWNLOAD_SUMMARY": "Generating summary report",
     "PROGRESS_DOWNLOAD_COMPARISON": "Generating comparison report",
-    "PROGRESS_DOWNLOAD_AGYW": "Generating AGYW tool",
+    "PROGRESS_DOWNLOAD_SHIPP": "Generating SHIPP tool",
     "PLHIV_MAP_PLOT_TITLE": "People living with HIV (15+)",
     "ART_CURRENT_MAP_PLOT_TITLE": "Residents receiving ART (15+)",
     "INFECTIONS_MAP_PLOT_TITLE": "Annual HIV infections (15+)",
@@ -275,13 +275,13 @@
     "DOWNLOAD_OUTPUT_DESCRIPTION": "Naomi output uploaded from Naomi web app",
     "DOWNLOAD_SUMMARY_DESCRIPTION": "Naomi summary report uploaded from Naomi web app",
     "DOWNLOAD_COMPARISON_DESCRIPTION": "Naomi comparison report uploaded from Naomi web app",
-    "DOWNLOAD_AGYW_DESCRIPTION": "Naomi AGYW tool uploaded from Naomi web app",
-    "AGYW_ERROR_KP_PREFIX": "Available KP PSE estimates for: \n",
-    "AGYW_ERROR_FSRB_PREFIX": " Available female sexual risk behaviour survey estimates for: \n",
-    "AGYW_ERROR_MSRB_PREFIX": " Available male sexual risk behaviour survey estimates for: \n",
-    "AGYW_ERROR_NAOMI_MISMATCH": "\n\n Do not match Naomi estimates for: \n",
-    "AGYW_ERROR_CONTACT_SUPPORT": "\n\nTo update estimates, please contact Naomi support.",
-    "AGYW_ERROR_WRITE": "Failed to build workbook, please contact support: {{message}}",
+    "DOWNLOAD_SHIPP_DESCRIPTION": "Naomi SHIPP tool uploaded from Naomi web app",
+    "SHIPP_ERROR_KP_PREFIX": "Available KP PSE estimates for: \n",
+    "SHIPP_ERROR_FSRB_PREFIX": " Available female sexual risk behaviour survey estimates for: \n",
+    "SHIPP_ERROR_MSRB_PREFIX": " Available male sexual risk behaviour survey estimates for: \n",
+    "SHIPP_ERROR_NAOMI_MISMATCH": "\n\n Do not match Naomi estimates for: \n",
+    "SHIPP_ERROR_CONTACT_SUPPORT": "\n\nTo update estimates, please contact Naomi support.",
+    "SHIPP_ERROR_WRITE": "Failed to build workbook, please contact support: {{message}}",
     "FSW_CONSENSUS_WARNING": "Consensus estimate for FSW is larger than 5% of total females aged 15-59. Modelled estimates for FWS PSEs will be used.",
     "MSM_CONSENSUS_WARNING": "Consensus estimate for MSM is larger than 5% of total males aged 15-59. Modelled estimates for FWS PSEs will be used.",
     "PWID_CONSENSUS_WARNING": "Consensus estimate for PWID is larger than 5% of total males aged 15-59. Modelled estimates for PWID PSEs will be used."
diff --git a/inst/traduire/fr-translation.json b/inst/traduire/fr-translation.json
index 02f7abb7..ae34a03f 100644
--- a/inst/traduire/fr-translation.json
+++ b/inst/traduire/fr-translation.json
@@ -139,6 +139,7 @@
     "PROGRESS_DOWNLOAD_COARSE": "Génération d'un téléchargement à sortie grossière",
     "PROGRESS_DOWNLOAD_SUMMARY": "Générer un rapport de synthèse",
     "PROGRESS_DOWNLOAD_COMPARISON": "Générer un rapport de comparaison",
+    "PROGRESS_DOWNLOAD_SHIPP": "Generating SHIPP tool",
     "CANNOT_RECALIBRATE": "L'étalonnage ne peut pas être relancé pour cet ajustement de modèle, veuillez relancer l'étape d'ajustement.",
     "INVALID_ART_TIME_PERIOD": "Données trimestrielles non fournies pour tous les désagrégats ou données annuelles dupliquées fournies pour tous les désagrégats.",
     "ART_TOTAL": "Numéros sur TARV",
@@ -274,12 +275,13 @@
     "DOWNLOAD_OUTPUT_DESCRIPTION": "Paquet Naomi téléchargée depuis l'application web Naomi",
     "DOWNLOAD_SUMMARY_DESCRIPTION": "Rapport de synthèse Naomi téléchargé depuis l'application web Naomi",
     "DOWNLOAD_COMPARISON_DESCRIPTION": "Rapport de comparaison Naomi téléchargé à partir de l'application web Naomi",
-    "AGYW_ERROR_KP_PREFIX": " Available KP PSE estimates for: \n",
-    "AGYW_ERROR_FSRB_PREFIX": " Available female sexual risk behaviour survey estimates for: \n",
-    "AGYW_ERROR_MSRB_PREFIX": " Available male sexual risk behaviour survey estimates for: \n",
-    "AGYW_ERROR_NAOMI_MISMATCH": "\n\n Do not match Naomi estimates for: \n",
-    "AGYW_ERROR_CONTACT_SUPPORT": "\n\nTo update estimates, please contact Naomi support.",
-    "AGYW_ERROR_WRITE": "Échec de la création du classeur, veuillez contacter l'assistance: {{message}}",
+    "DOWNLOAD_SHIPP_DESCRIPTION": "Naomi SHIPP tool uploaded from Naomi web app",
+    "SHIPP_ERROR_KP_PREFIX": " Available KP PSE estimates for: \n",
+    "SHIPP_ERROR_FSRB_PREFIX": " Available female sexual risk behaviour survey estimates for: \n",
+    "SHIPP_ERROR_MSRB_PREFIX": " Available male sexual risk behaviour survey estimates for: \n",
+    "SHIPP_ERROR_NAOMI_MISMATCH": "\n\n Do not match Naomi estimates for: \n",
+    "SHIPP_ERROR_CONTACT_SUPPORT": "\n\nTo update estimates, please contact Naomi support.",
+    "SHIPP_ERROR_WRITE": "Échec de la création du classeur, veuillez contacter l'assistance: {{message}}",
     "FSW_CONSENSUS_WARNING": "Consensus estimate for FSW is larger than 5% of total females aged 15-59. Modelled estimates for FWS PSEs will be used.",
     "MSM_CONSENSUS_WARNING": "Consensus estimate for MSM is larger than 5% of total males aged 15-59. Modelled estimates for FWS PSEs will be used.",
     "PWID_CONSENSUS_WARNING": "Consensus estimate for PWID is larger than 5% of total males aged 15-59. Modelled estimates for PWID PSEs will be used."
diff --git a/inst/traduire/pt-translation.json b/inst/traduire/pt-translation.json
index 2490040e..c23747e5 100644
--- a/inst/traduire/pt-translation.json
+++ b/inst/traduire/pt-translation.json
@@ -139,6 +139,7 @@
     "PROGRESS_DOWNLOAD_COARSE": "Geração de resultados grosseiros",
     "PROGRESS_DOWNLOAD_SUMMARY": "Elaboração de relatório de síntese",
     "PROGRESS_DOWNLOAD_COMPARISON": "Elaboração de relatório de comparação",
+    "PROGRESS_DOWNLOAD_SHIPP": "Generating SHIPP tool",
     "CANNOT_RECALIBRATE": "A calibração não pode ser executada de novo para este modelo de ajuste, por favor, execute de novo o passo de ajuste.",
     "INVALID_ART_TIME_PERIOD": "Dados trimestrais não fornecidos para todos os desagregados ou dados anuais duplicados fornecidos para todos os desagregados.",
     "ART_TOTAL": "Contagem de TARV",
@@ -274,12 +275,13 @@
     "DOWNLOAD_OUTPUT_DESCRIPTION": "Pacote Naomi descarregado a partir da aplicação web Naomi",
     "DOWNLOAD_SUMMARY_DESCRIPTION": "Relatório de síntese da Naomi carregado da aplicação web Naomi",
     "DOWNLOAD_COMPARISON_DESCRIPTION": "Relatório de comparação Naomi carregado a partir da aplicação web Naomi",
-    "AGYW_ERROR_KP_PREFIX": " Available KP PSE estimates for: \n",
-    "AGYW_ERROR_FSRB_PREFIX": " Available female sexual risk behaviour survey estimates for: \n",
-    "AGYW_ERROR_MSRB_PREFIX": " Available male sexual risk behaviour survey estimates for: \n",
-    "AGYW_ERROR_NAOMI_MISMATCH": "\n\n Do not match Naomi estimates for: \n",
-    "AGYW_ERROR_CONTACT_SUPPORT": "\n\nTo update estimates, please contact Naomi support.",
-    "AGYW_ERROR_WRITE": "Falha ao criar a pasta de trabalho, entre em contato com o suporte: {{message}}",
+    "DOWNLOAD_SHIPP_DESCRIPTION": "Naomi SHIPP tool uploaded from Naomi web app",
+    "SHIPP_ERROR_KP_PREFIX": " Available KP PSE estimates for: \n",
+    "SHIPP_ERROR_FSRB_PREFIX": " Available female sexual risk behaviour survey estimates for: \n",
+    "SHIPP_ERROR_MSRB_PREFIX": " Available male sexual risk behaviour survey estimates for: \n",
+    "SHIPP_ERROR_NAOMI_MISMATCH": "\n\n Do not match Naomi estimates for: \n",
+    "SHIPP_ERROR_CONTACT_SUPPORT": "\n\nTo update estimates, please contact Naomi support.",
+    "SHIPP_ERROR_WRITE": "Falha ao criar a pasta de trabalho, entre em contato com o suporte: {{message}}",
     "FSW_CONSENSUS_WARNING": "Consensus estimate for FSW is larger than 5% of total females aged 15-59. Modelled estimates for FWS PSEs will be used.",
     "MSM_CONSENSUS_WARNING": "Consensus estimate for MSM is larger than 5% of total males aged 15-59. Modelled estimates for FWS PSEs will be used.",
     "PWID_CONSENSUS_WARNING": "Consensus estimate for PWID is larger than 5% of total males aged 15-59. Modelled estimates for PWID PSEs will be used."
diff --git a/man/assert_agyw_resource_hierarchy.Rd b/man/assert_agyw_resource_hierarchy.Rd
deleted file mode 100644
index 77dd977e..00000000
--- a/man/assert_agyw_resource_hierarchy.Rd
+++ /dev/null
@@ -1,15 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/agyw-integration.R
-\name{assert_agyw_resource_hierarchy}
-\alias{assert_agyw_resource_hierarchy}
-\title{Throw warning when area hierarchy in external AGYW resources read in from
-\code{naomi.resources} do not match Naomi outputs used to update AGYW estimates.}
-\usage{
-assert_agyw_resource_hierarchy(outputs, options)
-}
-\description{
-@param outputs Naomi outputs.
-@param options Naomi options.
-@return Resource hierarchy
-@keywords internal
-}
diff --git a/man/assert_shipp_resource_hierarchy.Rd b/man/assert_shipp_resource_hierarchy.Rd
new file mode 100644
index 00000000..f4e3f133
--- /dev/null
+++ b/man/assert_shipp_resource_hierarchy.Rd
@@ -0,0 +1,15 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/shipp.R
+\name{assert_shipp_resource_hierarchy}
+\alias{assert_shipp_resource_hierarchy}
+\title{Throw warning when area hierarchy in external SHIPP resources read in from
+\code{naomi.resources} do not match Naomi outputs used to update SHIPP estimates.}
+\usage{
+assert_shipp_resource_hierarchy(outputs, options)
+}
+\description{
+@param outputs Naomi outputs.
+@param options Naomi options.
+@return Resource hierarchy
+@keywords internal
+}
diff --git a/man/calculate_ywkp_pr_lor.Rd b/man/calculate_ywkp_pr_lor.Rd
index 7162ca58..b481a540 100644
--- a/man/calculate_ywkp_pr_lor.Rd
+++ b/man/calculate_ywkp_pr_lor.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/agyw-integration.R
+% Please edit documentation in R/shipp.R
 \name{calculate_ywkp_pr_lor}
 \alias{calculate_ywkp_pr_lor}
 \title{Calculate YWKP prevalence ratio and log odds ratio}
diff --git a/man/hintr_prepare_agyw_download.Rd b/man/hintr_prepare_shipp_download.Rd
similarity index 57%
rename from man/hintr_prepare_agyw_download.Rd
rename to man/hintr_prepare_shipp_download.Rd
index ecf3048f..3d5d40fd 100644
--- a/man/hintr_prepare_agyw_download.Rd
+++ b/man/hintr_prepare_shipp_download.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/downloads.R
-\name{hintr_prepare_agyw_download}
-\alias{hintr_prepare_agyw_download}
-\title{Prepare AGYW tool download}
+\name{hintr_prepare_shipp_download}
+\alias{hintr_prepare_shipp_download}
+\title{Prepare SHIPP tool download}
 \usage{
-hintr_prepare_agyw_download(output, pjnz, path = tempfile(fileext = ".xlsx"))
+hintr_prepare_shipp_download(output, pjnz, path = tempfile(fileext = ".xlsx"))
 }
 \arguments{
 \item{pjnz}{Path to input PJNZ file}
@@ -17,5 +17,5 @@ hintr_prepare_agyw_download(output, pjnz, path = tempfile(fileext = ".xlsx"))
 Path to output file and metadata for file
 }
 \description{
-Prepare AGYW tool download
+Prepare SHIPP tool download
 }
diff --git a/man/logit_scale_prev.Rd b/man/logit_scale_prev.Rd
index 9cc2418e..8f3f7feb 100644
--- a/man/logit_scale_prev.Rd
+++ b/man/logit_scale_prev.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/agyw-integration.R
+% Please edit documentation in R/shipp.R
 \name{logit_scale_prev}
 \alias{logit_scale_prev}
 \title{Calculate prevalence and PLHIV using logit-scale disaggregation}
diff --git a/man/make_agyw_testfiles.Rd b/man/make_shipp_testfiles.Rd
similarity index 69%
rename from man/make_agyw_testfiles.Rd
rename to man/make_shipp_testfiles.Rd
index 8a1396b7..67c3831e 100644
--- a/man/make_agyw_testfiles.Rd
+++ b/man/make_shipp_testfiles.Rd
@@ -1,17 +1,17 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/test-helpers.R
-\name{make_agyw_testfiles}
-\alias{make_agyw_testfiles}
+\name{make_shipp_testfiles}
+\alias{make_shipp_testfiles}
 \title{Build JSON from template and a set of params}
 \usage{
-make_agyw_testfiles(naomi_output)
+make_shipp_testfiles(naomi_output)
 }
 \arguments{
 \item{naomi_output}{Calibrated naomi output}
 }
 \value{
 Calibrated naomi output matched to MWI test data on
-\code{naomi.resources} to be used to generate the agyw tool.
+\code{naomi.resources} to be used to generate the shipp tool.
 }
 \description{
 Build JSON from template and a set of params
diff --git a/man/odds.Rd b/man/odds.Rd
index 2cda6442..2b7ca872 100644
--- a/man/odds.Rd
+++ b/man/odds.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/agyw-integration.R
+% Please edit documentation in R/shipp.R
 \name{odds}
 \alias{odds}
 \title{Calculate the odds}
diff --git a/man/agyw_adjust_sexbehav_fsw.Rd b/man/shipp_adjust_sexbehav_fsw.Rd
similarity index 80%
rename from man/agyw_adjust_sexbehav_fsw.Rd
rename to man/shipp_adjust_sexbehav_fsw.Rd
index 70123c37..f98b36b6 100644
--- a/man/agyw_adjust_sexbehav_fsw.Rd
+++ b/man/shipp_adjust_sexbehav_fsw.Rd
@@ -1,17 +1,17 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/agyw-integration.R
-\name{agyw_adjust_sexbehav_fsw}
-\alias{agyw_adjust_sexbehav_fsw}
+% Please edit documentation in R/shipp.R
+\name{shipp_adjust_sexbehav_fsw}
+\alias{shipp_adjust_sexbehav_fsw}
 \title{Adjust female sexual behavior risk groups by FSW proportions}
 \usage{
-agyw_adjust_sexbehav_fsw(outputs, options, fsw_est)
+shipp_adjust_sexbehav_fsw(outputs, options, fsw_est)
 }
 \arguments{
 \item{outputs}{Naomi output.}
 
 \item{options}{Naomi model options.}
 
-\item{fsw_est}{5-year estimates of FSW PSEs generated from \code{agyw_disaggregate_fsw()}.
+\item{fsw_est}{5-year estimates of FSW PSEs generated from \code{shipp_disaggregate_fsw()}.
 
 Estimates are generated for the following groups:
 \itemize{
diff --git a/man/agyw_adjust_sexbehav_msm_pwid.Rd b/man/shipp_adjust_sexbehav_msm_pwid.Rd
similarity index 81%
rename from man/agyw_adjust_sexbehav_msm_pwid.Rd
rename to man/shipp_adjust_sexbehav_msm_pwid.Rd
index 6810429d..467b2ead 100644
--- a/man/agyw_adjust_sexbehav_msm_pwid.Rd
+++ b/man/shipp_adjust_sexbehav_msm_pwid.Rd
@@ -1,19 +1,19 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/agyw-integration.R
-\name{agyw_adjust_sexbehav_msm_pwid}
-\alias{agyw_adjust_sexbehav_msm_pwid}
+% Please edit documentation in R/shipp.R
+\name{shipp_adjust_sexbehav_msm_pwid}
+\alias{shipp_adjust_sexbehav_msm_pwid}
 \title{Adjust male sexual behavior risk groups by MSM + PWID proportions}
 \usage{
-agyw_adjust_sexbehav_msm_pwid(outputs, options, msm_est, pwid_est)
+shipp_adjust_sexbehav_msm_pwid(outputs, options, msm_est, pwid_est)
 }
 \arguments{
 \item{outputs}{Naomi output.}
 
 \item{options}{Naomi model options.}
 
-\item{msm_est}{5-year estimates of MSM PSEs generated from \code{agyw__disaggregate_msm()}.}
+\item{msm_est}{5-year estimates of MSM PSEs generated from \code{shipp__disaggregate_msm()}.}
 
-\item{pwid_est}{5-year estimates of MSM PSEs generated from \code{agyw__disaggregate_pwid()}.}
+\item{pwid_est}{5-year estimates of MSM PSEs generated from \code{shipp__disaggregate_pwid()}.}
 }
 \value{
 District level estimates of male sexual risk behaviour groups
diff --git a/man/agyw_calculate_incidence_female.Rd b/man/shipp_calculate_incidence_female.Rd
similarity index 70%
rename from man/agyw_calculate_incidence_female.Rd
rename to man/shipp_calculate_incidence_female.Rd
index d063d0a7..a8352037 100644
--- a/man/agyw_calculate_incidence_female.Rd
+++ b/man/shipp_calculate_incidence_female.Rd
@@ -1,15 +1,16 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/agyw-integration.R
-\name{agyw_calculate_incidence_female}
-\alias{agyw_calculate_incidence_female}
+% Please edit documentation in R/shipp.R
+\name{shipp_calculate_incidence_female}
+\alias{shipp_calculate_incidence_female}
 \title{Calculate incidence for female SRB groups.}
 \usage{
-agyw_calculate_incidence_female(
+shipp_calculate_incidence_female(
   naomi_output,
   options,
   female_srb,
   female_logit_prevalence,
-  survey_year = 2018
+  survey_year = 2018,
+  kp_consensus
 )
 }
 \arguments{
@@ -17,15 +18,15 @@ agyw_calculate_incidence_female(
 
 \item{options}{Naomi model options.}
 
-\item{female_srb}{FSW adjusted estimates of female sexual risk groups generated by \code{agyw_adjust_sexbehav_fsw()}.}
+\item{female_srb}{FSW adjusted estimates of female sexual risk groups generated by \code{shipp_adjust_sexbehav_fsw()}.}
 
-\item{female_logit_prevalence}{Risk adjusted estimates of female prevalence in sexual risk groups generated by \code{agyw_calculate_prevalence_female()}.}
+\item{female_logit_prevalence}{Risk adjusted estimates of female prevalence in sexual risk groups generated by \code{shipp_calculate_prevalence_female()}.}
 
 \item{survey_year}{Survey year to sample from the SAE model. Default is 2018. Survey year should be updated to most current household survey in the country - for countries without recent household surveys, leave at 2018 - the spatiotemporal
 model of sexual behaviour fitted to all countries has the most data for in roughly 2018.}
 }
 \value{
-Wide format output required for the AGYW workbook.
+Wide format output required for the SHIPP workbook.
 }
 \description{
 While maintaining age/sex/district-specific HIV incidence from Naomi, distribute
diff --git a/man/agyw_calculate_incidence_male.Rd b/man/shipp_calculate_incidence_male.Rd
similarity index 67%
rename from man/agyw_calculate_incidence_male.Rd
rename to man/shipp_calculate_incidence_male.Rd
index 1b7c4f05..af301638 100644
--- a/man/agyw_calculate_incidence_male.Rd
+++ b/man/shipp_calculate_incidence_male.Rd
@@ -1,23 +1,24 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/agyw-integration.R
-\name{agyw_calculate_incidence_male}
-\alias{agyw_calculate_incidence_male}
+% Please edit documentation in R/shipp.R
+\name{shipp_calculate_incidence_male}
+\alias{shipp_calculate_incidence_male}
 \title{Calculate incidence in high risk male key populations}
 \usage{
-agyw_calculate_incidence_male(
+shipp_calculate_incidence_male(
   naomi_output,
   options,
   male_srb,
   male_logit_prevalence,
-  survey_year = 2018
+  survey_year = 2018,
+  kp_consensus
 )
 }
 \arguments{
 \item{options}{Naomi model options.}
 
-\item{male_srb}{MSM and PWID adjusted estimated of male SRB groups generated by \code{agyw_adjust_sexbehav_msm_pwid()}.}
+\item{male_srb}{MSM and PWID adjusted estimated of male SRB groups generated by \code{shipp_adjust_sexbehav_msm_pwid()}.}
 
-\item{male_logit_prevalence}{Risk adjusted estimates of male HIV prevalence in sexual risk groups generated by \code{agyw_calculate_prevalence_male()}.}
+\item{male_logit_prevalence}{Risk adjusted estimates of male HIV prevalence in sexual risk groups generated by \code{shipp_calculate_prevalence_male()}.}
 
 \item{survey_year}{Survey year to sample from the SAE model. Default is 2018. Survey year should be updated to most current household survey in the country - for countries without recent household surveys, leave at 2018 - the spatiotemporal
 model of sexual behaviour fitted to all countries has the most data for in roughly 2018.}
@@ -25,7 +26,7 @@ model of sexual behaviour fitted to all countries has the most data for in rough
 \item{outputs}{Naomi output.}
 }
 \value{
-Wide format output required for the AGYW workbook
+Wide format output required for the SHIPP workbook
 }
 \description{
 Calculate incidence in high risk male key populations
diff --git a/man/agyw_calculate_prevalence_female.Rd b/man/shipp_calculate_prevalence_female.Rd
similarity index 82%
rename from man/agyw_calculate_prevalence_female.Rd
rename to man/shipp_calculate_prevalence_female.Rd
index 16eef6a7..9ac9e421 100644
--- a/man/agyw_calculate_prevalence_female.Rd
+++ b/man/shipp_calculate_prevalence_female.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/agyw-integration.R
-\name{agyw_calculate_prevalence_female}
-\alias{agyw_calculate_prevalence_female}
+% Please edit documentation in R/shipp.R
+\name{shipp_calculate_prevalence_female}
+\alias{shipp_calculate_prevalence_female}
 \title{Calculate prevalence for female SRB groups.}
 \usage{
-agyw_calculate_prevalence_female(
+shipp_calculate_prevalence_female(
   naomi_output,
   options,
   fsw_est,
@@ -17,9 +17,9 @@ agyw_calculate_prevalence_female(
 
 \item{options}{Naomi model options.}
 
-\item{fsw_est}{5-year estimates of FSW PSEs generated from \code{agyw_disaggregate_fse()}.}
+\item{fsw_est}{5-year estimates of FSW PSEs generated from \code{shipp_disaggregate_fse()}.}
 
-\item{female_srb}{FSW adjusted estimates of female SRB groups generated by \code{agyw_adjust_sexbehav_fsw()}}
+\item{female_srb}{FSW adjusted estimates of female SRB groups generated by \code{shipp_adjust_sexbehav_fsw()}}
 
 \item{survey_year}{Year of survey to sample estimates.
 
diff --git a/man/agyw_calculate_prevalence_male.Rd b/man/shipp_calculate_prevalence_male.Rd
similarity index 83%
rename from man/agyw_calculate_prevalence_male.Rd
rename to man/shipp_calculate_prevalence_male.Rd
index fbaf3c07..0ca15b62 100644
--- a/man/agyw_calculate_prevalence_male.Rd
+++ b/man/shipp_calculate_prevalence_male.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/agyw-integration.R
-\name{agyw_calculate_prevalence_male}
-\alias{agyw_calculate_prevalence_male}
+% Please edit documentation in R/shipp.R
+\name{shipp_calculate_prevalence_male}
+\alias{shipp_calculate_prevalence_male}
 \title{Calculate prevalence for male SRB groups.}
 \usage{
-agyw_calculate_prevalence_male(
+shipp_calculate_prevalence_male(
   naomi_output,
   areas,
   options,
@@ -20,9 +20,9 @@ agyw_calculate_prevalence_male(
 
 \item{options}{Naomi model options.}
 
-\item{msm_est}{5-year estimates of MSM PSEs generated from \code{agyw__disaggregate_msm()}.}
+\item{msm_est}{5-year estimates of MSM PSEs generated from \code{shipp__disaggregate_msm()}.}
 
-\item{male_srb}{MSM and PWID adjusted estimates of male SRB groups generated by \code{agyw_adjust_sexbehav_msm_pwid()}.}
+\item{male_srb}{MSM and PWID adjusted estimates of male SRB groups generated by \code{shipp_adjust_sexbehav_msm_pwid()}.}
 
 \item{survey_year}{Year of survey to sample estimates.
 
diff --git a/man/agyw_disaggregate_fsw.Rd b/man/shipp_disaggregate_fsw.Rd
similarity index 74%
rename from man/agyw_disaggregate_fsw.Rd
rename to man/shipp_disaggregate_fsw.Rd
index dfe15601..3c87611c 100644
--- a/man/agyw_disaggregate_fsw.Rd
+++ b/man/shipp_disaggregate_fsw.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/agyw-integration.R
-\name{agyw_disaggregate_fsw}
-\alias{agyw_disaggregate_fsw}
+% Please edit documentation in R/shipp.R
+\name{shipp_disaggregate_fsw}
+\alias{shipp_disaggregate_fsw}
 \title{Dissagreggate admin1 FSW proportions from Oli's KP model to 5-age groups}
 \usage{
-agyw_disaggregate_fsw(outputs, options, naomi_pop, kp_consensus)
+shipp_disaggregate_fsw(outputs, options, naomi_pop, kp_consensus)
 }
 \arguments{
 \item{outputs}{Naomi output.}
diff --git a/man/agyw_disaggregate_msm.Rd b/man/shipp_disaggregate_msm.Rd
similarity index 74%
rename from man/agyw_disaggregate_msm.Rd
rename to man/shipp_disaggregate_msm.Rd
index 27e56b5a..1ba051cb 100644
--- a/man/agyw_disaggregate_msm.Rd
+++ b/man/shipp_disaggregate_msm.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/agyw-integration.R
-\name{agyw_disaggregate_msm}
-\alias{agyw_disaggregate_msm}
+% Please edit documentation in R/shipp.R
+\name{shipp_disaggregate_msm}
+\alias{shipp_disaggregate_msm}
 \title{Disaggregate admin1 MSM proportions from Oli's KP model to 5-age groups}
 \usage{
-agyw_disaggregate_msm(outputs, options, naomi_pop, kp_consensus)
+shipp_disaggregate_msm(outputs, options, naomi_pop, kp_consensus)
 }
 \arguments{
 \item{outputs}{Naomi output.}
diff --git a/man/agyw_disaggregate_pwid.Rd b/man/shipp_disaggregate_pwid.Rd
similarity index 73%
rename from man/agyw_disaggregate_pwid.Rd
rename to man/shipp_disaggregate_pwid.Rd
index ee767f4a..e40ce820 100644
--- a/man/agyw_disaggregate_pwid.Rd
+++ b/man/shipp_disaggregate_pwid.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/agyw-integration.R
-\name{agyw_disaggregate_pwid}
-\alias{agyw_disaggregate_pwid}
+% Please edit documentation in R/shipp.R
+\name{shipp_disaggregate_pwid}
+\alias{shipp_disaggregate_pwid}
 \title{Disaggregate admin1 PWID proportions from Oli's KP model to 5-age groups}
 \usage{
-agyw_disaggregate_pwid(outputs, options, naomi_pop, kp_consensus)
+shipp_disaggregate_pwid(outputs, options, naomi_pop, kp_consensus)
 }
 \arguments{
 \item{outputs}{Naomi output.}
diff --git a/man/agyw_format_naomi.Rd b/man/shipp_format_naomi.Rd
similarity index 57%
rename from man/agyw_format_naomi.Rd
rename to man/shipp_format_naomi.Rd
index 6fc1a935..571629fe 100644
--- a/man/agyw_format_naomi.Rd
+++ b/man/shipp_format_naomi.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/agyw-integration.R
-\name{agyw_format_naomi}
-\alias{agyw_format_naomi}
+% Please edit documentation in R/shipp.R
+\name{shipp_format_naomi}
+\alias{shipp_format_naomi}
 \title{Format naomi outputs for PSE tool}
 \usage{
-agyw_format_naomi(outputs, options)
+shipp_format_naomi(outputs, options)
 }
 \arguments{
 \item{outputs}{Naomi output}
@@ -12,7 +12,7 @@ agyw_format_naomi(outputs, options)
 \item{options}{Naomi model options.}
 }
 \value{
-Naomi indicators formatted for the AGYW workbook.
+Naomi indicators formatted for the SHIPP workbook.
 }
 \description{
 Format naomi outputs for PSE tool
diff --git a/man/agyw_generate_risk_populations.Rd b/man/shipp_generate_risk_populations.Rd
similarity index 60%
rename from man/agyw_generate_risk_populations.Rd
rename to man/shipp_generate_risk_populations.Rd
index 11128ab8..3e7925d2 100644
--- a/man/agyw_generate_risk_populations.Rd
+++ b/man/shipp_generate_risk_populations.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/agyw-integration.R
-\name{agyw_generate_risk_populations}
-\alias{agyw_generate_risk_populations}
-\title{Generate outputs to update AGYW tool.}
+% Please edit documentation in R/shipp.R
+\name{shipp_generate_risk_populations}
+\alias{shipp_generate_risk_populations}
+\title{Generate outputs to update SHIPP tool.}
 \usage{
-agyw_generate_risk_populations(naomi_output, pjnz, survey_year = 2018)
+shipp_generate_risk_populations(naomi_output, pjnz, survey_year = 2018)
 }
 \arguments{
 \item{naomi_output}{Path to naomi output (zip file or hintr object).}
@@ -14,14 +14,14 @@ agyw_generate_risk_populations(naomi_output, pjnz, survey_year = 2018)
 \item{survey_year}{Survey year to sample from the SAE model. Default is 2018. Survey year should be updated to most current household survey in the country - for countries without recent household surveys, leave at 2018 - the spatiotemporal
 model of sexual behaviour fitted to all countries has the most data for in roughly 2018.}
 
-\item{male_srb}{Estimates of male sexual risk groups generated by \code{agyw_adjust_sexbehav_msm_pwid()}}
+\item{male_srb}{Estimates of male sexual risk groups generated by \code{shipp_adjust_sexbehav_msm_pwid()}}
 
-\item{male_logit_prevalence}{Risk adjusted estimates of male prevalence in sexual risk groups generated by \code{agyw_calculate_prevalence_male()}}
+\item{male_logit_prevalence}{Risk adjusted estimates of male prevalence in sexual risk groups generated by \code{shipp_calculate_prevalence_male()}}
 }
 \value{
-Output files to update AGYW excel workbook.
+Output files to update SHIPP excel workbook.
 }
 \description{
-Generate outputs to update AGYW tool.
+Generate outputs to update SHIPP tool.
 }
 \keyword{internal}
diff --git a/tests/testthat/test-downloads.R b/tests/testthat/test-downloads.R
index 4ce9d1a0..768f0037 100644
--- a/tests/testthat/test-downloads.R
+++ b/tests/testthat/test-downloads.R
@@ -193,15 +193,15 @@ test_that("comparison report download can be created", {
 })
 
 
-test_that("AGYW download can be created", {
+test_that("SHIPP download can be created", {
 
-  agyw_output_demo <- make_agyw_testfiles(a_hintr_output_calibrated)
+  shipp_output_demo <- make_shipp_testfiles(a_hintr_output_calibrated)
 
   mock_new_simple_progress <- mockery::mock(MockSimpleProgress$new())
 
   with_mocked_bindings(
     messages <- naomi_evaluate_promise(
-      out <- hintr_prepare_agyw_download(agyw_output_demo,
+      out <- hintr_prepare_shipp_download(shipp_output_demo,
                                          a_hintr_data$pjnz)),
     new_simple_progress = mock_new_simple_progress)
 
@@ -220,24 +220,24 @@ test_that("AGYW download can be created", {
 
   ## Progress messages printed
   expect_length(messages$progress, 1)
-  expect_equal(messages$progress[[1]]$message, "Generating AGYW tool")
+  expect_equal(messages$progress[[1]]$message, "Generating SHIPP tool")
 
-  # Test agyw workbook with no kp workbook saved into spectrum
-  risk_prop <- agyw_generate_risk_populations(agyw_output_demo$model_output_path,
+  # Test shipp workbook with no kp workbook saved into spectrum
+  risk_prop <- shipp_generate_risk_populations(shipp_output_demo$model_output_path,
                                               a_hintr_data$pjnz)
 
   expect_equal(risk_prop$meta_consensus,
                data.frame(kp = c("FSW", "MSM", "PWID"),
                           consensus_estimate = NA))
 
-  # Test agyw workbook with mock workbook saved into spectrum
+  # Test shipp workbook with mock workbook saved into spectrum
   kp_consensus <- readRDS(file.path("testdata/kp_workbook_spectrum.rds"))
   mock_extract_kp_workbook <- mockery::mock(kp_consensus)
   mock_new_simple_progress <- mockery::mock(MockSimpleProgress$new())
 
   with_mocked_bindings(
-    risk_prop_scaled <- agyw_generate_risk_populations(
-      agyw_output_demo$model_output_path, a_hintr_data$pjnz),
+    risk_prop_scaled <- shipp_generate_risk_populations(
+      shipp_output_demo$model_output_path, a_hintr_data$pjnz),
     new_simple_progress = mock_new_simple_progress,
     extract_kp_workbook = mock_extract_kp_workbook
   )
@@ -248,10 +248,10 @@ test_that("AGYW download can be created", {
                           consensus_estimate = c(40000, 35500, 5000)))
 
   # Test that PSE tool adjusted to KP consensus estimates correctly
-  model_object <- read_hintr_output(agyw_output_demo$model_output_path)
+  model_object <- read_hintr_output(shipp_output_demo$model_output_path)
   outputs <- model_object$output_package
   options <- outputs$fit$model_options
-  naomi <- agyw_format_naomi(outputs, options)
+  naomi <- shipp_format_naomi(outputs, options)
 
   # Naomi population
   naomi_pop <- naomi$naomi_long %>%
@@ -263,9 +263,9 @@ test_that("AGYW download can be created", {
 
   # KP PSEs adjusted to consensus estimates when consensus estimates are
   #  < 5% of age matched population  denominator
-  fsw_est <- agyw_disaggregate_fsw(outputs, options, naomi_pop, kp_consensus)
-  pwid_est <- agyw_disaggregate_pwid(outputs, options, naomi_pop, kp_consensus)
-  msm_est <- agyw_disaggregate_msm(outputs, options, naomi_pop, kp_consensus)
+  fsw_est <- shipp_disaggregate_fsw(outputs, options, naomi_pop, kp_consensus)
+  pwid_est <- shipp_disaggregate_pwid(outputs, options, naomi_pop, kp_consensus)
+  msm_est <- shipp_disaggregate_msm(outputs, options, naomi_pop, kp_consensus)
 
   fsw <- sum(fsw_est$fsw)
   pwid <- sum(pwid_est$pwid)
@@ -283,8 +283,8 @@ test_that("AGYW download can be created", {
   mock_new_simple_progress <- mockery::mock(MockSimpleProgress$new())
 
   with_mocked_bindings(
-    risk_prop_scaled <- agyw_generate_risk_populations(
-      agyw_output_demo$model_output_path, a_hintr_data$pjnz),
+    risk_prop_scaled <- shipp_generate_risk_populations(
+      shipp_output_demo$model_output_path, a_hintr_data$pjnz),
     new_simple_progress = mock_new_simple_progress,
     extract_kp_workbook = mock_extract_kp_workbook
   )
@@ -298,9 +298,9 @@ test_that("AGYW download can be created", {
   # KP PSEs use default proportions from Oli's mode when consensus estimates are
   #  >= 5% of age matched population  denominator
 
-  fsw_est <- agyw_disaggregate_fsw(outputs, options, naomi_pop, kp_consensus_bad)
-  pwid_est <- agyw_disaggregate_pwid(outputs, options, naomi_pop, kp_consensus_bad)
-  msm_est <- agyw_disaggregate_msm(outputs, options, naomi_pop, kp_consensus_bad)
+  fsw_est <- shipp_disaggregate_fsw(outputs, options, naomi_pop, kp_consensus_bad)
+  pwid_est <- shipp_disaggregate_pwid(outputs, options, naomi_pop, kp_consensus_bad)
+  msm_est <- shipp_disaggregate_msm(outputs, options, naomi_pop, kp_consensus_bad)
 
   fsw <- sum(fsw_est$fsw)
   pwid <- sum(pwid_est$pwid)
@@ -311,7 +311,7 @@ test_that("AGYW download can be created", {
 })
 
 
-test_that("Error thrown when AGYW resources are out of date", {
+test_that("Error thrown when SHIPP resources are out of date", {
 
   kp_error <- paste0("Available KP PSE estimates for: \n",
                      "MWI_1_1; MWI_1_2; MWI_1_3",
@@ -319,7 +319,7 @@ test_that("Error thrown when AGYW resources are out of date", {
                      "MWI_2_1_demo; MWI_2_2_demo; MWI_2_3_demo; MWI_2_4_demo; MWI_2_5_demo",
                      "\n\nTo update estimates, please contact Naomi support.")
 
- expect_error(hintr_prepare_agyw_download(a_hintr_output_calibrated,
+ expect_error(hintr_prepare_shipp_download(a_hintr_output_calibrated,
                                           a_hintr_data$pjnz), kp_error)
 
 })
@@ -341,6 +341,6 @@ test_that("output description is translated", {
 test_that("failing to write data into xlsx sheet gives a useful error", {
   sheets_to_write <- list(x = data.frame(x = c(1, 2, 3)))
   dest <- tempfile()
-  expect_error(write_agyw_workbook(sheets_to_write, dest),
+  expect_error(write_shipp_workbook(sheets_to_write, dest),
                "Failed to build workbook, please contact support: Sheet 'x' does not exist")
 })

From 7b2ea086f13033763dd27b08c5141a48ecc0950c Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Mon, 5 Feb 2024 12:03:31 +0200
Subject: [PATCH 47/53] Scale new infection to KP workbook or GOALS

---
 R/shipp.R | 210 +++++++++++++++++++++++++-----------------------------
 1 file changed, 99 insertions(+), 111 deletions(-)

diff --git a/R/shipp.R b/R/shipp.R
index b74f1ad8..791b19f0 100644
--- a/R/shipp.R
+++ b/R/shipp.R
@@ -1181,49 +1181,52 @@ shipp_calculate_incidence_female <- function(naomi_output,
       susceptible_sexnonreg = population_sexnonreg - plhiv_sexnonreg,
       susceptible_sexpaid12m = population_sexpaid12m - plhiv_sexpaid12m,
       incidence_sexpaid12m = (incidence/100) * rr_sexpaid12m,
-      infections_sexpaid12m = susceptible_sexpaid12m * incidence_sexpaid12m,
+      infections_sexpaid12m = susceptible_sexpaid12m * incidence_sexpaid12m)
+
+  # Scale FSW new infections consensus estimate for KP Workbook or Goals
+  # Check for consensus estimate of FSW new infections
+  fsw_consensus <- kp_consensus[kp_consensus$key_population == "FSW", ]$infections
+
+  if(is.na(fsw_consensus)){
+    # If no KP workbook present, read in FSW new infections from GOALS
+    goals <- naomi.resources::load_shipp_exdata("goals", "SSA")
+    fsw_consensus <- goals[goals$iso3 == options$area_scope, ]$`fsw-new_inf`
+  }
+
+  # Sum prior count of new infections
+  fsw_sum <- sum(df1$infections_sexpaid12m)
+  # Generate a ratio to scale FSW new infections by
+  fsw_ratio <- fsw_consensus / fsw_sum
+
+  # Adjust new infections
+  df2 <- df1 %>%
+    dplyr::mutate(
+      # Adjust district-level new infections and incidence for FSW
+      infections_sexpaid12m = infections_sexpaid12m * fsw_ratio,
+      incidence_sexpaid12m = infections_sexpaid12m / susceptible_sexpaid12m,
+      # Adjust sexcohab and sexnonreg new infections and incidence to scale rest of infections
+      # from the district
+      incidence_sexcohab = (infections - infections_sexpaid12m) / (susceptible_sexcohab + rr_sexnonreg * susceptible_sexnonreg),
+      incidence_sexnonreg = incidence_sexcohab * rr_sexnonreg,
+      infections_sexcohab = susceptible_sexcohab * incidence_sexcohab,
+      infections_sexnonreg = susceptible_sexnonreg * incidence_sexnonreg,
+      rr_sexpaid12m = incidence_sexpaid12m / incidence_sexcohab,
+      # Calculate incidence in rest of groups
       incidence_nosex12m = 0,
       incidence_sexcohab = (infections - infections_sexpaid12m) / (susceptible_sexcohab + rr_sexnonreg * susceptible_sexnonreg),
       incidence_sexnonreg = incidence_sexcohab * rr_sexnonreg,
       infections_nosex12m = 0,
       infections_sexcohab = susceptible_sexcohab * incidence_sexcohab,
-      infections_sexnonreg = susceptible_sexnonreg * incidence_sexnonreg)
-
-  # Check for consensus estimate of FSW infections
-  fsw_consensus <- kp_consensus[kp_consensus$key_population == "FSW", ]$infections
-
-  if(!is.na(fsw_consensus)){
+      infections_sexnonreg = susceptible_sexnonreg * incidence_sexnonreg
+    )
 
-    # scale new infections if there's a KP consensus estimate
-    # sum prior count of new infections
-    fsw_sum <- sum(df1$infections_sexpaid12m)
-    # generate a ratio to scale FSW new infections by
-    fsw_ratio <- fsw_consensus / fsw_sum
-    # adjust district-level new infections and incidence for FSW
-    df1 <- df1 %>%
-      dplyr::mutate(
-        infections_sexpaid12m = infections_sexpaid12m * fsw_ratio,
-        incidence_sexpaid12m = infections_sexpaid12m / susceptible_sexpaid12m,
-      )
-    # Error here to catch that the KP adjustment has made the number of new infections
-    # in KPs greater than the estimated population susceptible
+  # Error here to catch that the KP adjustment has made the number of new infections
+  # in KPs greater than the estimated population susceptible
     if(sum(df1$incidence_sexpaid12m > 1) > 0) {
       stop("KP new infections exceeds susceptible population size. Please contact support.")
     }
-    # adjust sexcohab and sexnonreg new infections and incidence to scale rest of infections
-    # from the district
-    df1 <- df1 %>%
-      dplyr::mutate(
-        incidence_sexcohab = (infections - infections_sexpaid12m) / (susceptible_sexcohab + rr_sexnonreg * susceptible_sexnonreg),
-        incidence_sexnonreg = incidence_sexcohab * rr_sexnonreg,
-        infections_sexcohab = susceptible_sexcohab * incidence_sexcohab,
-        infections_sexnonreg = susceptible_sexnonreg * incidence_sexnonreg,
-        rr_sexpaid12m = incidence_sexpaid12m / incidence_sexcohab
-      )
-  }
 
   # Calculate risk group incidence for aggregate age groups
-
   summarise_age_cat_female <- function(dat, age_cat) {
 
     if (age_cat == "Y015_024") {age_groups <- c("Y015_019", "Y020_024")}
@@ -1276,33 +1279,34 @@ shipp_calculate_incidence_female <- function(naomi_output,
   }
 
   # Aggregate data
-  df2 <- dplyr::bind_rows(summarise_age_cat_female(df1, "Y015_024"),
-                          summarise_age_cat_female(df1, "Y025_049"),
-                          summarise_age_cat_female(df1, "Y015_049"))
+  df3 <- dplyr::bind_rows(summarise_age_cat_female(df2, "Y015_024"),
+                          summarise_age_cat_female(df2, "Y025_049"),
+                          summarise_age_cat_female(df2, "Y015_049"))
 
   # Calculate incidence
-  df3 <- dplyr::bind_rows(df1, df2) %>%
+  df4 <- dplyr::bind_rows(df2, df3) %>%
     dplyr::mutate(incidence_cat = cut(incidence,
                                       c(0, 0.3, 1, 3, 10^6),
                                       labels = c("Low", "Moderate", "High", "Very High"),
                                       include.lowest = TRUE, right = TRUE))
 
   # Check that sum of disaggregated infections is the same as total infections
-  sum_infections <- df3$infections_nosex12m + df3$infections_sexcohab + df3$infections_sexnonreg + df3$infections_sexpaid12m
+  sum_infections <- df4$infections_nosex12m + df4$infections_sexcohab + df4$infections_sexnonreg + df4$infections_sexpaid12m
 
-  if(max(df3$infections - sum_infections) > 10^{-9}){
+  if(max(df4$infections - sum_infections) > 10^{-9}){
     stop("Risk group proportions do not sum correctly. Please contact suppport.")
   }
 
   # Check that new infections are never negative in any behavioural risk group
-  sexcohab_inf_check <- sum(df3$infections_sexcohab < 0)
-  sexnonreg_inf_check <- sum(df3$infections_sexnonreg < 0)
-  sexpaid12m_inf_check <- sum(df3$infections_sexpaid12m < 0)
+  sexcohab_inf_check <- sum(df4$infections_sexcohab < 0)
+  sexnonreg_inf_check <- sum(df4$infections_sexnonreg < 0)
+  sexpaid12m_inf_check <- sum(df4$infections_sexpaid12m < 0)
+
   if(sum(sexcohab_inf_check,sexnonreg_inf_check,sexpaid12m_inf_check)>0) {
     stop("Number of new infections below 0. Please contact support.")
   }
 
-  df3 %>%
+  df4 %>%
     dplyr::mutate(concat = paste0(area_id, age_group), iso3 = options$area_scope) %>%
     dplyr::select(area_id, age_group, concat,
                   nosex12m, sexcohab, sexnonregplus, sexnonreg, sexpaid12m,
@@ -1324,8 +1328,6 @@ shipp_calculate_incidence_female <- function(naomi_output,
     dplyr::mutate_if(is.numeric, as.numeric) %>%
     dplyr::mutate_if(is.factor, as.character)
 
-
-
 }
 
 #' Calculate incidence in high risk male key populations
@@ -1416,74 +1418,60 @@ shipp_calculate_incidence_male <- function(naomi_output,
       incidence_msm = (incidence/100) * rr_msm,
       incidence_pwid = (incidence/100) * rr_pwid,
       infections_msm = susceptible_msm * incidence_msm,
-      infections_pwid = susceptible_pwid * incidence_pwid,
+      infections_pwid = susceptible_pwid * incidence_pwid)
+
+  # Scale MSM and PWID new infections consensus estimate for KP Workbook or Goals
+  # Check for consensus estimate of MSM and PWID new infections
+  msm_consensus <- kp_consensus[kp_consensus$key_population == "MSM", ]$infections
+  pwid_consensus <- kp_consensus[kp_consensus$key_population == "PWID", ]$infections
+
+  if(is.na(msm_consensus)){
+    # If no KP workbook present, read in FSW new infections from GOALS
+    goals <- naomi.resources::load_shipp_exdata("goals", "SSA")
+    msm_consensus <- goals[goals$iso3 == options$area_scope, ]$`msm-new_inf`
+  }
+
+  if(is.na(pwid_consensus)){
+    # If no KP workbook present, read in FSW new infections from GOALS
+    goals <- naomi.resources::load_shipp_exdata("goals", "SSA")
+    pwid_consensus <- goals[goals$iso3 == options$area_scope, ]$`pwid-new_inf`
+  }
+
+  # Sum prior count of new infections
+  msm_sum <- sum(df1$infections_msm)
+  pwid_sum <- sum(df1$pwid)
+
+  # Generate a ratio to scale FSW new infections by
+  msm_ratio <- msm_consensus / msm_sum
+  pwid_ratio <- pwid_consensus / pwid_sum
+
+  # Adjust new infections
+  df2 <- df1 %>%
+    dplyr::mutate(
+      # Adjust district-level new infections and incidence for MSM
+      infections_msm = infections_msm * msm_ratio,
+      incidence_msm = infections_msm / susceptible_msm,
+      # Adjust district-level new infections and incidence for PWID
+      infections_pwid = infections_pwid * pwid_ratio,
+      incidence_pwid = infections_pwid / susceptible_pwid,
+
+      # Adjust sexcohab and sexnonreg new infections and incidence to scale rest of infections
+      # from the district
       incidence_nosex12m = 0,
       incidence_sexcohab = (infections - infections_msm - infections_pwid) / (susceptible_sexcohab +
-                                           rr_sexnonreg * susceptible_sexnonreg),
+                                                                                rr_sexnonreg * susceptible_sexnonreg),
       incidence_sexnonreg = incidence_sexcohab * rr_sexnonreg,
       infections_nosex12m = 0,
       infections_sexcohab = susceptible_sexcohab * incidence_sexcohab,
       infections_sexnonreg = susceptible_sexnonreg * incidence_sexnonreg
-
     )
 
-  # Check for consensus estimate of MSM and PWID infections
-  male_consensus <- kp_consensus[kp_consensus$key_population %in% c("MSM","PWID"),]$infections
-
-
-  if(!anyNA(male_consensus)){
-
-    msm_consensus <- kp_consensus[kp_consensus$key_population %in% c("MSM"),]$infections
-    pwid_consensus <- kp_consensus[kp_consensus$key_population %in% c("PWID"),]$infections
-
-    # scale new infections for MSM if there's an MSM KP consensus estimate
-    if(!is.na(msm_consensus)){
-      # sum prior count of new infections
-      msm_sum <- sum(df1$infections_msm)
-      # generate a ratio to scale MSM new infections by
-      msm_ratio <- msm_consensus / msm_sum
-      # adjust district-level new infections and incidence for MSM
-      df1 <- df1 %>%
-        dplyr::mutate(
-          infections_msm = infections_msm * msm_ratio,
-          incidence_msm = infections_msm / susceptible_msm,
-        )
-    }
-
-    # scale new infections for PWID if there's a PWID KP consensus estimate
-    if(!is.na(pwid_consensus)){
-      # scale consensus that we'll use to account for the 1:10 ratio assumption of
-      # male:female PWID
-      pwid_consensus <- pwid_consensus * 0.91
-      # sum prior count of new infections
-      pwid_sum <- sum(df1$infections_pwid)
-      # generate a ratio to scale PWID new infections by
-      pwid_ratio <- pwid_consensus / pwid_sum
-      # adjust district-level new infections and incidence for MSM
-      df1 <- df1 %>%
-        dplyr::mutate(
-          infections_pwid = infections_pwid * pwid_ratio,
-          incidence_pwid = infections_pwid / susceptible_pwid,
-        )
-    }
     # Error here to catch that the KP adjustment has made the number of new infections
     # in KPs greater than the estimated population susceptible
-    if(sum(df1$incidence_msm > 1,df1$incidence_pwid > 1) > 0) {
+    if(sum(df1$incidence_msm > 1, df1$incidence_pwid > 1) > 0) {
       stop("KP new infections exceeds susceptible population size. Please contact support.")
     }
-    # adjust sexcohab and sexnonreg new infections and incidence to scale rest of infections
-    # from the district
-    df1 <- df1 %>%
-      dplyr::mutate(
-        incidence_sexcohab = (infections - infections_msm - infections_pwid) / (susceptible_sexcohab +
-                                                                                  rr_sexnonreg * susceptible_sexnonreg),
-        incidence_sexnonreg = incidence_sexcohab * rr_sexnonreg,
-        infections_sexcohab = susceptible_sexcohab * incidence_sexcohab,
-        infections_sexnonreg = susceptible_sexnonreg * incidence_sexnonreg,
-        rr_msm = incidence_msm / incidence_sexcohab,
-        rr_pwid = incidence_pwid / incidence_sexcohab
-      )
-  }
+
 
   # Calculate risk group incidence for aggregate age groups
 
@@ -1546,12 +1534,12 @@ shipp_calculate_incidence_male <- function(naomi_output,
   }
 
   # Aggregate data
-  df2 <- dplyr::bind_rows(summarise_age_cat_male(df1, "Y015_024"),
-                          summarise_age_cat_male(df1, "Y025_049"),
-                          summarise_age_cat_male(df1, "Y015_049"))
+  df3 <- dplyr::bind_rows(summarise_age_cat_male(df2, "Y015_024"),
+                          summarise_age_cat_male(df2, "Y025_049"),
+                          summarise_age_cat_male(df2, "Y015_049"))
 
   # Calculate incidence
-  df3 <- dplyr::bind_rows(df1, df2) %>%
+  df4 <- dplyr::bind_rows(df2, df3) %>%
     dplyr::mutate(incidence_cat = cut(incidence,
                                       c(0, 0.3, 1, 3, 10^6),
                                       labels = c("Low", "Moderate", "High", "Very High"),
@@ -1561,23 +1549,23 @@ shipp_calculate_incidence_male <- function(naomi_output,
 
   # Check that sum of disaggregated infections is the same as total infections
   #  TO DO: add warning for sum not matching - contact admin
-  sum_infections <- df3$infections_nosex12m + df3$infections_sexcohab + df3$infections_sexnonreg + df3$infections_msm + df3$infections_pwid
+  sum_infections <- df4$infections_nosex12m + df4$infections_sexcohab + df4$infections_sexnonreg + df4$infections_msm + df4$infections_pwid
 
-  if(max(df3$infections - sum_infections) > 10^{-9}){
+  if(max(df4$infections - sum_infections) > 10^{-9}){
     stop("Risk group proportions do not sum correctly. Please contact suppport.")
   }
 
   # Check that new infections are never negative in any behavioural risk group
-  sexcohab_inf_check <- sum(df3$infections_sexcohab < 0)
-  sexnonreg_inf_check <- sum(df3$infections_sexnonreg < 0)
-  msm_inf_check <- sum(df3$infections_msm < 0)
-  pwid_inf_check <- sum(df3$infections_pwid < 0)
+  sexcohab_inf_check <- sum(df4$infections_sexcohab < 0)
+  sexnonreg_inf_check <- sum(df4$infections_sexnonreg < 0)
+  msm_inf_check <- sum(df4$infections_msm < 0)
+  pwid_inf_check <- sum(df4$infections_pwid < 0)
   if(sum(sexcohab_inf_check,sexnonreg_inf_check,msm_inf_check,pwid_inf_check)>0) {
     stop("Number of new infections below 0. Please contact support.")
   }
 
 
-  df3 %>%
+  df4 %>%
     dplyr::mutate(concat = paste0(area_id, age_group), iso3 = options$area_scope) %>%
     dplyr::select(area_id, age_group, concat,
                   nosex12m, sexcohab, sexnonregplus, sexnonreg, msm, pwid,

From e35a4a05f02b4dcf7f5bb83f4c0b086ee5567937 Mon Sep 17 00:00:00 2001
From: Katie Risher <krisher@ic.ac.uk>
Date: Sun, 18 Feb 2024 21:24:08 -0500
Subject: [PATCH 48/53] Update SHIPP vignette + some code fixes

- Align SHIPP vignette with the methods used
- Minor code fixes for adjusting KP new infections to either KP workbook or Goals estimates under the assumption that a consensus KP new infection estimate is never missing
---
 R/shipp.R                       |  23 +++----
 vignettes/hiv-prev-workflow.Rmd | 115 +++++++++++++++-----------------
 2 files changed, 64 insertions(+), 74 deletions(-)

diff --git a/R/shipp.R b/R/shipp.R
index 791b19f0..3782049f 100644
--- a/R/shipp.R
+++ b/R/shipp.R
@@ -1204,25 +1204,19 @@ shipp_calculate_incidence_female <- function(naomi_output,
       # Adjust district-level new infections and incidence for FSW
       infections_sexpaid12m = infections_sexpaid12m * fsw_ratio,
       incidence_sexpaid12m = infections_sexpaid12m / susceptible_sexpaid12m,
-      # Adjust sexcohab and sexnonreg new infections and incidence to scale rest of infections
-      # from the district
+      # Calculate incidence in rest of groups
       incidence_sexcohab = (infections - infections_sexpaid12m) / (susceptible_sexcohab + rr_sexnonreg * susceptible_sexnonreg),
       incidence_sexnonreg = incidence_sexcohab * rr_sexnonreg,
       infections_sexcohab = susceptible_sexcohab * incidence_sexcohab,
       infections_sexnonreg = susceptible_sexnonreg * incidence_sexnonreg,
-      rr_sexpaid12m = incidence_sexpaid12m / incidence_sexcohab,
-      # Calculate incidence in rest of groups
       incidence_nosex12m = 0,
-      incidence_sexcohab = (infections - infections_sexpaid12m) / (susceptible_sexcohab + rr_sexnonreg * susceptible_sexnonreg),
-      incidence_sexnonreg = incidence_sexcohab * rr_sexnonreg,
       infections_nosex12m = 0,
-      infections_sexcohab = susceptible_sexcohab * incidence_sexcohab,
-      infections_sexnonreg = susceptible_sexnonreg * incidence_sexnonreg
+      rr_sexpaid12m = incidence_sexpaid12m / incidence_sexcohab
     )
 
   # Error here to catch that the KP adjustment has made the number of new infections
   # in KPs greater than the estimated population susceptible
-    if(sum(df1$incidence_sexpaid12m > 1) > 0) {
+    if(sum(df2$incidence_sexpaid12m > 1) > 0) {
       stop("KP new infections exceeds susceptible population size. Please contact support.")
     }
 
@@ -1330,7 +1324,7 @@ shipp_calculate_incidence_female <- function(naomi_output,
 
 }
 
-#' Calculate incidence in high risk male key populations
+#' Calculate incidence in all behavioural groups
 #'
 #' @param outputs Naomi output.
 #' @param options Naomi model options.
@@ -1436,6 +1430,9 @@ shipp_calculate_incidence_male <- function(naomi_output,
     goals <- naomi.resources::load_shipp_exdata("goals", "SSA")
     pwid_consensus <- goals[goals$iso3 == options$area_scope, ]$`pwid-new_inf`
   }
+  # scale consensus that we'll use to account for the 1:10 ratio assumption of
+  # male:female PWID
+  pwid_consensus <- pwid_consensus * 0.91
 
   # Sum prior count of new infections
   msm_sum <- sum(df1$infections_msm)
@@ -1463,12 +1460,14 @@ shipp_calculate_incidence_male <- function(naomi_output,
       incidence_sexnonreg = incidence_sexcohab * rr_sexnonreg,
       infections_nosex12m = 0,
       infections_sexcohab = susceptible_sexcohab * incidence_sexcohab,
-      infections_sexnonreg = susceptible_sexnonreg * incidence_sexnonreg
+      infections_sexnonreg = susceptible_sexnonreg * incidence_sexnonreg,
+      rr_msm = incidence_msm / incidence_sexcohab,
+      rr_pwid = incidence_pwid / incidence_sexcohab
     )
 
     # Error here to catch that the KP adjustment has made the number of new infections
     # in KPs greater than the estimated population susceptible
-    if(sum(df1$incidence_msm > 1, df1$incidence_pwid > 1) > 0) {
+    if(sum(df2$incidence_msm > 1, df2$incidence_pwid > 1) > 0) {
       stop("KP new infections exceeds susceptible population size. Please contact support.")
     }
 
diff --git a/vignettes/hiv-prev-workflow.Rmd b/vignettes/hiv-prev-workflow.Rmd
index 0fd09ecf..75c5a84b 100644
--- a/vignettes/hiv-prev-workflow.Rmd
+++ b/vignettes/hiv-prev-workflow.Rmd
@@ -1,5 +1,5 @@
 ---
-title: "HIV prevention prioritization tool"
+title: "SHIPP tool"
 output: rmarkdown::html_vignette
 vignette: >
   %\VignetteIndexEntry{HIV prevention prioritization tool workflow}
@@ -15,30 +15,30 @@ library(naomi)
 
 ## Background
 
-Many HIV prevention programmes aim to reduce new infections but is not feasible to target all individuals living in locations with HIV incidence. In different geographic settings, with different background incidence, sexual behaviours confer different levels of risk of HIV infection. 
+Many HIV prevention programmes aim to reduce new infections but it is infeasible to target all individuals living in locations with moderate to high HIV incidence. In different geographic settings, with different background incidence, sexual behaviours confer different levels of risk of HIV infection. 
 
-Recent HIV programming guidance introduces thresholds for prioritization considering both HIV incidence and sexual behaviours to reach the largest population at risk of HIV. This tool provides the “denominator” for HIV prevention categorised by sex, age, geography, and sexual behaviour.
+Recent HIV programming guidance introduces thresholds for prioritization considering both HIV incidence and sexual behaviours to reach the largest population at risk of HIV. The SHIPP tool provides the “denominator” for HIV prevention categorised by sex, age, geography, and sexual behaviour.
 
 ## Data inputs
 
-These categorizations are calculated using subnational estimates of PLHIV burden by age and sex produced by the Naomi model. [Naomi](https://github.com/mrc-ide/naomi) is a small-area estimation model for estimating HIV prevalence and PLHIV, ART coverage, and new HIV infections at district level by sex and five-year age group. The model combines district-level data about multiple outcomes from several sources in a Bayesian statistical model to produce robust indicators of subnational HIV burden. Naomi is used to annual update estimates of subnational HIV burden as part of the [UNAIDS HIV estimates process](https://www.unaids.org/en/dataanalysis/knowyourresponse/HIVdata_estimates).
+These categorizations are calculated using subnational estimates of HIV prevalence and incidence by age and sex produced by the Naomi model. [Naomi](https://github.com/mrc-ide/naomi) is a small-area estimation model for estimating HIV prevalence and PLHIV, ART coverage, and new HIV infections at a district level by sex and five-year age group. The model combines district-level data about multiple outcomes from several sources in a Bayesian statistical model to produce robust indicators of subnational HIV burden. Naomi is used to update annual estimates of subnational HIV burden as part of the [UNAIDS HIV estimates process](https://www.unaids.org/en/dataanalysis/knowyourresponse/HIVdata_estimates).
 
-The tool synthesises the most recent estimates of subnational PLHIV with outputs from additional mathematical models 
-describing subnational variation in sexual risk behaviour and estimates of key populations at an elevated risk of HIV infection:
+The tool synthesises the most recent estimates of subnational HIV prevalence and incidence with outputs from additional statistical models 
+describing subnational variation in sexual behaviour and estimates of key populations at an elevated risk of HIV infection:
 
-* **[Howes et al.](https://journals.plos.org/globalpublichealth/article?id=10.1371/journal.pgph.0001731)** model estimating subnational prevalence of HIV risk behaviours and associated HIV incidence. 
-* **[Stevens et al.](https://www.medrxiv.org/content/10.1101/2022.07.27.22278071v2)** model for population size estimates (PSEs) of key population in sub-Saharan Africa, including female sex workers (FSW), men-who-have-sex-with-men (MSM) and people who inject drugs (PWID).
-* **[Nguyen et al.](https://bmcpublichealth.biomedcentral.com/articles/10.1186/s12889-022-13451-y)**: model for age of sexual debut sub-Saharan Africa.
+* We extend the **[Howes et al.](https://journals.plos.org/globalpublichealth/article?id=10.1371/journal.pgph.0001731)** spatiotemporal multinomial model of sexual behaviour proportions to both men and women aged 15-49. This model utilizes household survey data (Demographic and Health Surveys, Population-based HIV Impact Assessments, Multiple Indicator Cluster Surveys, and other country-specific surveys) to develop spatially smoothed district-level estimates of the proportion of the population in each of three behavioural group (no sex, one regular partner, non-regular partner(s)) described below. The model includes terms for age (5-year age groups), a main spatial effect, a spatial interaction term for 5-year age groups, and survey year (a temporal effect). Men's and women's behavioural data is fitted separately, though all countries are fitted in a single model stratified by sex. The models are fit using the INLA package in R using the multinomial-Poisson transformation.
+* **[Stevens et al.](https://www.medrxiv.org/content/10.1101/2022.07.27.22278071v2)** model for population size estimates (PSEs) of key populations in sub-Saharan Africa at an admin-1 level (regional), including female sex workers (FSW), men who have sex with men (MSM), and people who inject drugs (PWID).
+* **[Nguyen et al.](https://bmcpublichealth.biomedcentral.com/articles/10.1186/s12889-022-13451-y)**: model for age at sexual debut in sub-Saharan Africa.
 
-These outputs of these models are stored in an external repository, [naomi-resources](http://github/mrc-ide/naomi.resources) and are updated regularly to incorporate newly released survey data and to align with changes in geographic changes in country specific administrative boundaries required for planning.
+The outputs of these models are stored in an external repository, [naomi-resources](http://github/mrc-ide/naomi.resources) and are updated annually to incorporate newly released survey data and to align with changes in geographic areas in country specific administrative boundaries required for planning.
 
-In addition to estimates produced by subnational models, the tool incorporates consensus estimates of KP population size that are developed by national HIV estimates teams as part of the annual UNAIDS HIV estimates process. For more information on this exercise please see [14G Key Population Workbook](https://hivtools.unaids.org/hiv-estimates-training-material-en/).
+In addition to estimates produced by subnational models, the tool incorporates consensus estimates of KP population size and HIV incidence that are developed by national HIV estimates teams as part of the annual UNAIDS HIV estimates process. For more information on this exercise please see [14G Key Population Workbook](https://hivtools.unaids.org/hiv-estimates-training-material-en/).
 
 ## Tool workflow
 
-This tool is now integrated into the [Naomi web application](https://naomi.unaids.org/login) and is generated after completing Naomi model as described in [22G Naomi sub-national estimates: Creating subnational HIV estimates](https://hivtools.unaids.org/hiv-estimates-training-material-en/).
+This tool is now integrated into the [Naomi web application](https://naomi.unaids.org/login) and is generated after completing the Naomi model as described in [22G Naomi sub-national estimates: Creating subnational HIV estimates](https://hivtools.unaids.org/hiv-estimates-training-material-en/).
 
-If you are running a Naomi model fit with updated administrative boundaries, you may receive an error that the external database containing the SRB or KP PSE model is out of date:
+If you are running a Naomi model fit with updated administrative boundaries, you may receive an error that the external database containing the sexual behaviour or KP PSE model is out of date:
 
 ```{r, echo = FALSE, results = 'asis', warning = FALSE}
 
@@ -49,34 +49,28 @@ cat("Error: Available KP PSE estimates for: \n",
                      "\n\nTo update estimates, please contact Naomi support.")
 ```
 
-## HIV prevention need calculation
+## SHIPP Tool estimates process
 
-**1.Calculate key population sizes by district and age**: 
+**1. Estimate key population sizes by district and age**: 
 
 Regional KP proportion estimates from [Stevens et al.](https://www.medrxiv.org/content/10.1101/2022.07.27.22278071v2) are disaggregated by age and district. 
 
-  * _Calculating sexually active population by age and sex_: Country specific estimates of age of first sexual debut are applied to district level population estimates from Naomi:
-    * For age distribution of MSM and women who sell sex, we assume a combination of:
-      * Age at first sex from [Nguyen et al.](https://bmcpublichealth.biomedcentral.com/articles/10.1186/s12889-022-13451-y)
-      * Age-specific MSM and FSW propensity estimates from [Thembisa](https://www.thembisa.org/content/downloadPage/Thembisa4_3) of:
-        * MSM: Mean age of 25, SD 7
-        * FSW: Mean age of 29, SD 9
-    * For age distribution of PWID literature estimate of age distribution from [Hines et al](web)
-        * PWID: Mean age 29.4, SD 7
-  * _Calculating total KP population by age_: Country specific regional KP proportions from the _Stevens et al_ model are applied to district level population estimates from Naomi and adjusted by age distribution of sexually active population calculated above.
-    * This includes an assumption that a nominal number of PWID (~9%) are female and as a result these are removed from the denominator and PWID are assumed as male from this point onwards.
+  * _Nationally adjusted age distributions for MSM and FSW_: For MSM and FSW, estimates for ages 15-49 are age-disaggregated using South Africa's [Thembisa](https://www.thembisa.org/content/downloadPage/Thembisa4_3) model estimates of the age distribution for FSW and MSM - Gamma(29,9) for FSW and Gamma(25,7) for MSM - then adjusted for the distribution of age at sexual debut by sex and country [(Nguyen et al.)](https://bmcpublichealth.biomedcentral.com/articles/10.1186/s12889-022-13451-y) to account for differences between South Africa and the other countries included in the SHIPP tool.
+  * _Uniformly adjusted age distribution for PWID_: For age distribution of PWID, we utilize a uniform literature estimate of age distribution from [Hines et al](web) across countries (mean age 29.4, SD 7).
+  * _Calculating total KP population by age and district_: Country specific regional KP proportions from the _Stevens et al_ model are applied to district level population estimates from Naomi and adjusted by age distribution described above.
+    * This includes an assumption that a nominal number of PWID (~9%) are female who are removed from the denominator. As such, PWID are assumed as male from this point onwards.
     
   
-**2. Separate general population sexual risk behaviour groups removing KP populations calculated in (1)**:
+**2. Separate general population sexual behaviour groups from KP populations calculated in (1)**:
 
 ```{r, echo = FALSE, results = 'asis', warning = FALSE}
 
 tibble::tribble(
-  ~"SRB category", ~ "HIV related risk", 
-  "Low: No sex", "Not sexually active", 
-  "Mid: One regular", "Sexually active, one cohabiting partner", 
-  "High: Non-regular", "Non-regular sexual partner(s)", 
-  "KPs", "FSW, MSM and PWID"
+  ~"Category", ~ "HIV related risk", 
+  "No sex", "Not sexually active", 
+  "One regular", "Sexually active, one cohabiting/marital partner", 
+  "Non-regular", "Non-regular sexual partner(s)", 
+  "Key Populations", "FSW (women), MSM and PWID (men)"
 ) %>%
   gt() %>%
   tab_header("HIV prevention priority groups") %>%
@@ -92,54 +86,51 @@ tibble::tribble(
 
 ```
 
-Subtract the proportion of KPs from low, mid and high sexual risk behaviour groups estimated in _Risher et al_ model:
+Subtract the proportion of KPs from sexual behaviour groups estimated in _Risher et al_ model:
 
-  * FSWs only subtracted from high risk SRB group of females aged 15-49.
-  * MSM and PWID subtracted proportionally from all male SRB groups.
+  * FSWs only subtracted from non-regular partner(s) group of females across ages.
+  * MSM and PWID subtracted proportionally from all male behaviour categories.
 
-**3. Calculate risk group logit prevalence**
+**3. Estimate HIV prevalence by behaviour**
 
-To calculate HIV prevalence for specific sexual risk behaviour groups by district and age, we maintain
-HIV prevalence from Naomi for a district-age-sex, but disaggregate to different
-risk behaviours using HIV prevalence ratios from household surveys for those reporting no
-sex vs one cohabiting vs non-regular sexual partner(s).
+HIV prevalence ratios by behaviour group are used to distribute PLHIV between behavioural risk groups.
 
-* Prevalence ratios by behaviour group are used to distribute PLHIV between behavioural risk groups.
 * Household survey data is used to estimate HIV prevalence in the no sex, one regular and non-regular partner(s) groups to calculate log odds-ratios for each behavioural category.
-* HIV prevalence ratio for KPs is based on the ratio of KP HIV prevalence1 to HIV prevalence among all women (FSW) or men (MSM and PWID).
-* HIV prevalence by behaviour is not explicitly presented – it is used to subtract off population sizes to present population susceptible to HIV (HIV-negative) in the previous caclculation step.
+* HIV prevalence ratios for KPs are based on the ratio of KP HIV prevalence from the _Stevens et al_ model to HIV prevalence among all women (FSW) or men (MSM and PWID).
+* HIV prevalence by behaviour is not explicitly presented – it is used to subtract off population sizes to present the population susceptible to HIV (HIV-negative).
 
 
-_For females, this is adjusted by:_
+_For female KPs, HIV prevalence ratios are derived based on:_
 
-  * A linear regression through admin-1 level estimates of the ratio of KP prevalence to gen-pop prevalence used to predict an age-district-specific FSW to general population prevalence ratio.
+  * A linear regression through regional (admin-1 level) estimates of the ratio of KP prevalence to general population prevalence used to predict an age-district-specific FSW to general population prevalence ratio.
   
-_For males this this is adjusted by:_
+_For males KPs, HIV prevalence ratios are derived based on:_
 
-  * Admin-1 level estimates of the ratio of KP prevalence to gen-pop prevalence among 15-24 year olds for MSM (due to the young age distribution of MSM) or among 15-49 year olds for PWID (due to the older age distribution of PWID) applied to all age groups among MSM and PWID in districts by admin-1 unit.
+  * Regional (admin-1 level) estimates of the ratio of KP prevalence to general population prevalence among 15-24 year olds for MSM (due to the young age distribution of MSM) or among 15-49 year olds for PWID (due to the older age distribution of PWID) applied to all age groups among MSM and PWID in districts by region (admin-1 unit).
   
-**3. Calculate risk group incidence and new infections**
+**4. Estimate HIV incidence rates and new HIV infections by behaviour**
 
-While maintaining age/sex/district-specific HIV incidence from Naomi, distribute HIV incidence between our 4 different behavioural groups utilizing IRRs from the literature:
+While maintaining age/sex/district-specific HIV incidence from Naomi, distribute HIV incidence between our 4 different behavioural groups utilizing incidence rate ratios (IRRs) from the literature:
 
-* Risk ratios for non-regular sex partners relative to those with a single cohabiting sex partner for females <sup>_1,2,3_</sup> and males <sup>_1,2,4_</sup>. 
-* For FSW, ratio of HIV incidence among women in key populations vs single cohabiting partner women derived based on HIV incidence category in district and tiered risk ratio <sup> 5 </sup>. 
-* For MSM and PWID, using ddmin 1 KP prevalence estimates relative to general population prevalence <sup>6</sup> and estimates from systematic review & meta-regression <sup> 7 </sup>.
+* Risk ratios for non-regular sex partner(s) relative to those with a single cohabiting/marital sex partner for females<sup>_1,2,3_</sup> and males.<sup>_1,2,4_</sup>
+* For FSW, ratio of HIV incidence among women in key populations vs general population women derived based on HIV incidence category in district and tiered risk ratio.<sup>_5_</sup>
+* For MSM and PWID, using regional (admin-1) KP prevalence estimates relative to general population prevalence<sup>_6_</sup> and estimates from systematic review & meta-regression.<sup>_7_</sup>
+* National KP new infections are scaled to consensus estimates of KP HIV incidence maintaining the relative district-level proportions associated with the above two bullet points. As such, the precise risk ratios for KPs will not necessarily match the risk ratios listed in the table below.
 
 
-Number of new infections by risk group is derived by multiplying these estimated incidence rates by risk behaviour times the population sizes of HIV-negative individuals by risk behaviour, in each of the 5-year age groups
+Number of new infections by sexual behaviour group is derived by multiplying these estimated HIV incidence rates by behaviour times the population sizes of HIV-negative individuals by behaviour, in each of the 5-year age groups
 
 ```{r, echo = FALSE, results = 'asis', warning = FALSE}
 
 tibble::tribble(
-  ~"SRB category", ~ "Females",  ~"Males" , 
-  "Low: No sex", "0", "0", 
-  "Mid: One regular", "1 (reference category)", "1 (reference category)", 
-  "High: Non-regular",
+  ~"Category", ~ "Females",  ~"Males" , 
+  "No sex", "0", "0", 
+  "One regular", "1 (reference category)", "1 (reference category)", 
+  "Non-regular",
   "_Aged 15-24_: 1.72 <br> Aged 25-49: 2.1 <sup>_1,2,3_</sup>", 
   "_Aged 15-24_: 1.89 <br> Aged 25-49: 2.1 <sup>_1,2,4_</sup>", 
-  "KPs", 
-  "**FSW**: <br> Very high: 3 <br> High: 6 <br>Moderate: 9 <br>Low: 13 <br>Very low: 25 <sup>_5_</sup>",
+  "Key Populations", 
+  "**FSW**: <br> Very high: 3 <br> High: 4 <br>Moderate: 7 <br>Low: 11 <br>Very low: 17 <sup>_5_</sup>",
   "**MSM**: 2.5-250<sup>_6,7_</sup> <br> **PWID** 2.5-55 <sup>_6_</sup>"
 ) %>%
   gt() %>%
@@ -178,9 +169,9 @@ tibble::tribble(
 
 ## Limitations of tool
 
-Risk behaviour population size estimates at a district level have a high degree of uncertainty, which is not captured in the current version of the tool
+Risk behaviour population size estimates at a district level have a high degree of uncertainty, which is not captured in the current version of the tool.
 
-Uncertainty in:
+There is uncertainty in:
 
 * KP sizes and their age and geographical disaggregation
 * Small area estimates based on survey data
@@ -188,11 +179,11 @@ Uncertainty in:
 * Incidence rate ratios by behaviour
 * Naomi estimates
 
-PSEs should be considered indicative rather than exact 
+As such, SHIPP tool estimates should be considered indicative rather than exact. 
 
 
 
 ## Usage of tool outputs
 
-Usage of tool output for prioritising groups for HIV prevention can be found [on the UNAIDS website](https://hivtools.unaids.org/pse/).
+Recommendations for usage of tool outputs for prioritising groups for HIV prevention can be found [on the UNAIDS website](https://hivtools.unaids.org/pse/).
 

From 7b6f99ef19b858cf0355c6d3b90e584ece98f4b6 Mon Sep 17 00:00:00 2001
From: Katie Risher <krisher@ic.ac.uk>
Date: Mon, 19 Feb 2024 10:00:33 -0500
Subject: [PATCH 49/53] Sum infections_pwid not pwid

---
 R/shipp.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/shipp.R b/R/shipp.R
index 3782049f..4ce2df3e 100644
--- a/R/shipp.R
+++ b/R/shipp.R
@@ -1436,7 +1436,7 @@ shipp_calculate_incidence_male <- function(naomi_output,
 
   # Sum prior count of new infections
   msm_sum <- sum(df1$infections_msm)
-  pwid_sum <- sum(df1$pwid)
+  pwid_sum <- sum(df1$infections_pwid)
 
   # Generate a ratio to scale FSW new infections by
   msm_ratio <- msm_consensus / msm_sum

From 2b649ce0a3054e1af1c6f2195eca5b220007e3fc Mon Sep 17 00:00:00 2001
From: Robert Ashton <r.ashton@imperial.ac.uk>
Date: Wed, 21 Feb 2024 11:54:41 +0000
Subject: [PATCH 50/53] Force installation of latest naomi.resources from
 github

---
 docker/Dockerfile | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 5336b2d8..f5a05a6e 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -16,6 +16,9 @@ RUN install_packages --repo=https://mrc-ide.r-universe.dev \
     testthat.buildkite \
     openxlsx
 
+RUN install_remote \
+    mrc-ide/naomi.resources
+
 ## Model run will try to parallelise over as many threads as are available
 ## potentially slowing the application, manually limit threads to 1
 ENV OMP_NUM_THREADS=1

From 2dc75efea8785cba1374bbd1630d0373973532c5 Mon Sep 17 00:00:00 2001
From: Rob Ashton <r.ashton@imperial.ac.uk>
Date: Thu, 22 Feb 2024 10:59:46 +0000
Subject: [PATCH 51/53] Bump version number

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 82d9b40f..c3341492 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: naomi
 Title: Naomi Model for Subnational HIV Estimates
-Version: 2.9.23
+Version: 2.9.24
 Authors@R:
     person(given = "Jeff",
            family = "Eaton",

From d37d5be468141c443445adda5c5eadf9a28ca8ac Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Mon, 26 Feb 2024 15:28:21 +0000
Subject: [PATCH 52/53] edge case for 0 pse

---
 R/shipp.R | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/R/shipp.R b/R/shipp.R
index 4ce2df3e..7ce76c43 100644
--- a/R/shipp.R
+++ b/R/shipp.R
@@ -244,7 +244,7 @@ shipp_disaggregate_fsw <- function(outputs,
   # Check for consensus estimate of FSW
   fsw_consensus <- kp_consensus[kp_consensus$key_population == "FSW", ]$population_size
 
-  if(!is.na(fsw_consensus)){
+  if(!is.na(fsw_consensus) & fsw_consensus == 0){
 
     # Check if consensus estimate is larger than age matched population denominator
     pop <- naomi_pop[naomi_pop$area_level == 0 & naomi_pop$age_group == "Y015_049" & naomi_pop$sex == "female",]$population
@@ -395,10 +395,10 @@ shipp_disaggregate_pwid <- function(outputs,
     dplyr::mutate(total_pwid = population * prop_pwid) %>%
     dplyr::select(iso3, area_id, total_pwid, age_group, area_level)
 
-  # Check for consensus estimate of MSM
+  # Check for consensus estimate of PWID
   pwid_consensus <- kp_consensus[kp_consensus$key_population == "PWID", ]$population_size
 
-  if(!is.na(pwid_consensus)){
+  if(!is.na(pwid_consensus) & pwid_consensus != 0){
     # Check if consensus estimate is larger than age matched population denominator
     pop <- naomi_pop[naomi_pop$area_level == 0 & naomi_pop$age_group == "Y015_049" & naomi_pop$sex == "male",]$population
     prop_pop <- pwid_consensus / pop
@@ -498,7 +498,7 @@ shipp_disaggregate_msm <- function(outputs,
   # Check for consensus estimate of MSM
   msm_consensus <- kp_consensus[kp_consensus$key_population == "MSM", ]$population_size
 
-  if(!is.na(msm_consensus)){
+  if(!is.na(msm_consensus) & msm_consensus != 0){
 
     # Check if consensus estimate is larger than age matched population denominator
     pop <- naomi_pop[naomi_pop$area_level == 0 & naomi_pop$age_group == "Y015_049" & naomi_pop$sex == "male",]$population
@@ -1187,7 +1187,7 @@ shipp_calculate_incidence_female <- function(naomi_output,
   # Check for consensus estimate of FSW new infections
   fsw_consensus <- kp_consensus[kp_consensus$key_population == "FSW", ]$infections
 
-  if(is.na(fsw_consensus)){
+  if(is.na(fsw_consensus) || fsw_consensus != 0){
     # If no KP workbook present, read in FSW new infections from GOALS
     goals <- naomi.resources::load_shipp_exdata("goals", "SSA")
     fsw_consensus <- goals[goals$iso3 == options$area_scope, ]$`fsw-new_inf`
@@ -1419,17 +1419,18 @@ shipp_calculate_incidence_male <- function(naomi_output,
   msm_consensus <- kp_consensus[kp_consensus$key_population == "MSM", ]$infections
   pwid_consensus <- kp_consensus[kp_consensus$key_population == "PWID", ]$infections
 
-  if(is.na(msm_consensus)){
+  if(is.na(msm_consensus) || msm_consensus == 0){
     # If no KP workbook present, read in FSW new infections from GOALS
     goals <- naomi.resources::load_shipp_exdata("goals", "SSA")
     msm_consensus <- goals[goals$iso3 == options$area_scope, ]$`msm-new_inf`
   }
 
-  if(is.na(pwid_consensus)){
+  if(is.na(pwid_consensus) || pwid_consensus == 0){
     # If no KP workbook present, read in FSW new infections from GOALS
     goals <- naomi.resources::load_shipp_exdata("goals", "SSA")
     pwid_consensus <- goals[goals$iso3 == options$area_scope, ]$`pwid-new_inf`
   }
+
   # scale consensus that we'll use to account for the 1:10 ratio assumption of
   # male:female PWID
   pwid_consensus <- pwid_consensus * 0.91

From 8aaa93bb936d1162d5e201268dea48bb47ebf093 Mon Sep 17 00:00:00 2001
From: rtesra <70330391+rtesra@users.noreply.github.com>
Date: Tue, 27 Feb 2024 17:03:37 +0000
Subject: [PATCH 53/53] aggregate KP workbook from subnational pjnz files

---
 R/inputs-spectrum.R | 12 ++++++++++++
 R/shipp.R           | 11 ++++++-----
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/R/inputs-spectrum.R b/R/inputs-spectrum.R
index b1db7c17..8af893e9 100644
--- a/R/inputs-spectrum.R
+++ b/R/inputs-spectrum.R
@@ -722,6 +722,18 @@ extract_kp_workbook <- function(pjnz_list){
   # If no consensus estimates present, return empty dataframe
   if(nrow(kp_out) == 0){kp_out <- kp[[1]]}
 
+  # If multiple pjnz files, aggreagte consensus estimates
+  if(nrow(kp_out) > 4){
+
+    kp_out <- kp_out |>
+      dplyr::group_by(key_population, year, workbook_file) |>
+      dplyr::summarise(population_size = sum(population_size),
+                hiv_prevalence = sum(hiv_prevalence),
+                art_coverage = sum(art_coverage),
+                infections = sum(infections))
+
+  }
+
   kp_out
 
 }
diff --git a/R/shipp.R b/R/shipp.R
index 7ce76c43..c1494af0 100644
--- a/R/shipp.R
+++ b/R/shipp.R
@@ -123,7 +123,6 @@ shipp_format_naomi <- function(outputs, options){
                                      "Guinea", "GIN",
                                      "Liberia", "LBR",
                                      "Mali", "MLI",
-                                     "Niger", "NER",
                                      "Sierra Leone", "SLE",
                                      "Togo", "TGO",
                                      "Burkina Faso", "BFA",
@@ -244,7 +243,7 @@ shipp_disaggregate_fsw <- function(outputs,
   # Check for consensus estimate of FSW
   fsw_consensus <- kp_consensus[kp_consensus$key_population == "FSW", ]$population_size
 
-  if(!is.na(fsw_consensus) & fsw_consensus == 0){
+  if(!is.na(fsw_consensus) & fsw_consensus > 0){
 
     # Check if consensus estimate is larger than age matched population denominator
     pop <- naomi_pop[naomi_pop$area_level == 0 & naomi_pop$age_group == "Y015_049" & naomi_pop$sex == "female",]$population
@@ -398,7 +397,7 @@ shipp_disaggregate_pwid <- function(outputs,
   # Check for consensus estimate of PWID
   pwid_consensus <- kp_consensus[kp_consensus$key_population == "PWID", ]$population_size
 
-  if(!is.na(pwid_consensus) & pwid_consensus != 0){
+  if(!is.na(pwid_consensus) & pwid_consensus > 0){
     # Check if consensus estimate is larger than age matched population denominator
     pop <- naomi_pop[naomi_pop$area_level == 0 & naomi_pop$age_group == "Y015_049" & naomi_pop$sex == "male",]$population
     prop_pop <- pwid_consensus / pop
@@ -498,7 +497,7 @@ shipp_disaggregate_msm <- function(outputs,
   # Check for consensus estimate of MSM
   msm_consensus <- kp_consensus[kp_consensus$key_population == "MSM", ]$population_size
 
-  if(!is.na(msm_consensus) & msm_consensus != 0){
+  if(!is.na(msm_consensus) & msm_consensus > 0){
 
     # Check if consensus estimate is larger than age matched population denominator
     pop <- naomi_pop[naomi_pop$area_level == 0 & naomi_pop$age_group == "Y015_049" & naomi_pop$sex == "male",]$population
@@ -1191,6 +1190,7 @@ shipp_calculate_incidence_female <- function(naomi_output,
     # If no KP workbook present, read in FSW new infections from GOALS
     goals <- naomi.resources::load_shipp_exdata("goals", "SSA")
     fsw_consensus <- goals[goals$iso3 == options$area_scope, ]$`fsw-new_inf`
+
   }
 
   # Sum prior count of new infections
@@ -1439,7 +1439,7 @@ shipp_calculate_incidence_male <- function(naomi_output,
   msm_sum <- sum(df1$infections_msm)
   pwid_sum <- sum(df1$infections_pwid)
 
-  # Generate a ratio to scale FSW new infections by
+  # Generate a ratio to scale MSM and PWID new infections by
   msm_ratio <- msm_consensus / msm_sum
   pwid_ratio <- pwid_consensus / pwid_sum
 
@@ -1462,6 +1462,7 @@ shipp_calculate_incidence_male <- function(naomi_output,
       infections_nosex12m = 0,
       infections_sexcohab = susceptible_sexcohab * incidence_sexcohab,
       infections_sexnonreg = susceptible_sexnonreg * incidence_sexnonreg,
+
       rr_msm = incidence_msm / incidence_sexcohab,
       rr_pwid = incidence_pwid / incidence_sexcohab
     )