From 8934da5781596feb339f0be9c25f28425a50516b Mon Sep 17 00:00:00 2001
From: kelshmo <kelsey.montgomery@sagebase.org>
Date: Tue, 22 Jun 2021 16:45:33 -0500
Subject: [PATCH 01/14] pkg dependency

---
 DESCRIPTION | 1 +
 1 file changed, 1 insertion(+)

diff --git a/DESCRIPTION b/DESCRIPTION
index 62f8ed11..69a3fb0e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -43,6 +43,7 @@ Imports:
     knitr,
     limma,
     magrittr,
+    meta,
     mclust,
     mvIC,
     plyr,

From 25f32faea1feeaa9295d757343477f00389f77d5 Mon Sep 17 00:00:00 2001
From: kelshmo <kelsey.montgomery@sagebase.org>
Date: Tue, 22 Jun 2021 16:45:46 -0500
Subject: [PATCH 02/14] take residuals as inputs from synapse

---
 config.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/config.yml b/config.yml
index 20149fb4..28d763c5 100644
--- a/config.yml
+++ b/config.yml
@@ -30,3 +30,4 @@ default:
   skip model: TRUE
   report: "openSci"
   store output: syn23572479
+  residuals: ["syn123", "syn456", "syn678"]

From 3a384fca0cf3d527a4cd9da7f4e388581e885290 Mon Sep 17 00:00:00 2001
From: kelshmo <kelsey.montgomery@sagebase.org>
Date: Thu, 1 Jul 2021 13:26:54 -0500
Subject: [PATCH 03/14] adds function to compute rowmeans and rowsds for gene
 matrices

---
 R/functions.R                         | 35 +++++++++++++++++++++++++++
 tests/testthat/test-compute_mean_sd.R | 24 ++++++++++++++++++
 2 files changed, 59 insertions(+)
 create mode 100644 tests/testthat/test-compute_mean_sd.R

diff --git a/R/functions.R b/R/functions.R
index a1627542..1de15645 100644
--- a/R/functions.R
+++ b/R/functions.R
@@ -818,3 +818,38 @@ provenance_helper <- function(metadata_id,  counts_id, metadata_version = NULL,
   }
   ids
 }
+#' Compute mean and standard deviation by gene feature
+#'
+#' Subsets counts matrix to include samples in `clean_metadata` for computation.
+#'
+#' @inheritParams filter_genes
+#' @inheritParams simple_filter
+#' @return A data frame with columns feature, n, mean and sd.
+#' @export
+compute_mean_sd <- function(clean_metadata, count_df) {
+  clean_metadata <- tibble::rownames_to_column(clean_metadata, "sampleID")
+  count_df <- tibble::rownames_to_column(count_df, "feature")
+  # function to compute rowwise mean and sd, remove missing values
+  f_byrow <- function(x) {
+    tibble::tibble(
+      mean = mean(x, na.rm = TRUE),
+      sd = sd(x, na.rm = TRUE))
+  }
+  # subset expression matrix to include relevant samples
+  subset <- count_df[,clean_metadata$sampleID]
+  # compute row means and standard deviation
+  compute <- subset %>%
+    # capture row elements with ... and concatenate into a vector
+    dplyr::transmute(
+      out = purrr::pmap(
+        ., ~ f_byrow(c(...))
+      )
+    ) %>%
+    tidyr::unnest(cols = c(out))
+  # output a data frame with gene features, n, mean, and sd
+  dat <- data.frame(
+    feature = count_df$feature,
+    n = dim(clean_metadata)[1]
+  )
+  dplyr::bind_cols(dat, compute)
+}
diff --git a/tests/testthat/test-compute_mean_sd.R b/tests/testthat/test-compute_mean_sd.R
new file mode 100644
index 00000000..e0337913
--- /dev/null
+++ b/tests/testthat/test-compute_mean_sd.R
@@ -0,0 +1,24 @@
+metadata <- data.frame(
+  batch = c("1", "2", "1", "2", "1", "2", "1", "2"),
+  diagnosis = c("dx", "dx", "ct", "ct", "dx", "dx", "ct", "ct"),
+  sex = c("M", "F", "M", "F", "M", "F", "M", "F"),
+  RIN = c(5, 5, 5, 5, 5, 5, 5, 5),
+  row.names = c("S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8"),
+  stringsAsFactors = FALSE
+)
+
+counts <- data.frame(matrix(
+  sample(0:100, size = 16),
+  ncol = 8,
+  dimnames = list(c("ENSG00000229807.12", "ENSG00000183878.12"),
+                  c("S1", "S2", "S3", "S4",  "S5", "S6", "S7", "S8"))
+))
+
+
+test_that("function runs and computation is correct", {
+  dat <- compute_mean_sd(metadata, counts)
+  expect_equal(colnames(dat), c("feature", "n", "mean", "sd"))
+  expect_equal(sd(counts[1,]), dat$sd[1])
+  expect_equal(mean(counts[1,], dat$mean[2]))
+  expect_equal(dim(metadata[1]), 8)
+})

From a80b47345d3bffc14f33a53f3b0b21290a8bd4d6 Mon Sep 17 00:00:00 2001
From: kelshmo <kelsey.montgomery@sagebase.org>
Date: Thu, 1 Jul 2021 13:28:01 -0500
Subject: [PATCH 04/14] documentation

---
 man/compute_mean_sd.Rd | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 man/compute_mean_sd.Rd

diff --git a/man/compute_mean_sd.Rd b/man/compute_mean_sd.Rd
new file mode 100644
index 00000000..52be2178
--- /dev/null
+++ b/man/compute_mean_sd.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/functions.R
+\name{compute_mean_sd}
+\alias{compute_mean_sd}
+\title{Compute mean and standard deviation by gene feature}
+\usage{
+compute_mean_sd(clean_metadata, count_df)
+}
+\arguments{
+\item{clean_metadata}{A data frame with sample identifiers as rownames and variables as
+factors or numeric as determined by \code{"sageseqr::clean_covariates()"}.}
+
+\item{count_df}{A counts data frame with sample identifiers as column names
+and gene Ids are rownames.}
+}
+\description{
+Compute mean and standard deviation by gene feature
+}

From cebe1dc7416b8ff5448738a9bdb9c2772f633d33 Mon Sep 17 00:00:00 2001
From: kelshmo <kelsey.montgomery@sagebase.org>
Date: Thu, 1 Jul 2021 14:31:43 -0500
Subject: [PATCH 05/14] drop dimnames and fix tests

---
 R/functions.R                         | 10 +++++-----
 man/compute_mean_sd.Rd                | 11 +++++++++--
 tests/testthat/test-compute_mean_sd.R | 15 +++++++++------
 3 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/R/functions.R b/R/functions.R
index 1de15645..53de0051 100644
--- a/R/functions.R
+++ b/R/functions.R
@@ -824,11 +824,11 @@ provenance_helper <- function(metadata_id,  counts_id, metadata_version = NULL,
 #'
 #' @inheritParams filter_genes
 #' @inheritParams simple_filter
+#' @inheritParams clean_covariates
+#' @param gene_id_input Column name of the gene ids in the counts_id file.
 #' @return A data frame with columns feature, n, mean and sd.
 #' @export
-compute_mean_sd <- function(clean_metadata, count_df) {
-  clean_metadata <- tibble::rownames_to_column(clean_metadata, "sampleID")
-  count_df <- tibble::rownames_to_column(count_df, "feature")
+compute_mean_sd <- function(clean_metadata, sample_identifier, count_df, gene_id_input) {
   # function to compute rowwise mean and sd, remove missing values
   f_byrow <- function(x) {
     tibble::tibble(
@@ -836,7 +836,7 @@ compute_mean_sd <- function(clean_metadata, count_df) {
       sd = sd(x, na.rm = TRUE))
   }
   # subset expression matrix to include relevant samples
-  subset <- count_df[,clean_metadata$sampleID]
+  subset <- count_df[,clean_metadata[[sample_identifier]]]
   # compute row means and standard deviation
   compute <- subset %>%
     # capture row elements with ... and concatenate into a vector
@@ -848,7 +848,7 @@ compute_mean_sd <- function(clean_metadata, count_df) {
     tidyr::unnest(cols = c(out))
   # output a data frame with gene features, n, mean, and sd
   dat <- data.frame(
-    feature = count_df$feature,
+    feature = count_df[[gene_id_input]],
     n = dim(clean_metadata)[1]
   )
   dplyr::bind_cols(dat, compute)
diff --git a/man/compute_mean_sd.Rd b/man/compute_mean_sd.Rd
index 52be2178..510a271f 100644
--- a/man/compute_mean_sd.Rd
+++ b/man/compute_mean_sd.Rd
@@ -4,15 +4,22 @@
 \alias{compute_mean_sd}
 \title{Compute mean and standard deviation by gene feature}
 \usage{
-compute_mean_sd(clean_metadata, count_df)
+compute_mean_sd(clean_metadata, sample_identifier, count_df, gene_id_input)
 }
 \arguments{
 \item{clean_metadata}{A data frame with sample identifiers as rownames and variables as
 factors or numeric as determined by \code{"sageseqr::clean_covariates()"}.}
 
+\item{sample_identifier}{The name of the column with the sample identifiers that map to the gene counts data frame.}
+
 \item{count_df}{A counts data frame with sample identifiers as column names
 and gene Ids are rownames.}
+
+\item{gene_id_input}{Column name of the gene ids in the counts_id file.}
+}
+\value{
+A data frame with columns feature, n, mean and sd.
 }
 \description{
-Compute mean and standard deviation by gene feature
+Subsets counts matrix to include samples in `clean_metadata` for computation.
 }
diff --git a/tests/testthat/test-compute_mean_sd.R b/tests/testthat/test-compute_mean_sd.R
index e0337913..4cc9608c 100644
--- a/tests/testthat/test-compute_mean_sd.R
+++ b/tests/testthat/test-compute_mean_sd.R
@@ -3,22 +3,25 @@ metadata <- data.frame(
   diagnosis = c("dx", "dx", "ct", "ct", "dx", "dx", "ct", "ct"),
   sex = c("M", "F", "M", "F", "M", "F", "M", "F"),
   RIN = c(5, 5, 5, 5, 5, 5, 5, 5),
-  row.names = c("S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8"),
+  sampleID = c("S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8"),
   stringsAsFactors = FALSE
 )
 
 counts <- data.frame(matrix(
-  sample(0:100, size = 16),
+  sample(seq(0, 100, by = 0.01), size = 16),
   ncol = 8,
   dimnames = list(c("ENSG00000229807.12", "ENSG00000183878.12"),
                   c("S1", "S2", "S3", "S4",  "S5", "S6", "S7", "S8"))
 ))
 
+counts_df <- tibble::rownames_to_column(counts, "feature")
 
 test_that("function runs and computation is correct", {
-  dat <- compute_mean_sd(metadata, counts)
+  dat <- compute_mean_sd(metadata, "sampleID", counts_df, "feature")
+  #drop gene feature to test row means and row sds
+  test <- counts_df[,-1]
   expect_equal(colnames(dat), c("feature", "n", "mean", "sd"))
-  expect_equal(sd(counts[1,]), dat$sd[1])
-  expect_equal(mean(counts[1,], dat$mean[2]))
-  expect_equal(dim(metadata[1]), 8)
+  expect_equal(sd(test[1,], na.rm = TRUE), dat$sd[1])
+  expect_equal(rowMeans(test, na.rm = TRUE), dat$mean)
+  expect_equal(dim(metadata)[1], 8)
 })

From 52345ce44d148da4dabe45d9a84ae6257142bf5a Mon Sep 17 00:00:00 2001
From: kelshmo <kelsey.montgomery@sagebase.org>
Date: Thu, 1 Jul 2021 15:15:57 -0500
Subject: [PATCH 06/14] parallelize row means and row sds computation

---
 R/functions.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/functions.R b/R/functions.R
index 53de0051..30f0db5d 100644
--- a/R/functions.R
+++ b/R/functions.R
@@ -841,7 +841,7 @@ compute_mean_sd <- function(clean_metadata, sample_identifier, count_df, gene_id
   compute <- subset %>%
     # capture row elements with ... and concatenate into a vector
     dplyr::transmute(
-      out = purrr::pmap(
+      out = furrr::future_pmap(
         ., ~ f_byrow(c(...))
       )
     ) %>%

From 34bb855685af7e9e716af9544c4f5924e71c307e Mon Sep 17 00:00:00 2001
From: kelshmo <kelsey.montgomery@sagebase.org>
Date: Thu, 1 Jul 2021 15:32:40 -0500
Subject: [PATCH 07/14] cite testthat pkg

---
 tests/testthat/test-compute_mean_sd.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/testthat/test-compute_mean_sd.R b/tests/testthat/test-compute_mean_sd.R
index 4cc9608c..98507250 100644
--- a/tests/testthat/test-compute_mean_sd.R
+++ b/tests/testthat/test-compute_mean_sd.R
@@ -20,8 +20,8 @@ test_that("function runs and computation is correct", {
   dat <- compute_mean_sd(metadata, "sampleID", counts_df, "feature")
   #drop gene feature to test row means and row sds
   test <- counts_df[,-1]
-  expect_equal(colnames(dat), c("feature", "n", "mean", "sd"))
-  expect_equal(sd(test[1,], na.rm = TRUE), dat$sd[1])
-  expect_equal(rowMeans(test, na.rm = TRUE), dat$mean)
-  expect_equal(dim(metadata)[1], 8)
+  testthat::expect_equal(colnames(dat), c("feature", "n", "mean", "sd"))
+  testthat::expect_equal(sd(test[1,], na.rm = TRUE), dat$sd[1])
+  testthat::expect_equal(rowMeans(test, na.rm = TRUE), dat$mean)
+  testthat::expect_equal(dim(metadata)[1], 8)
 })

From 3ab03e9ac12b61092a571d65b1c8d8bdf272ad3c Mon Sep 17 00:00:00 2001
From: kelshmo <kelsey.montgomery@sagebase.org>
Date: Thu, 8 Jul 2021 20:08:03 -0500
Subject: [PATCH 08/14] return warning when metadata and counts don't match

---
 R/functions.R                         | 70 ++++++++++++++++++++++++++-
 tests/testthat/test-compute_mean_sd.R | 24 +++++++++
 2 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/R/functions.R b/R/functions.R
index 30f0db5d..96d3332a 100644
--- a/R/functions.R
+++ b/R/functions.R
@@ -835,8 +835,26 @@ compute_mean_sd <- function(clean_metadata, sample_identifier, count_df, gene_id
       mean = mean(x, na.rm = TRUE),
       sd = sd(x, na.rm = TRUE))
   }
+  # add a check that all samples in the metadata are represented in the counts
+  if (!(all(clean_metadata[[sample_identifier]] %in% colnames(count_df)))) {
+    dropped <- clean_metadata[[sample_identifier]][
+      !clean_metadata[[sample_identifier]] %in% colnames(count_df)
+      ]
+    dropped <- glue::glue_collapse(dropped, sep = ",")
+    warning(
+      glue::glue(
+        "Not all samples in the metadata are present in the counts matrix. {dropped} is/are not analyzed."
+        )
+      )
+    samples <- clean_metadata[[sample_identifier]][
+      clean_metadata[[sample_identifier]] %in% colnames(count_df)
+    ]
+  } else{
+    samples <- clean_metadata[[sample_identifier]]
+  }
+
   # subset expression matrix to include relevant samples
-  subset <- count_df[,clean_metadata[[sample_identifier]]]
+  subset <- count_df[,samples]
   # compute row means and standard deviation
   compute <- subset %>%
     # capture row elements with ... and concatenate into a vector
@@ -846,6 +864,7 @@ compute_mean_sd <- function(clean_metadata, sample_identifier, count_df, gene_id
       )
     ) %>%
     tidyr::unnest(cols = c(out))
+
   # output a data frame with gene features, n, mean, and sd
   dat <- data.frame(
     feature = count_df[[gene_id_input]],
@@ -853,3 +872,52 @@ compute_mean_sd <- function(clean_metadata, sample_identifier, count_df, gene_id
   )
   dplyr::bind_cols(dat, compute)
 }
+#' Meta-analysis
+#'
+#'@inheritParams build_formula
+#'@inheritParams compute_mean_sd
+meta_express <- function(clean_metadata, sample_identifier, count_df,
+                         gene_id_input, primary_variable, group_variables
+                         ) {
+  vars <- c(primary_variable, group_variables)
+  #compute mean and standard deviation by gene for grouped variables of interest
+  by_gene <- clean_metadata %>%
+    dplyr::group_by_at(vars(dplyr::one_of(vars))) %>%
+    dplyr::group_modify(
+      ~ compute_mean_sd(
+        .,
+        sample_identifier,
+        count_df,
+        gene_id_input
+        )
+      )
+
+  # metacont accepts an experimental and control group. identify all possible
+  # combinations of comparing the primary_variable
+  # primary_variable needs to be character
+  vec <- clean_metadata[[primary_variable]]
+  comparisons <- gtools::combinations(
+    n = length(unique(vec)),
+    r = 2,
+    v = unique(vec)
+    )
+
+  # collapse group_variables into one variable
+  # metadata <- build_formula(
+  #   clean_metadata,
+  #   group_variables,
+  #   colnames(clean_metadata[!(colnames(clean_metadata) %in% group_variables)])
+  #   )
+  # map over meta() for each condition
+  meta <- function(comparison_values, by_gene, metadata, primary_variable) {
+    # subset both metadata and expression matrix to include comparison_values
+    # only
+    # metadata <- metadata[metadata[[primary_variable]] %in% comparison_values,]
+    # by_gene <- by_gene[]
+    # experiment_group <- by_gene
+    #metacont can only accept one label for studLabel (e.g. Tissue in this
+    # example) Need to collapse all other model var categories.
+
+  }
+
+}
diff --git a/tests/testthat/test-compute_mean_sd.R b/tests/testthat/test-compute_mean_sd.R
index 98507250..67ca98c3 100644
--- a/tests/testthat/test-compute_mean_sd.R
+++ b/tests/testthat/test-compute_mean_sd.R
@@ -25,3 +25,27 @@ test_that("function runs and computation is correct", {
   testthat::expect_equal(rowMeans(test, na.rm = TRUE), dat$mean)
   testthat::expect_equal(dim(metadata)[1], 8)
 })
+
+metadata <- data.frame(
+  batch = c("1", "2", "1", "2", "1", "2", "1", "2"),
+  diagnosis = c("dx", "dx", "ct", "ct", "dx", "dx", "ct", "ct"),
+  sex = c("M", "F", "M", "F", "M", "F", "M", "F"),
+  RIN = c(5, 5, 5, 5, 5, 5, 5, 5),
+  sampleID = c("S1", "S2", "S3", "S4", "S5", "S6", "S7", "S12"),
+  stringsAsFactors = FALSE
+)
+
+counts <- data.frame(matrix(
+  sample(seq(0, 100, by = 0.01), size = 16),
+  ncol = 8,
+  dimnames = list(c("ENSG00000229807.12", "ENSG00000183878.12"),
+                  c("S1", "S2", "S3", "S4",  "S5", "S6", "S7", "S8"))
+))
+
+counts_df <- tibble::rownames_to_column(counts, "feature")
+
+test_that("message when metadata does not map to counts", {
+  testthat::expect_warning(
+    dat <- compute_mean_sd(metadata, "sampleID", counts_df, "feature")
+    )
+})

From 0efe401058c04fee04d2bd716a85935eb9959b90 Mon Sep 17 00:00:00 2001
From: kelshmo <kelsey.montgomery@sagebase.org>
Date: Fri, 9 Jul 2021 13:50:20 -0500
Subject: [PATCH 09/14] add furrr package

---
 DESCRIPTION | 1 +
 1 file changed, 1 insertion(+)

diff --git a/DESCRIPTION b/DESCRIPTION
index 69a3fb0e..1c68eba4 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -31,6 +31,7 @@ Imports:
     edgeR,
     foreach,
     fs,
+    furrr,
     graphics,
     GenomicRanges,
     glue,

From 4b145fd13409be586018a767792f145499300f6d Mon Sep 17 00:00:00 2001
From: kelshmo <kelsey.montgomery@sagebase.org>
Date: Fri, 9 Jul 2021 14:03:20 -0500
Subject: [PATCH 10/14] tests and new functions to wrap metacont

---
 R/functions.R                       | 127 +++++++++++++++++++++++-----
 man/meta_express.Rd                 |  43 ++++++++++
 man/pairwise_meta.Rd                |  24 ++++++
 tests/testthat/test-pairwise_meta.R |  37 ++++++++
 4 files changed, 208 insertions(+), 23 deletions(-)
 create mode 100644 man/meta_express.Rd
 create mode 100644 man/pairwise_meta.Rd
 create mode 100644 tests/testthat/test-pairwise_meta.R

diff --git a/R/functions.R b/R/functions.R
index 96d3332a..7c84f3b9 100644
--- a/R/functions.R
+++ b/R/functions.R
@@ -872,12 +872,80 @@ compute_mean_sd <- function(clean_metadata, sample_identifier, count_df, gene_id
   )
   dplyr::bind_cols(dat, compute)
 }
-#' Meta-analysis
+#'Apply metacont() between two comparison groups
 #'
-#'@inheritParams build_formula
-#'@inheritParams compute_mean_sd
+#'\code{"sageseqr::meta_express()"} wraps \code{"sageseqr::pairwise_meta()"}
+#'since the input format for `by_gene` is constrained. `by_gene` is a dataframe
+#'grouped by gene feature and other metadata with gene summary statistics nested
+#'under "data".
+#'
+#' @param comparison_values A vector of two values to compare from
+#'`primary_variable`.
+#' @param by_gene A grouped data frame with
+#' @inheritParams meta_express
+#'
+#' @export
+pairwise_meta <- function(comparison_values, by_gene, primary_variable) {
+  # wrap metacont() to return relevant components for each gene feature
+  wrap_meta <- function(df1, df2){
+    m <- meta::metacont(
+      n.e = df1$n,
+      mean.e = df1$mean,
+      sd.e = df1$sd,
+      n.c = df2$n,
+      mean.c = df2$mean,
+      sd.c = df2$sd,
+      sm = "SMD",
+      method.smd = "Hedges",
+      method.tau = "REML"
+    )
+    return(m[c("TE.fixed", "seTE.fixed", "lower.fixed", "upper.fixed",
+               "zval.fixed", "pval.fixed", "TE.random", "seTE.random",
+               "lower.random", "upper.random", "zval.random", "pval.random",
+               "Q", "tau", "H", "I2")])
+  }
+  # subset summaries by gene to relevant comparison_values
+  to_analyze <- by_gene[by_gene[[primary_variable]] %in% comparison_values,]
+  split <- dplyr::group_by(to_analyze, .data[[primary_variable]]) %>%
+    dplyr::group_split()
+  # split pairwise comparisons into experimental and control data frames
+  experimental <- split[[1]]
+  control <- split[[2]]
+  # parallelize meta-analysis over every gene
+  m <- furrr::future_map2_dfr(
+    experimental$data,
+    control$data,
+    ~wrap_meta(.x, .y)
+  )
+  # bind gene features and other metadata
+  feature_vars <- colnames(by_gene)[
+    !colnames(by_gene) %in% c(primary_variable, "data")
+  ]
+  gene_features <- dplyr::bind_rows(split) %>%
+    dplyr::select(!!!feature_vars) %>%
+    dplyr::distinct()
+  output <- dplyr::bind_cols(gene_features, m)
+  output
+}
+#' Meta-analysis of continuous outcome data
+#'
+#' Meta-analysis by standardized mean difference with the Hedges' g method
+#' (1981).
+#'
+#' @param group_variables These variables are referenced when computing summary
+#' statistics for every gene. Therefore, a gene feature will have an observation
+#' for each group of variables (i.e. each gene feature across unique tissue
+#' types, sex and diagnosis).
+#' @param primary_variable The primary meta-analysis condition. The values of
+#' the `primary_variable` will make up the experimental and control groups of
+#' \code{"meta::metacont()"}.
+#' @param collapse_condition Compute one estimate across the `collapse_condition`
+#' which is a variabel in `clean_metadata`.
+#' @inheritParams compute_mean_sd
+#' @export
 meta_express <- function(clean_metadata, sample_identifier, count_df,
-                         gene_id_input, primary_variable, group_variables
+                         gene_id_input, primary_variable, group_variables,
+                         collapse_condition
                          ) {
   vars <- c(primary_variable, group_variables)
   #compute mean and standard deviation by gene for grouped variables of interest
@@ -895,29 +963,42 @@ meta_express <- function(clean_metadata, sample_identifier, count_df,
   # metacont accepts an experimental and control group. identify all possible
   # combinations of comparing the primary_variable
   # primary_variable needs to be character
-  vec <- clean_metadata[[primary_variable]]
+  vec <- as.character(clean_metadata[[primary_variable]])
   comparisons <- gtools::combinations(
     n = length(unique(vec)),
     r = 2,
     v = unique(vec)
     )
-
-  # collapse group_variables into one variable
-  # metadata <- build_formula(
-  #   clean_metadata,
-  #   group_variables,
-  #   colnames(clean_metadata[!(colnames(clean_metadata) %in% group_variables)])
+  # return comparisons in a list
+  # comparisons <- map(
+  #   transpose(
+  #     as.data.frame(comparisons)
+  #     ),
+  #   unlist,
+  #   use.names = F
   #   )
-  # map over meta() for each condition
-  meta <- function(comparison_values, by_gene, metadata, primary_variable) {
-    # subset both metadata and expression matrix to include comparison_values
-    # only
-    # metadata <- metadata[metadata[[primary_variable]] %in% comparison_values,]
-    # by_gene <- by_gene[]
-    # experiment_group <- by_gene
-    #metacont can only accept one label for studLabel (e.g. Tissue in this
-    # example) Need to collapse all other model var categories.
-
-  }
-
+  comparisons <- split(comparisons, row(comparisons))
+
+  # map over pairwise_meta() for each condition but also need to
+  # group_by(feature, other_metadata)
+  group_variables <- group_variables[!group_variables %in% collapse_condition]
+  multi_groups <- syms(group_variables)
+  by_gene <- by_gene %>%
+    dplyr::group_by(.data[[gene_id_input]], !!!multi_groups) %>%
+    tidyr::nest() %>%
+    # required for each group to be ordered since furrr will map over rows with
+    # metacont()
+    dplyr::arrange(.data[[gene_id_input]], .by_group = TRUE)
+
+  # map pairwise_meta
+  by_primary_variable_values <- furrr::future_map(
+    comparisons,
+    ~pairwise_meta(
+      .,
+      by_gene,
+      primary_variable
+      )
+    )
+  by_primary_variable_values
+  # ADJUST PVALS
 }
diff --git a/man/meta_express.Rd b/man/meta_express.Rd
new file mode 100644
index 00000000..ccd759c0
--- /dev/null
+++ b/man/meta_express.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/functions.R
+\name{meta_express}
+\alias{meta_express}
+\title{Meta-analysis of continuous outcome data}
+\usage{
+meta_express(
+  clean_metadata,
+  sample_identifier,
+  count_df,
+  gene_id_input,
+  primary_variable,
+  group_variables,
+  collapse_condition
+)
+}
+\arguments{
+\item{clean_metadata}{A data frame with sample identifiers as rownames and variables as
+factors or numeric as determined by \code{"sageseqr::clean_covariates()"}.}
+
+\item{sample_identifier}{The name of the column with the sample identifiers that map to the gene counts data frame.}
+
+\item{count_df}{A counts data frame with sample identifiers as column names
+and gene Ids are rownames.}
+
+\item{gene_id_input}{Column name of the gene ids in the counts_id file.}
+
+\item{primary_variable}{The primary meta-analysis condition. The values of
+the `primary_variable` will make up the experimental and control groups of
+\code{"meta::metacont()"}.}
+
+\item{group_variables}{These variables are referenced when computing summary
+statistics for every gene. Therefore, a gene feature will have an observation
+for each group of variables (i.e. each gene feature across unique tissue
+types, sex and diagnosis).}
+
+\item{collapse_condition}{Compute one estimate across the `collapse_condition`
+which is a variabel in `clean_metadata`.}
+}
+\description{
+Meta-analysis by standardized mean difference with the Hedges' g method
+(1981).
+}
diff --git a/man/pairwise_meta.Rd b/man/pairwise_meta.Rd
new file mode 100644
index 00000000..36616919
--- /dev/null
+++ b/man/pairwise_meta.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/functions.R
+\name{pairwise_meta}
+\alias{pairwise_meta}
+\title{Apply metacont() between two comparison groups}
+\usage{
+pairwise_meta(comparison_values, by_gene, primary_variable)
+}
+\arguments{
+\item{comparison_values}{A vector of two values to compare from
+`primary_variable`.}
+
+\item{by_gene}{A grouped data frame with}
+
+\item{primary_variable}{The primary meta-analysis condition. The values of
+the `primary_variable` will make up the experimental and control groups of
+\code{"meta::metacont()"}.}
+}
+\description{
+\code{"sageseqr::meta_express()"} wraps \code{"sageseqr::pairwise_meta()"}
+since the input format for `by_gene` is constrained. `by_gene` is a dataframe
+grouped by gene feature and other metadata with gene summary statistics nested
+under "data".
+}
diff --git a/tests/testthat/test-pairwise_meta.R b/tests/testthat/test-pairwise_meta.R
new file mode 100644
index 00000000..67e5a4e4
--- /dev/null
+++ b/tests/testthat/test-pairwise_meta.R
@@ -0,0 +1,37 @@
+dat <- tibble::tribble(~tissue, ~diagnosis, ~sex, ~feature, ~n, ~mean, ~sd,
+               "cbe", "dx", "f", "ENSG00000225630", 47, -0.24, 1.256,
+               "pf", "dx", "m", "ENSG00000237973", 54, -0.23, 0.1,
+               "cbe", "dx", "m", "ENSG00000237973", 47, -0.4, 0.213,
+               "cbe", "dx", "f", "ENSG00000237973", 47, -0.277, 0.56,
+               "cbe", "other", "m", "ENSG00000237973", 72,1.003, 0.5,
+               "cbe", "ct", "f", "ENSG00000225630", 88, -2.0, 2.2,
+               "cbe", "ct", "f", "ENSG00000237973", 88, -1.6, 0.44,
+               "cbe", "other", "f", "ENSG00000225630", 72, 1.34, 1.01,
+               "pf", "other", "m", "ENSG00000237973", 66,0.98, 0.7,
+               "cbe", "ct", "m", "ENSG00000237973", 88,-1, 0.24,
+               "pf", "dx", "f", "ENSG00000237973", 54, -0.16, 0.5,
+               "pf", "dx", "f", "ENSG00000225630", 54, -0.3, 1.1,
+               "pf", "ct", "m", "ENSG00000237973", 57,-0.87, 0.2,
+               "pf", "other", "f", "ENSG00000225630", 66, 1.03, 0.98,
+               "pf", "other", "f", "ENSG00000237973", 66, 1.6, 0.8,
+               "pf", "ct", "f", "ENSG00000225630", 57, -2.06, 2.0,
+               "cbe", "other", "f", "ENSG00000237973", 72, 1.78, 0.7,
+               "pf", "ct", "f", "ENSG00000237973", 57, -1.67, 0.3
+)
+
+# pairwise_meta requires a constrained format when gene feature statistics are
+# nested by groups
+input <- dat %>%
+  dplyr::group_by(feature,sex,diagnosis) %>%
+  tidyr::nest() %>%
+  dplyr::arrange(feature, .by_group = TRUE)
+
+meta_stats <- pairwise_meta(c("dx", "ct"), input, "diagnosis")
+
+test_that("expect dimensions of output to be 3", {
+  expect_equal(3, dim(meta_stats)[1])
+})
+
+test_that("columns of output match expectations", {
+  expect_equal(colnames(meta_stats)[1:2], c("sex", "feature"))
+})

From 6f3ca563fdef54adffd2017740c26c8cbb02fade Mon Sep 17 00:00:00 2001
From: kelshmo <kelsey.montgomery@sagebase.org>
Date: Fri, 9 Jul 2021 14:04:06 -0500
Subject: [PATCH 11/14] update namespace with new functions

---
 NAMESPACE | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/NAMESPACE b/NAMESPACE
index f71707d4..8a718a6d 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -9,6 +9,7 @@ export(build_formula)
 export(clean_covariates)
 export(collapse_duplicate_hgnc_symbol)
 export(complete_path)
+export(compute_mean_sd)
 export(conditional_plot_sexcheck)
 export(convert_geneids)
 export(correlate_and_plot)
@@ -20,6 +21,8 @@ export(get_biomart)
 export(get_data)
 export(identify_outliers)
 export(mclustBIC)
+export(meta_express)
+export(pairwise_meta)
 export(plot_coexpression)
 export(plot_pcs_with_covariates)
 export(plot_sexcheck)

From 1ede2d7447ac1a96967a5cac75337ac94f37f319 Mon Sep 17 00:00:00 2001
From: kelshmo <kelsey.montgomery@sagebase.org>
Date: Sun, 11 Jul 2021 12:53:15 -0500
Subject: [PATCH 12/14] ideas for improving efficiency and qc

---
 R/functions.R | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/R/functions.R b/R/functions.R
index 7c84f3b9..741fa30e 100644
--- a/R/functions.R
+++ b/R/functions.R
@@ -903,6 +903,9 @@ pairwise_meta <- function(comparison_values, by_gene, primary_variable) {
                "zval.fixed", "pval.fixed", "TE.random", "seTE.random",
                "lower.random", "upper.random", "zval.random", "pval.random",
                "Q", "tau", "H", "I2")])
+
+    # move bind_cols logic here.
+
   }
   # subset summaries by gene to relevant comparison_values
   to_analyze <- by_gene[by_gene[[primary_variable]] %in% comparison_values,]
@@ -911,12 +914,17 @@ pairwise_meta <- function(comparison_values, by_gene, primary_variable) {
   # split pairwise comparisons into experimental and control data frames
   experimental <- split[[1]]
   control <- split[[2]]
+
+  # add assert functions here that dims match
+
   # parallelize meta-analysis over every gene
+  # try pmap
   m <- furrr::future_map2_dfr(
     experimental$data,
     control$data,
     ~wrap_meta(.x, .y)
   )
+  # create index column
   # bind gene features and other metadata
   feature_vars <- colnames(by_gene)[
     !colnames(by_gene) %in% c(primary_variable, "data")

From 1b65b549dacf729c2ea5edd0d8c96b3516411025 Mon Sep 17 00:00:00 2001
From: kelshmo <kelsey.montgomery@sagebase.org>
Date: Thu, 22 Jul 2021 15:32:32 -0500
Subject: [PATCH 13/14] add assert statements

---
 DESCRIPTION | 1 +
 1 file changed, 1 insertion(+)

diff --git a/DESCRIPTION b/DESCRIPTION
index 1c68eba4..3acb2fd4 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -22,6 +22,7 @@ Suggests:
 RoxygenNote: 7.1.1
 biocViews:
 Imports:
+    assertthat,
     biomaRt,
     config,
     cqn,

From 44345821ed90e21aae062a8a8fdcdaefd891f13a Mon Sep 17 00:00:00 2001
From: kelshmo <kelsey.montgomery@sagebase.org>
Date: Thu, 22 Jul 2021 15:33:38 -0500
Subject: [PATCH 14/14] safer computations with inner joins

---
 NAMESPACE                           |   1 -
 R/functions.R                       | 178 ++++++++++++++--------------
 man/meta_express.Rd                 |  16 +--
 man/pairwise_meta.Rd                |   9 +-
 tests/testthat/test-pairwise_meta.R |  19 ++-
 5 files changed, 118 insertions(+), 105 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 8a718a6d..5c2b7795 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -22,7 +22,6 @@ export(get_data)
 export(identify_outliers)
 export(mclustBIC)
 export(meta_express)
-export(pairwise_meta)
 export(plot_coexpression)
 export(plot_pcs_with_covariates)
 export(plot_sexcheck)
diff --git a/R/functions.R b/R/functions.R
index 741fa30e..1c6bbb53 100644
--- a/R/functions.R
+++ b/R/functions.R
@@ -832,7 +832,8 @@ compute_mean_sd <- function(clean_metadata, sample_identifier, count_df, gene_id
   # function to compute rowwise mean and sd, remove missing values
   f_byrow <- function(x) {
     tibble::tibble(
-      mean = mean(x, na.rm = TRUE),
+      n = length(x),
+      mean = mean(as.numeric(x), na.rm = TRUE),
       sd = sd(x, na.rm = TRUE))
   }
   # add a check that all samples in the metadata are represented in the counts
@@ -846,31 +847,33 @@ compute_mean_sd <- function(clean_metadata, sample_identifier, count_df, gene_id
         "Not all samples in the metadata are present in the counts matrix. {dropped} is/are not analyzed."
         )
       )
-    samples <- clean_metadata[[sample_identifier]][
-      clean_metadata[[sample_identifier]] %in% colnames(count_df)
-    ]
+    # character vector required to subset matrix
+    samples <- as.character(
+      clean_metadata[[sample_identifier]][
+        clean_metadata[[sample_identifier]] %in% colnames(count_df)
+        ]
+      )
   } else{
-    samples <- clean_metadata[[sample_identifier]]
+    samples <- as.character(clean_metadata[[sample_identifier]])
   }
 
-  # subset expression matrix to include relevant samples
-  subset <- count_df[,samples]
-  # compute row means and standard deviation
-  compute <- subset %>%
-    # capture row elements with ... and concatenate into a vector
-    dplyr::transmute(
-      out = furrr::future_pmap(
-        ., ~ f_byrow(c(...))
-      )
-    ) %>%
-    tidyr::unnest(cols = c(out))
+  # create index of gene features
+  index <- dplyr::select(count_df, dplyr::one_of(gene_id_input))
 
-  # output a data frame with gene features, n, mean, and sd
-  dat <- data.frame(
-    feature = count_df[[gene_id_input]],
-    n = dim(clean_metadata)[1]
-  )
-  dplyr::bind_cols(dat, compute)
+  # compute row means and standard deviation
+  compute <- furrr::future_pmap_dfr(index, function(...) {
+    # capture row elements with ...
+    index <- list(...) %>% tibble::as_tibble()
+    # drop feature column so observations are numeric only
+    one_row <- index %>%
+      dplyr::inner_join(count_df) %>%
+      dplyr::select(-dplyr::one_of(gene_id_input))
+
+    out <- f_byrow(one_row)
+    # output a data frame with gene features, n, mean, and sd
+    return(dplyr::bind_cols(index, out))
+    })
+  return(compute)
 }
 #'Apply metacont() between two comparison groups
 #'
@@ -881,81 +884,74 @@ compute_mean_sd <- function(clean_metadata, sample_identifier, count_df, gene_id
 #'
 #' @param comparison_values A vector of two values to compare from
 #'`primary_variable`.
-#' @param by_gene A grouped data frame with
+#' @param by_gene A grouped data frame with summary statistics computed on each gene
+#' per condition nested. Required columns are n, mean and sd.
+#' @param index A data frame where every row contains a unique gene feature and
+#' other metadata to serve as the index for comparing summary statistics across
+#' groups defined by `primary_variable`.
 #' @inheritParams meta_express
-#'
-#' @export
-pairwise_meta <- function(comparison_values, by_gene, primary_variable) {
-  # wrap metacont() to return relevant components for each gene feature
-  wrap_meta <- function(df1, df2){
+pairwise_meta <- function(comparison_values, by_gene, primary_variable, index) {
+  assertthat::assert_that(length(comparison_values) == 2)
+
+  # subset by_gene to data matching comparison_values
+  input <- by_gene[by_gene[[primary_variable]] %in% comparison_values,]
+
+  # wrap metacont() to return relevant statistics for each gene feature. Gene
+  # feature corresponds to values in index
+  output <- furrr::future_pmap_dfr(index, function(...) {
+    index <- list(...) %>% tibble::as_tibble()
+    df <- purrr::map(
+      comparison_values,
+      function(c) {
+        index %>% dplyr::inner_join(input) %>% filter(.data[[primary_variable]] == c)
+        }
+      )
     m <- meta::metacont(
-      n.e = df1$n,
-      mean.e = df1$mean,
-      sd.e = df1$sd,
-      n.c = df2$n,
-      mean.c = df2$mean,
-      sd.c = df2$sd,
+      n.e = df[[1]]$data[[1]]$n,
+      mean.e = df[[1]]$data[[1]]$mean,
+      sd.e = df[[1]]$data[[1]]$sd,
+      n.c = df[[2]]$data[[1]]$n,
+      mean.c = df[[2]]$data[[1]]$mean,
+      sd.c = df[[2]]$data[[1]]$sd,
       sm = "SMD",
       method.smd = "Hedges",
       method.tau = "REML"
     )
-    return(m[c("TE.fixed", "seTE.fixed", "lower.fixed", "upper.fixed",
-               "zval.fixed", "pval.fixed", "TE.random", "seTE.random",
-               "lower.random", "upper.random", "zval.random", "pval.random",
-               "Q", "tau", "H", "I2")])
-
-    # move bind_cols logic here.
-
-  }
-  # subset summaries by gene to relevant comparison_values
-  to_analyze <- by_gene[by_gene[[primary_variable]] %in% comparison_values,]
-  split <- dplyr::group_by(to_analyze, .data[[primary_variable]]) %>%
-    dplyr::group_split()
-  # split pairwise comparisons into experimental and control data frames
-  experimental <- split[[1]]
-  control <- split[[2]]
-
-  # add assert functions here that dims match
-
-  # parallelize meta-analysis over every gene
-  # try pmap
-  m <- furrr::future_map2_dfr(
-    experimental$data,
-    control$data,
-    ~wrap_meta(.x, .y)
+    return(
+      bind_cols(
+        index,
+        m[c("TE.fixed", "seTE.fixed", "lower.fixed",
+            "upper.fixed", "zval.fixed", "pval.fixed",
+            "TE.random", "seTE.random", "lower.random",
+            "upper.random", "zval.random", "pval.random",
+            "Q", "tau", "H", "I2")]
+        )
+      )
+    }
   )
-  # create index column
-  # bind gene features and other metadata
-  feature_vars <- colnames(by_gene)[
-    !colnames(by_gene) %in% c(primary_variable, "data")
-  ]
-  gene_features <- dplyr::bind_rows(split) %>%
-    dplyr::select(!!!feature_vars) %>%
-    dplyr::distinct()
-  output <- dplyr::bind_cols(gene_features, m)
-  output
-}
+    return(output)
+  }
 #' Meta-analysis of continuous outcome data
 #'
 #' Meta-analysis by standardized mean difference with the Hedges' g method
 #' (1981).
 #'
-#' @param group_variables These variables are referenced when computing summary
-#' statistics for every gene. Therefore, a gene feature will have an observation
-#' for each group of variables (i.e. each gene feature across unique tissue
-#' types, sex and diagnosis).
+#' @param group_variables These variables, in addition to the `primary_variable`,
+#' are referenced when computing summary statistics for every gene. Therefore,
+#' a gene feature will have an observation for each group of variables (i.e.
+#' each gene feature by sex).
 #' @param primary_variable The primary meta-analysis condition. The values of
 #' the `primary_variable` will make up the experimental and control groups of
 #' \code{"meta::metacont()"}.
-#' @param collapse_condition Compute one estimate across the `collapse_condition`
-#' which is a variabel in `clean_metadata`.
+#' @param collapse_condition Compute one estimate for the meta-analysis across
+#' the `collapse_condition` which is a variable in `clean_metadata`.
 #' @inheritParams compute_mean_sd
 #' @export
 meta_express <- function(clean_metadata, sample_identifier, count_df,
-                         gene_id_input, primary_variable, group_variables,
-                         collapse_condition
+                         gene_id_input, primary_variable, group_variables = NULL,
+                         collapse_condition = NULL
                          ) {
-  vars <- c(primary_variable, group_variables)
+  vars <- c(primary_variable, group_variables, collapse_condition)
   #compute mean and standard deviation by gene for grouped variables of interest
   by_gene <- clean_metadata %>%
     dplyr::group_by_at(vars(dplyr::one_of(vars))) %>%
@@ -989,24 +985,30 @@ meta_express <- function(clean_metadata, sample_identifier, count_df,
 
   # map over pairwise_meta() for each condition but also need to
   # group_by(feature, other_metadata)
-  group_variables <- group_variables[!group_variables %in% collapse_condition]
-  multi_groups <- syms(group_variables)
+  vars <- vars[!vars %in% collapse_condition]
+  multi_groups <- syms(vars)
   by_gene <- by_gene %>%
     dplyr::group_by(.data[[gene_id_input]], !!!multi_groups) %>%
-    tidyr::nest() %>%
-    # required for each group to be ordered since furrr will map over rows with
-    # metacont()
-    dplyr::arrange(.data[[gene_id_input]], .by_group = TRUE)
+    tidyr::nest()
+
+  index <- by_gene[,c(gene_id_input, group_variables)] %>%
+    dplyr::distinct()
 
   # map pairwise_meta
   by_primary_variable_values <- furrr::future_map(
     comparisons,
-    ~pairwise_meta(
+    ~ pairwise_meta(
       .,
       by_gene,
-      primary_variable
+      primary_variable,
+      index
       )
     )
-  by_primary_variable_values
-  # ADJUST PVALS
+  # adjust p-values
+  output <- dplyr::mutate(
+    by_primary_variable_values,
+    fdr.fixed = p.adjust(pval.fixed, method = 'fdr'),
+    fdr.random = p.adjust(pval.random, method = 'fdr')
+    )
+  output
 }
diff --git a/man/meta_express.Rd b/man/meta_express.Rd
index ccd759c0..3bdaa0be 100644
--- a/man/meta_express.Rd
+++ b/man/meta_express.Rd
@@ -10,8 +10,8 @@ meta_express(
   count_df,
   gene_id_input,
   primary_variable,
-  group_variables,
-  collapse_condition
+  group_variables = NULL,
+  collapse_condition = NULL
 )
 }
 \arguments{
@@ -29,13 +29,13 @@ and gene Ids are rownames.}
 the `primary_variable` will make up the experimental and control groups of
 \code{"meta::metacont()"}.}
 
-\item{group_variables}{These variables are referenced when computing summary
-statistics for every gene. Therefore, a gene feature will have an observation
-for each group of variables (i.e. each gene feature across unique tissue
-types, sex and diagnosis).}
+\item{group_variables}{These variables, in addition to the `primary_variable`,
+are referenced when computing summary statistics for every gene. Therefore,
+a gene feature will have an observation for each group of variables (i.e.
+each gene feature by sex).}
 
-\item{collapse_condition}{Compute one estimate across the `collapse_condition`
-which is a variabel in `clean_metadata`.}
+\item{collapse_condition}{Compute one estimate for the meta-analysis across
+the `collapse_condition` which is a variable in `clean_metadata`.}
 }
 \description{
 Meta-analysis by standardized mean difference with the Hedges' g method
diff --git a/man/pairwise_meta.Rd b/man/pairwise_meta.Rd
index 36616919..b6b950f7 100644
--- a/man/pairwise_meta.Rd
+++ b/man/pairwise_meta.Rd
@@ -4,17 +4,22 @@
 \alias{pairwise_meta}
 \title{Apply metacont() between two comparison groups}
 \usage{
-pairwise_meta(comparison_values, by_gene, primary_variable)
+pairwise_meta(comparison_values, by_gene, primary_variable, index)
 }
 \arguments{
 \item{comparison_values}{A vector of two values to compare from
 `primary_variable`.}
 
-\item{by_gene}{A grouped data frame with}
+\item{by_gene}{A grouped data frame with summary statistics computed on each gene
+per condition nested. Required columns are n, mean and sd.}
 
 \item{primary_variable}{The primary meta-analysis condition. The values of
 the `primary_variable` will make up the experimental and control groups of
 \code{"meta::metacont()"}.}
+
+\item{index}{A data frame where every row contains a unique gene feature and
+other metadata to serve as the index for comparing summary statistics across
+groups defined by `primary_variable`.}
 }
 \description{
 \code{"sageseqr::meta_express()"} wraps \code{"sageseqr::pairwise_meta()"}
diff --git a/tests/testthat/test-pairwise_meta.R b/tests/testthat/test-pairwise_meta.R
index 67e5a4e4..2f999ecf 100644
--- a/tests/testthat/test-pairwise_meta.R
+++ b/tests/testthat/test-pairwise_meta.R
@@ -18,15 +18,16 @@ dat <- tibble::tribble(~tissue, ~diagnosis, ~sex, ~feature, ~n, ~mean, ~sd,
                "cbe", "other", "f", "ENSG00000237973", 72, 1.78, 0.7,
                "pf", "ct", "f", "ENSG00000237973", 57, -1.67, 0.3
 )
-
-# pairwise_meta requires a constrained format when gene feature statistics are
-# nested by groups
-input <- dat %>%
-  dplyr::group_by(feature,sex,diagnosis) %>%
+# pairwise_meta() requires a constrained format where summary statistics are
+# grouped by a condition of interest
+input <- dplyr::group_by(dat, diagnosis, sex, feature) %>%
   tidyr::nest() %>%
   dplyr::arrange(feature, .by_group = TRUE)
 
-meta_stats <- pairwise_meta(c("dx", "ct"), input, "diagnosis")
+index <- input[,c("sex", "feature")] %>%
+  dplyr::distinct()
+
+meta_stats <- pairwise_meta(c("dx", "ct"), input, "diagnosis", index)
 
 test_that("expect dimensions of output to be 3", {
   expect_equal(3, dim(meta_stats)[1])
@@ -35,3 +36,9 @@ test_that("expect dimensions of output to be 3", {
 test_that("columns of output match expectations", {
   expect_equal(colnames(meta_stats)[1:2], c("sex", "feature"))
 })
+
+test_that("fails when more than two comparison_values are passed",{
+  expect_error(
+    pairwise_meta(c("dx", "ct", "ct"), input, diagnosis, index)
+  )
+})