From e36acbc50920b0a9aa072134d8177bb16f0180d4 Mon Sep 17 00:00:00 2001 From: jgockley62 Date: Wed, 16 Jun 2021 18:17:05 -0700 Subject: [PATCH 01/64] add plangeneration to functions --- R/functions.R | 185 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) diff --git a/R/functions.R b/R/functions.R index 15f8a8fd..937d83cd 100644 --- a/R/functions.R +++ b/R/functions.R @@ -1,3 +1,188 @@ +#' Execute the drake RNA-seq plan +#' +#' This function wraps the \code{"drake::plan()"} and copies the R markdown +#' report to the user's working directory. +#' +#' @param metadata_id Synapse ID to clean metadata file with sample identifiers +#' in a column and variables of interest as column names. There cannot be any +#' missing values. +#' @param metadata_version Optionally, include Synapse file version number. If +#' omitted, current version will be downloaded. +#' @param counts_id Synapse ID to counts data frame with identifiers to the +#' metadata as column names and gene ids in a column. +#' @param counts_version Optionally, include Synapse file version number. +#' @param gene_id_input Column name of the gene ids in the counts_id file. +#' @param sample_id_input Column name of the sample ids in the metadata_id file. +#' @param factor_input Vector of factor variables. Variables must be present +#' in the metadata as column names. +#' @param continuous_input Vector of continuous variables. Variables must be +#' present in the metadata as column names. +#' @param biomart_id Synapse ID to biomart object. +#' @param biomart_version Optionally, include Synapse file version number. +#' @param primary_variable Baseline variable for model selection and variable to +#' stratify groups in the boxplot. +#' @param report_name Name of output markdown file. +#' @inheritParams plot_sexcheck +#' @inheritParams get_biomart +#' @inheritParams filter_genes +#' @inheritParams identify_outliers +#' @inheritParams store_results +#' @inheritParams prepare_results +#' @param skip_model If TRUE, does not run regression model. +#' @export +rnaseq_plan <- function(metadata_id, metadata_version, counts_id, + counts_version, gene_id_input, sample_id_input, + factor_input, continuous_input, + biomart_id, biomart_version, host, filters, + organism, conditions, cpm_threshold = 1, + conditions_threshold = 0.5, + primary_variable, sex_var, color, shape, size, + report_name, skip_model, parent_id, + rownames, config_file) { + # Copies markdown to user's working directory + if (!file.exists("sageseqr-report.Rmd")) { + fs::file_copy(system.file("sageseqr-report.Rmd", package = "sageseqr"), + new_path = getwd()) + } + + #drake::drake_plan( + list( + targets::tar_manifest( + import_metadata, + get_data(!!metadata_id, + !!metadata_version) + ), + targets::tar_manifest( + import_counts, + get_data(!!counts_id, + !!counts_version) + ), + targets::tar_manifest( + counts, + tibble::column_to_rownames(import_counts, + var = !!gene_id_input) + ), + targets::tar_manifest( + clean_md, + clean_covariates(md = import_metadata, + factors = !!factor_input, + continuous = !!continuous_input, + sample_identifier = !!sample_id_input) + ), + targets::tar_manifest( + biomart_results, + get_biomart(count_df = counts, + synid = !!biomart_id, + version = !!biomart_version, + filters = !!filters, + host = !!host, + organism = !!organism) + ), + targets::tar_manifest( + filtered_counts, + filter_genes(clean_metadata = clean_md, + count_df = counts, + conditions = !!conditions, + cpm_threshold = !!cpm_threshold, + conditions_threshold = !!conditions_threshold) + ), + targets::tar_manifest( + biotypes, + summarize_biotypes(filtered_counts, biomart_results) + ), + targets::tar_manifest( + cqn_counts, + cqn(filtered_counts, biomart_results) + ), + targets::tar_manifest( + gene_coexpression, + plot_coexpression(cqn_counts) + ), + + targets::tar_manifest( + boxplots, + boxplot_vars(md = clean_md, + include_vars = !!continuous_input, + x_var = !!primary_variable) + ), + targets::tar_manifest( + sex_plot, + conditional_plot_sexcheck(clean_md, + counts, + biomart_results, + !!sex_var) + ), + targets::tar_manifest(sex_plot_pca, + plot_sexcheck_pca( + clean_md, + counts, + biomart_results, + !!sex_var) + ), + targets::tar_manifest( + correlation_plot, + get_association_statistics(clean_md) + ), + targets::tar_manifest( + significant_covariates_plot, + run_pca_and_plot_correlations(cqn_counts$E,clean_md) + ), + targets::tar_manifest( + outliers, + identify_outliers(filtered_counts, clean_md, !!color, !!shape, !!size) + ), + targets::tar_manifest( + model, + stepwise_regression( + clean_md, + primary_variable = !!primary_variable, + cqn_counts = cqn_counts, + skip = !!skip_model + ) + ), + targets::tar_manifest( + report, + rmarkdown::render( + #drake::knitr_in("sageseqr-report.Rmd"), + #output_file = drake::file_out( + #!!glue::glue("{getwd()}/{report_name}.html") + #), + input, + tarchetypes::tar_render(name="sageseqr-report.Rmd", + path=glue::glue("{getwd()}", + '/inst/sageseqr-report.Rmd') + ) + ) + ), + targets::tar_manifest( + document_inputs, + provenance_helper( + !!metadata_id, !!counts_id, + !!metadata_version, !!counts_version, + !!biomart_id, !!biomart_version + ) + ), + targets::tar_manifest( + Synapse, + store_results( + parent_id = !!parent_id, + cqn_counts = cqn_counts$counts, + clean_md = clean_md, + filtered_counts = filtered_counts, + biomart_results = biomart_results, + rownames = !!rownames, + syn_names = list("Covariates", "Filtered counts (greater than 1cpm)", + "BioMart query results", "Normalized counts (CQN)"), + data_names = list("clean_md", "filtered_counts", "biomart_results", + "cqn_counts"), + inputs = document_inputs, + activity_provenance = "Analyze RNA-seq data with sageseqr pkg", + config_file = !!config_file, + report_name = !!report_name + ) + ) + ) +} #'Detect file type and download data #' #'This function takes synIds and version number to download any From 319622ffacbaa68658b68d9154a685afba463203 Mon Sep 17 00:00:00 2001 From: jgockley62 Date: Wed, 16 Jun 2021 18:29:25 -0700 Subject: [PATCH 02/64] redo plan --- R/plan.R | 173 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 116 insertions(+), 57 deletions(-) diff --git a/R/plan.R b/R/plan.R index b0ac9ff8..e74fa0df 100644 --- a/R/plan.R +++ b/R/plan.R @@ -45,82 +45,141 @@ rnaseq_plan <- function(metadata_id, metadata_version, counts_id, new_path = getwd()) } - drake::drake_plan( - import_metadata = get_data(!!metadata_id, - !!metadata_version), - import_counts = get_data(!!counts_id, - !!counts_version), - counts = tibble::column_to_rownames(import_counts, - var = !!gene_id_input), - clean_md = clean_covariates(md = import_metadata, + #drake::drake_plan( + list( + targets::tar_manifest( + import_metadata, + get_data(!!metadata_id, + !!metadata_version) + ), + targets::tar_manifest( + import_counts, + get_data(!!counts_id, + !!counts_version) + ), + targets::tar_manifest( + counts, + tibble::column_to_rownames(import_counts, + var = !!gene_id_input) + ), + targets::tar_manifest( + clean_md, + clean_covariates(md = import_metadata, factors = !!factor_input, continuous = !!continuous_input, - sample_identifier = !!sample_id_input), - biomart_results = get_biomart(count_df = counts, + sample_identifier = !!sample_id_input) + ), + targets::tar_manifest( + biomart_results, + get_biomart(count_df = counts, synid = !!biomart_id, version = !!biomart_version, filters = !!filters, host = !!host, - organism = !!organism), - filtered_counts = filter_genes(clean_metadata = clean_md, + organism = !!organism) + ), + targets::tar_manifest( + filtered_counts, + filter_genes(clean_metadata = clean_md, count_df = counts, conditions = !!conditions, cpm_threshold = !!cpm_threshold, - conditions_threshold = !!conditions_threshold), - biotypes = summarize_biotypes(filtered_counts, biomart_results), - cqn_counts = cqn(filtered_counts, - biomart_results), - gene_coexpression = plot_coexpression(cqn_counts), - boxplots = boxplot_vars(md = clean_md, + conditions_threshold = !!conditions_threshold) + ), + targets::tar_manifest( + biotypes, + summarize_biotypes(filtered_counts, biomart_results) + ), + targets::tar_manifest( + cqn_counts, + cqn(filtered_counts, biomart_results) + ), + targets::tar_manifest( + gene_coexpression, + plot_coexpression(cqn_counts) + ), + + targets::tar_manifest( + boxplots, + boxplot_vars(md = clean_md, include_vars = !!continuous_input, - x_var = !!primary_variable), - sex_plot = conditional_plot_sexcheck(clean_md, + x_var = !!primary_variable) + ), + targets::tar_manifest( + sex_plot, + conditional_plot_sexcheck(clean_md, counts, biomart_results, - !!sex_var), - sex_plot_pca = plot_sexcheck_pca( - clean_md, - counts, - biomart_results, - !!sex_var), - correlation_plot = get_association_statistics(clean_md), - significant_covariates_plot = run_pca_and_plot_correlations(cqn_counts$E, - clean_md), - outliers = identify_outliers(filtered_counts, clean_md, !!color, !!shape, - !!size), - model = stepwise_regression( + !!sex_var) + ), + targets::tar_manifest(sex_plot_pca, + plot_sexcheck_pca( + clean_md, + counts, + biomart_results, + !!sex_var) + ), + targets::tar_manifest( + correlation_plot, + get_association_statistics(clean_md) + ), + targets::tar_manifest( + significant_covariates_plot, + run_pca_and_plot_correlations(cqn_counts$E,clean_md) + ), + targets::tar_manifest( + outliers, + identify_outliers(filtered_counts, clean_md, !!color, !!shape, !!size) + ), + targets::tar_manifest( + model, + stepwise_regression( clean_md, primary_variable = !!primary_variable, cqn_counts = cqn_counts, skip = !!skip_model - ), - report = rmarkdown::render( - drake::knitr_in("sageseqr-report.Rmd"), - output_file = drake::file_out( - !!glue::glue("{getwd()}/{report_name}.html") - ), - output_dir = "." - ), - document_inputs = provenance_helper( - !!metadata_id, !!counts_id, - !!metadata_version, !!counts_version, - !!biomart_id, !!biomart_version - ), - Synapse = store_results( - parent_id = !!parent_id, - cqn_counts = cqn_counts$counts, - clean_md = clean_md, - filtered_counts = filtered_counts, - biomart_results = biomart_results, - rownames = !!rownames, - syn_names = list("Covariates", "Filtered counts (greater than 1cpm)", - "BioMart query results", "Normalized counts (CQN)"), - data_names = list("clean_md", "filtered_counts", "biomart_results", - "cqn_counts"), + ) + ), + targets::tar_manifest( + report, + rmarkdown::render( + #drake::knitr_in("sageseqr-report.Rmd"), + #output_file = drake::file_out( + #!!glue::glue("{getwd()}/{report_name}.html") + #), + input, + tarchetypes::tar_render(name="sageseqr-report.Rmd", + path=glue::glue("{getwd()}", + '/inst/sageseqr-report.Rmd') + ) + ) + ), + targets::tar_manifest( + document_inputs, + provenance_helper( + !!metadata_id, !!counts_id, + !!metadata_version, !!counts_version, + !!biomart_id, !!biomart_version + ) + ), + targets::tar_manifest( + Synapse, + store_results( + parent_id = !!parent_id, + cqn_counts = cqn_counts$counts, + clean_md = clean_md, + filtered_counts = filtered_counts, + biomart_results = biomart_results, + rownames = !!rownames, + syn_names = list("Covariates", "Filtered counts (greater than 1cpm)", + "BioMart query results", "Normalized counts (CQN)"), + data_names = list("clean_md", "filtered_counts", "biomart_results", + "cqn_counts"), inputs = document_inputs, activity_provenance = "Analyze RNA-seq data with sageseqr pkg", config_file = !!config_file, report_name = !!report_name ) ) - } + ) +} From 7a690d0f4c16f5c5db59c62b74b367e2fea61441 Mon Sep 17 00:00:00 2001 From: jgockley62 Date: Wed, 16 Jun 2021 21:14:47 -0700 Subject: [PATCH 03/64] targets targetlist --- R/functions.R | 186 +------------------------------------------------- R/plan.R | 38 +++++------ 2 files changed, 20 insertions(+), 204 deletions(-) diff --git a/R/functions.R b/R/functions.R index 937d83cd..386de70b 100644 --- a/R/functions.R +++ b/R/functions.R @@ -1,188 +1,4 @@ -#' Execute the drake RNA-seq plan -#' -#' This function wraps the \code{"drake::plan()"} and copies the R markdown -#' report to the user's working directory. -#' -#' @param metadata_id Synapse ID to clean metadata file with sample identifiers -#' in a column and variables of interest as column names. There cannot be any -#' missing values. -#' @param metadata_version Optionally, include Synapse file version number. If -#' omitted, current version will be downloaded. -#' @param counts_id Synapse ID to counts data frame with identifiers to the -#' metadata as column names and gene ids in a column. -#' @param counts_version Optionally, include Synapse file version number. -#' @param gene_id_input Column name of the gene ids in the counts_id file. -#' @param sample_id_input Column name of the sample ids in the metadata_id file. -#' @param factor_input Vector of factor variables. Variables must be present -#' in the metadata as column names. -#' @param continuous_input Vector of continuous variables. Variables must be -#' present in the metadata as column names. -#' @param biomart_id Synapse ID to biomart object. -#' @param biomart_version Optionally, include Synapse file version number. -#' @param primary_variable Baseline variable for model selection and variable to -#' stratify groups in the boxplot. -#' @param report_name Name of output markdown file. -#' @inheritParams plot_sexcheck -#' @inheritParams get_biomart -#' @inheritParams filter_genes -#' @inheritParams identify_outliers -#' @inheritParams store_results -#' @inheritParams prepare_results -#' @param skip_model If TRUE, does not run regression model. -#' @export -rnaseq_plan <- function(metadata_id, metadata_version, counts_id, - counts_version, gene_id_input, sample_id_input, - factor_input, continuous_input, - biomart_id, biomart_version, host, filters, - organism, conditions, cpm_threshold = 1, - conditions_threshold = 0.5, - primary_variable, sex_var, color, shape, size, - report_name, skip_model, parent_id, - rownames, config_file) { - # Copies markdown to user's working directory - if (!file.exists("sageseqr-report.Rmd")) { - fs::file_copy(system.file("sageseqr-report.Rmd", package = "sageseqr"), - new_path = getwd()) - } - - #drake::drake_plan( - list( - targets::tar_manifest( - import_metadata, - get_data(!!metadata_id, - !!metadata_version) - ), - targets::tar_manifest( - import_counts, - get_data(!!counts_id, - !!counts_version) - ), - targets::tar_manifest( - counts, - tibble::column_to_rownames(import_counts, - var = !!gene_id_input) - ), - targets::tar_manifest( - clean_md, - clean_covariates(md = import_metadata, - factors = !!factor_input, - continuous = !!continuous_input, - sample_identifier = !!sample_id_input) - ), - targets::tar_manifest( - biomart_results, - get_biomart(count_df = counts, - synid = !!biomart_id, - version = !!biomart_version, - filters = !!filters, - host = !!host, - organism = !!organism) - ), - targets::tar_manifest( - filtered_counts, - filter_genes(clean_metadata = clean_md, - count_df = counts, - conditions = !!conditions, - cpm_threshold = !!cpm_threshold, - conditions_threshold = !!conditions_threshold) - ), - targets::tar_manifest( - biotypes, - summarize_biotypes(filtered_counts, biomart_results) - ), - targets::tar_manifest( - cqn_counts, - cqn(filtered_counts, biomart_results) - ), - targets::tar_manifest( - gene_coexpression, - plot_coexpression(cqn_counts) - ), - - targets::tar_manifest( - boxplots, - boxplot_vars(md = clean_md, - include_vars = !!continuous_input, - x_var = !!primary_variable) - ), - targets::tar_manifest( - sex_plot, - conditional_plot_sexcheck(clean_md, - counts, - biomart_results, - !!sex_var) - ), - targets::tar_manifest(sex_plot_pca, - plot_sexcheck_pca( - clean_md, - counts, - biomart_results, - !!sex_var) - ), - targets::tar_manifest( - correlation_plot, - get_association_statistics(clean_md) - ), - targets::tar_manifest( - significant_covariates_plot, - run_pca_and_plot_correlations(cqn_counts$E,clean_md) - ), - targets::tar_manifest( - outliers, - identify_outliers(filtered_counts, clean_md, !!color, !!shape, !!size) - ), - targets::tar_manifest( - model, - stepwise_regression( - clean_md, - primary_variable = !!primary_variable, - cqn_counts = cqn_counts, - skip = !!skip_model - ) - ), - targets::tar_manifest( - report, - rmarkdown::render( - #drake::knitr_in("sageseqr-report.Rmd"), - #output_file = drake::file_out( - #!!glue::glue("{getwd()}/{report_name}.html") - #), - input, - tarchetypes::tar_render(name="sageseqr-report.Rmd", - path=glue::glue("{getwd()}", - '/inst/sageseqr-report.Rmd') - ) - ) - ), - targets::tar_manifest( - document_inputs, - provenance_helper( - !!metadata_id, !!counts_id, - !!metadata_version, !!counts_version, - !!biomart_id, !!biomart_version - ) - ), - targets::tar_manifest( - Synapse, - store_results( - parent_id = !!parent_id, - cqn_counts = cqn_counts$counts, - clean_md = clean_md, - filtered_counts = filtered_counts, - biomart_results = biomart_results, - rownames = !!rownames, - syn_names = list("Covariates", "Filtered counts (greater than 1cpm)", - "BioMart query results", "Normalized counts (CQN)"), - data_names = list("clean_md", "filtered_counts", "biomart_results", - "cqn_counts"), - inputs = document_inputs, - activity_provenance = "Analyze RNA-seq data with sageseqr pkg", - config_file = !!config_file, - report_name = !!report_name - ) - ) - ) -} + #'Detect file type and download data #' #'This function takes synIds and version number to download any diff --git a/R/plan.R b/R/plan.R index e74fa0df..91955360 100644 --- a/R/plan.R +++ b/R/plan.R @@ -47,29 +47,29 @@ rnaseq_plan <- function(metadata_id, metadata_version, counts_id, #drake::drake_plan( list( - targets::tar_manifest( + targets::tar_target( import_metadata, get_data(!!metadata_id, !!metadata_version) ), - targets::tar_manifest( + targets::tar_target( import_counts, get_data(!!counts_id, !!counts_version) ), - targets::tar_manifest( + targets::tar_target( counts, tibble::column_to_rownames(import_counts, var = !!gene_id_input) ), - targets::tar_manifest( + targets::tar_target( clean_md, clean_covariates(md = import_metadata, factors = !!factor_input, continuous = !!continuous_input, sample_identifier = !!sample_id_input) ), - targets::tar_manifest( + targets::tar_target( biomart_results, get_biomart(count_df = counts, synid = !!biomart_id, @@ -78,7 +78,7 @@ rnaseq_plan <- function(metadata_id, metadata_version, counts_id, host = !!host, organism = !!organism) ), - targets::tar_manifest( + targets::tar_target( filtered_counts, filter_genes(clean_metadata = clean_md, count_df = counts, @@ -86,52 +86,52 @@ rnaseq_plan <- function(metadata_id, metadata_version, counts_id, cpm_threshold = !!cpm_threshold, conditions_threshold = !!conditions_threshold) ), - targets::tar_manifest( + targets::tar_target( biotypes, summarize_biotypes(filtered_counts, biomart_results) ), - targets::tar_manifest( + targets::tar_target( cqn_counts, cqn(filtered_counts, biomart_results) ), - targets::tar_manifest( + targets::tar_target( gene_coexpression, plot_coexpression(cqn_counts) ), - targets::tar_manifest( + targets::tar_target( boxplots, boxplot_vars(md = clean_md, include_vars = !!continuous_input, x_var = !!primary_variable) ), - targets::tar_manifest( + targets::tar_target( sex_plot, conditional_plot_sexcheck(clean_md, counts, biomart_results, !!sex_var) ), - targets::tar_manifest(sex_plot_pca, + targets::tar_target(sex_plot_pca, plot_sexcheck_pca( clean_md, counts, biomart_results, !!sex_var) ), - targets::tar_manifest( + targets::tar_target( correlation_plot, get_association_statistics(clean_md) ), - targets::tar_manifest( + targets::tar_target( significant_covariates_plot, run_pca_and_plot_correlations(cqn_counts$E,clean_md) ), - targets::tar_manifest( + targets::tar_target( outliers, identify_outliers(filtered_counts, clean_md, !!color, !!shape, !!size) ), - targets::tar_manifest( + targets::tar_target( model, stepwise_regression( clean_md, @@ -140,7 +140,7 @@ rnaseq_plan <- function(metadata_id, metadata_version, counts_id, skip = !!skip_model ) ), - targets::tar_manifest( + targets::tar_target( report, rmarkdown::render( #drake::knitr_in("sageseqr-report.Rmd"), @@ -154,7 +154,7 @@ rnaseq_plan <- function(metadata_id, metadata_version, counts_id, ) ) ), - targets::tar_manifest( + targets::tar_target( document_inputs, provenance_helper( !!metadata_id, !!counts_id, @@ -162,7 +162,7 @@ rnaseq_plan <- function(metadata_id, metadata_version, counts_id, !!biomart_id, !!biomart_version ) ), - targets::tar_manifest( + targets::tar_target( Synapse, store_results( parent_id = !!parent_id, From ec7d0956b8f09faa48c61e9a61c0e0ab3d6521ce Mon Sep 17 00:00:00 2001 From: jgockley62 Date: Thu, 17 Jun 2021 14:30:43 -0700 Subject: [PATCH 04/64] Changed plan to targets --- .gitignore | 4 + DESCRIPTION | 2 + R/functions.R | 1 - _targets_template.R | 152 +++++++++++++++++++++++++++++++++++++ vignettes/run-the-plan.Rmd | 62 ++++++--------- 5 files changed, 181 insertions(+), 40 deletions(-) create mode 100644 _targets_template.R diff --git a/.gitignore b/.gitignore index e3c9491b..7bc650f5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ +vignettes/sageseqr-report.Rmd +sageseqr-report.Rmd +.Rhistory +_targets.R .Rproj.user *.txt *.tsv diff --git a/DESCRIPTION b/DESCRIPTION index 62f8ed11..bd3a0b63 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -56,6 +56,8 @@ Imports: stats, stringr, synapser, + tarchetypes, + targets, tidyr, tibble, utils, diff --git a/R/functions.R b/R/functions.R index 386de70b..15f8a8fd 100644 --- a/R/functions.R +++ b/R/functions.R @@ -1,4 +1,3 @@ - #'Detect file type and download data #' #'This function takes synIds and version number to download any diff --git a/_targets_template.R b/_targets_template.R new file mode 100644 index 00000000..124cc940 --- /dev/null +++ b/_targets_template.R @@ -0,0 +1,152 @@ +#Set the cofiguration profile here: +Sys.setenv(R_CONFIG_ACTIVE = "INSERT_CONFIG_PROFILE_HERE") + +if (!file.exists("sageseqr-report.Rmd")) { + fs::file_copy(system.file("sageseqr-report.Rmd", package = "sageseqr"), + new_path = getwd()) +} + +#Build Plan +list( + targets::tar_target( + import_metadata, + get_data(!!config::get("metadata")$synID, + !!config::get("metadata")$version) + ), + targets::tar_target( + import_counts, + get_data(!!config::get("counts")$synID, + !!config::get("counts")$version) + ), + targets::tar_target( + counts, + tibble::column_to_rownames(import_counts, + var = !!config::get("counts")$`gene id`) + ), + targets::tar_target( + clean_md, + clean_covariates(md = import_metadata, + factors = !!config::get("factors"), + continuous = !!config::get("continuous"), + sample_identifier = !!config::get("metadata")$`sample id`) + ), + targets::tar_target( + biomart_results, + get_biomart(count_df = counts, + synid = !!config::get("biomart")$synID, + version = !!config::get("biomart")$version, + filters = !!config::get("biomart")$filters, + host = !!config::get("biomart")$host, + organism = !!config::get("biomart")$organism) + ), + targets::tar_target( + filtered_counts, + filter_genes(clean_metadata = clean_md, + count_df = counts, + conditions = !!config::get("conditions"), + cpm_threshold = !!config::get("cpm threshold"), + conditions_threshold = !!config::get("percent threshold")) + ), + targets::tar_target( + biotypes, + summarize_biotypes(filtered_counts, biomart_results) + ), + targets::tar_target( + cqn_counts, + cqn(filtered_counts, biomart_results) + ), + targets::tar_target( + gene_coexpression, + plot_coexpression(cqn_counts) + ), + + targets::tar_target( + boxplots, + boxplot_vars(md = clean_md, + include_vars = !!config::get("continuous"), + x_var = !!config::get("x_var")) + ), + targets::tar_target( + sex_plot, + conditional_plot_sexcheck(clean_md, + counts, + biomart_results, + !!config::get("sex check")) + ), + targets::tar_target(sex_plot_pca, + plot_sexcheck_pca( + clean_md, + counts, + biomart_results, + !!config::get("sex check")) + ), + targets::tar_target( + correlation_plot, + get_association_statistics(clean_md) + ), + targets::tar_target( + significant_covariates_plot, + run_pca_and_plot_correlations(cqn_counts$E,clean_md) + ), + targets::tar_target( + outliers, + identify_outliers(filtered_counts, + clean_md, + !!config::get("dimensions")$color, + !!config::get("dimensions")$shape, + !!config::get("dimensions")$size + ) + ), + targets::tar_target( + model, + stepwise_regression( + clean_md, + primary_variable = !!config::get("x_var"), + cqn_counts = cqn_counts, + skip = !!config::get("skip model") + ) + ), + targets::tar_target( + report, + rmarkdown::render( + input, + tarchetypes::tar_render(name="sageseqr-report.Rmd", + path=glue::glue("{getwd()}", + '/inst/sageseqr-report.Rmd') + ) + ) + ), + targets::tar_target( + document_inputs, + provenance_helper( + !!config::get("metadata")$synID, !!config::get("counts")$synID, + !!config::get("metadata")$version, !!config::get("counts")$version, + !!config::get("biomart")$synID, !!config::get("biomart")$version + ) + ), + targets::tar_target( + Synapse, + store_results( + parent_id = !!config::get("store output"), + cqn_counts = cqn_counts$counts, + clean_md = clean_md, + filtered_counts = filtered_counts, + biomart_results = biomart_results, + rownames = !!list( + config::get("metadata")$`sample id`, + config::get("counts")$`gene id`, + config::get("biomart")$filters, + config::get("counts")$`gene id` + ), + syn_names = list("Covariates", "Filtered counts (greater than 1cpm)", + "BioMart query results", "Normalized counts (CQN)"), + data_names = list("clean_md", "filtered_counts", "biomart_results", + "cqn_counts"), + inputs = document_inputs, + activity_provenance = "Analyze RNA-seq data with sageseqr pkg", + config_file = "config.yml", + report_name = !!config::get("report") + ) + ) + +) \ No newline at end of file diff --git a/vignettes/run-the-plan.Rmd b/vignettes/run-the-plan.Rmd index a899ac49..2b0bad38 100644 --- a/vignettes/run-the-plan.Rmd +++ b/vignettes/run-the-plan.Rmd @@ -20,56 +20,40 @@ knitr::opts_chunk$set( 2. Specify the active configuration by setting `R_CONFIG_ACTIVE`. ```{r config-setup} -Sys.setenv(R_CONFIG_ACTIVE = "default") +profile_set <- "default" +Sys.setenv(R_CONFIG_ACTIVE = profile_set) ``` 3. Load the `sageseqr` library and login to [Synapse](https://www.synapse.org/). `rnaseq_plan()` calls the arguments from the config file and creates the `drake` plan. Execute `drake::make(plan)` to compute. Run this code from your project root. ```{r run-plan} library(sageseqr) - # Login to Synapse. Make a Synapse account and use synapser to login: https://r-docs.synapse.org/articles/manageSynapseCredentials.html synapser::synLogin() # Run the analysis -plan <- sageseqr::rnaseq_plan( - metadata_id = config::get("metadata")$synID, - metadata_version = config::get("metadata")$version, - counts_id = config::get("counts")$synID, - counts_version = config::get("counts")$version, - gene_id_input = config::get("counts")$`gene id`, - sample_id_input = config::get("metadata")$`sample id`, - factor_input = config::get("factors"), - continuous_input = config::get("continuous"), - biomart_id = config::get("biomart")$synID, - biomart_version = config::get("biomart")$version, - filters = config::get("biomart")$filters, - host = config::get("biomart")$host, - organism = config::get("biomart")$organism, - conditions = config::get("conditions"), - cpm_threshold = config::get("cpm threshold"), - conditions_threshold = config::get("percent threshold"), - x_var_for_plot = config::get("x_var"), - sex_var = config::get("sex check"), - color = config::get("dimensions")$color, - shape = config::get("dimensions")$shape, - size = config::get("dimensions")$size, - report_name = config::get("report"), - skip_model = config::get("skip model"), - parent_id = config::get("store output"), - rownames = list( - config::get("metadata")$`sample id`, - config::get("counts")$`gene id`, - config::get("biomart")$filters, - config::get("counts")$`gene id` - ), - config_file = "config.yml" +replace_command <- paste('system( "sed ', + paste0('s/INSERT_CONFIG_PROFILE_HERE/', + profile_set, + "/g" + ), + " ../_targets_template.R > ../_targets.R\")", + sep = "'" + ) + +eval(parse( text= + glue::glue(replace_command) + ) ) -drake::make( - plan, - envir = getNamespace("sageseqr") - ) +setwd('../') +targets::tar_manifest(fields="command") +targets::tar_glimpse() + +#drake::make( +# plan, +# envir = getNamespace("sageseqr") +# ) ``` 4. Visualize the results of your work. @@ -80,4 +64,4 @@ drake::vis_drake_graph( envir = getNamespace("sageseqr"), targets_only = TRUE ) -``` +``` \ No newline at end of file From c45e41d7a48d95f57353acc3f1650beb211779eb Mon Sep 17 00:00:00 2001 From: jgockley62 Date: Fri, 18 Jun 2021 12:15:51 -0700 Subject: [PATCH 05/64] test git code --- Nonsense.R | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 Nonsense.R diff --git a/Nonsense.R b/Nonsense.R new file mode 100644 index 00000000..e69de29b From e005eb7de8ae613a31f445dc6d5f6631f57fb03d Mon Sep 17 00:00:00 2001 From: jgockley62 Date: Fri, 18 Jun 2021 12:19:54 -0700 Subject: [PATCH 06/64] test git code --- Nonsense.R | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 Nonsense.R diff --git a/Nonsense.R b/Nonsense.R deleted file mode 100644 index e69de29b..00000000 From 01f83df642360e7ebd8ca94cfc84841df862bf30 Mon Sep 17 00:00:00 2001 From: jgockley62 Date: Fri, 18 Jun 2021 14:03:28 -0700 Subject: [PATCH 07/64] plan almost there --- _targets_template.R | 6 +++++- vignettes/run-the-plan.Rmd | 10 +++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/_targets_template.R b/_targets_template.R index 124cc940..c00b70a9 100644 --- a/_targets_template.R +++ b/_targets_template.R @@ -1,3 +1,8 @@ +library(sageseqr) + +# Login to Synapse. Make a Synapse account and use synapser to login: https://r-docs.synapse.org/articles/manageSynapseCredentials.html +synapser::synLogin() + #Set the cofiguration profile here: Sys.setenv(R_CONFIG_ACTIVE = "INSERT_CONFIG_PROFILE_HERE") @@ -109,7 +114,6 @@ list( targets::tar_target( report, rmarkdown::render( - input, tarchetypes::tar_render(name="sageseqr-report.Rmd", path=glue::glue("{getwd()}", '/inst/sageseqr-report.Rmd') diff --git a/vignettes/run-the-plan.Rmd b/vignettes/run-the-plan.Rmd index 2b0bad38..462c9747 100644 --- a/vignettes/run-the-plan.Rmd +++ b/vignettes/run-the-plan.Rmd @@ -48,7 +48,8 @@ eval(parse( text= setwd('../') targets::tar_manifest(fields="command") -targets::tar_glimpse() + +targets::tar_make() #drake::make( # plan, @@ -59,9 +60,8 @@ targets::tar_glimpse() 4. Visualize the results of your work. ```{r visualize} -drake::vis_drake_graph( - plan, - envir = getNamespace("sageseqr"), +setwd('../') +targets::tar_glimpse( targets_only = TRUE - ) +) ``` \ No newline at end of file From 1a9734df60a09f3f194bad3af0d89ac0a75ccbe1 Mon Sep 17 00:00:00 2001 From: jgockley62 Date: Tue, 22 Jun 2021 13:08:45 -0700 Subject: [PATCH 08/64] replace vis-func drake::cancel_if --- R/visualization-functions.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/visualization-functions.R b/R/visualization-functions.R index 685a13b0..a4318841 100644 --- a/R/visualization-functions.R +++ b/R/visualization-functions.R @@ -770,12 +770,13 @@ plot_sexcheck <- function(clean_metadata, count_df, biomart_results, sex_var) { #' Conditionally wrap plot_sexcheck for drake #' #' Work around to expose plot_sexcheck to testing and export but also leverage -#' drakes function for skipping targets conditionally (see \code{"drake::cancel_if()"}). +#' targets function for skipping targets conditionally (see \code{"targets::tar_cancel()"}). #' @inheritParams plot_sexcheck #' @inheritParams get_biomart #' @export conditional_plot_sexcheck <- function(clean_metadata, count_df, biomart_results, sex_var) { - drake::cancel_if(is.null(sex_var)) + #drake::cancel_if(is.null(sex_var)) + targets::tar_cancel(is.null(sex_var)) plot_sexcheck(clean_metadata, count_df, biomart_results, sex_var) } #' Explore samples that are outliers From 524786de6318137cbb8cb32d7de9ec2b9db3bdbf Mon Sep 17 00:00:00 2001 From: jgockley62 Date: Tue, 22 Jun 2021 14:22:53 -0700 Subject: [PATCH 09/64] Transfered Files --- .gitignore | 2 + _targets_template.R => R/_targets_template.R | 4 +- R/functions.R | 12 +- R/plan.R | 185 ------------------- R/visualization-functions.R | 3 +- README.md | 8 +- inst/sageseqr-report.Rmd | 30 +-- vignettes/run-the-plan.Rmd | 12 +- 8 files changed, 33 insertions(+), 223 deletions(-) rename _targets_template.R => R/_targets_template.R (99%) delete mode 100644 R/plan.R diff --git a/.gitignore b/.gitignore index 7bc650f5..92ea4efc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +_targets.R +_targets/ vignettes/sageseqr-report.Rmd sageseqr-report.Rmd .Rhistory diff --git a/_targets_template.R b/R/_targets_template.R similarity index 99% rename from _targets_template.R rename to R/_targets_template.R index c00b70a9..d630c668 100644 --- a/_targets_template.R +++ b/R/_targets_template.R @@ -113,12 +113,10 @@ list( ), targets::tar_target( report, - rmarkdown::render( tarchetypes::tar_render(name="sageseqr-report.Rmd", path=glue::glue("{getwd()}", '/inst/sageseqr-report.Rmd') ) - ) ), targets::tar_target( document_inputs, @@ -153,4 +151,4 @@ list( ) ) -) \ No newline at end of file +) diff --git a/R/functions.R b/R/functions.R index 15f8a8fd..f3340842 100644 --- a/R/functions.R +++ b/R/functions.R @@ -593,7 +593,7 @@ wrap_de <- function(conditions, filtered_counts, cqn_counts, md, #' @inheritParams differential_expression #' @inheritParams build_formula #' @param skip Defaults to NULL. If TRUE, this step will be skipped in the -#' Drake plan. +#' targets plan. #' @export stepwise_regression <- function(md, primary_variable, cqn_counts, model_variables = names(md), @@ -674,15 +674,15 @@ prepare_results <- function(target, data_name, rowname = NULL) { #' #' @param parent_id A Synapse Id that corresponds to a project or #' folder to store output. -#' @param cqn_counts The drake target containing Conditional Quantile Normalized +#' @param cqn_counts The targets target containing Conditional Quantile Normalized #' (CQN) counts. Defaults to target name constrained by #' \code{"sageseqr::rnaseq_plan()"}. -#' @param clean_md The drake target containing the metadata data frame. +#' @param clean_md The targets target containing the metadata data frame. #' Defaults to target name constrained by \code{"sageseqr::rnaseq_plan()"}. -#' @param filtered_counts The drake target containing counts after low gene +#' @param filtered_counts The targets target containing counts after low gene #' expression has been removed. Defaults to target name constrained by #' \code{"sageseqr::rnaseq_plan()"}. -#' @param biomart_results The drake target containing gene annotations from +#' @param biomart_results The targets target containing gene annotations from #' biomart. Defaults to target name constrained by #' \code{"sageseqr::rnaseq_plan()"}. #' @param rownames A list of variables to store rownames ordered by `metadata`, @@ -715,7 +715,7 @@ store_results <- function(clean_md = clean_md, "analyzed with sageseqr {ver}" ) - # nest drake targets in a list. Every time a new target is to-be stored, it + # nest targets targets in a list. Every time a new target is to-be stored, it # must be added as an argument to this function and then added to this list. targets <- list(clean_md, filtered_counts, biomart_results, cqn_counts) diff --git a/R/plan.R b/R/plan.R deleted file mode 100644 index 91955360..00000000 --- a/R/plan.R +++ /dev/null @@ -1,185 +0,0 @@ -#' Execute the drake RNA-seq plan -#' -#' This function wraps the \code{"drake::plan()"} and copies the R markdown -#' report to the user's working directory. -#' -#' @param metadata_id Synapse ID to clean metadata file with sample identifiers -#' in a column and variables of interest as column names. There cannot be any -#' missing values. -#' @param metadata_version Optionally, include Synapse file version number. If -#' omitted, current version will be downloaded. -#' @param counts_id Synapse ID to counts data frame with identifiers to the -#' metadata as column names and gene ids in a column. -#' @param counts_version Optionally, include Synapse file version number. -#' @param gene_id_input Column name of the gene ids in the counts_id file. -#' @param sample_id_input Column name of the sample ids in the metadata_id file. -#' @param factor_input Vector of factor variables. Variables must be present -#' in the metadata as column names. -#' @param continuous_input Vector of continuous variables. Variables must be -#' present in the metadata as column names. -#' @param biomart_id Synapse ID to biomart object. -#' @param biomart_version Optionally, include Synapse file version number. -#' @param primary_variable Baseline variable for model selection and variable to -#' stratify groups in the boxplot. -#' @param report_name Name of output markdown file. -#' @inheritParams plot_sexcheck -#' @inheritParams get_biomart -#' @inheritParams filter_genes -#' @inheritParams identify_outliers -#' @inheritParams store_results -#' @inheritParams prepare_results -#' @param skip_model If TRUE, does not run regression model. -#' @export -rnaseq_plan <- function(metadata_id, metadata_version, counts_id, - counts_version, gene_id_input, sample_id_input, - factor_input, continuous_input, - biomart_id, biomart_version, host, filters, - organism, conditions, cpm_threshold = 1, - conditions_threshold = 0.5, - primary_variable, sex_var, color, shape, size, - report_name, skip_model, parent_id, - rownames, config_file) { - # Copies markdown to user's working directory - if (!file.exists("sageseqr-report.Rmd")) { - fs::file_copy(system.file("sageseqr-report.Rmd", package = "sageseqr"), - new_path = getwd()) - } - - #drake::drake_plan( - list( - targets::tar_target( - import_metadata, - get_data(!!metadata_id, - !!metadata_version) - ), - targets::tar_target( - import_counts, - get_data(!!counts_id, - !!counts_version) - ), - targets::tar_target( - counts, - tibble::column_to_rownames(import_counts, - var = !!gene_id_input) - ), - targets::tar_target( - clean_md, - clean_covariates(md = import_metadata, - factors = !!factor_input, - continuous = !!continuous_input, - sample_identifier = !!sample_id_input) - ), - targets::tar_target( - biomart_results, - get_biomart(count_df = counts, - synid = !!biomart_id, - version = !!biomart_version, - filters = !!filters, - host = !!host, - organism = !!organism) - ), - targets::tar_target( - filtered_counts, - filter_genes(clean_metadata = clean_md, - count_df = counts, - conditions = !!conditions, - cpm_threshold = !!cpm_threshold, - conditions_threshold = !!conditions_threshold) - ), - targets::tar_target( - biotypes, - summarize_biotypes(filtered_counts, biomart_results) - ), - targets::tar_target( - cqn_counts, - cqn(filtered_counts, biomart_results) - ), - targets::tar_target( - gene_coexpression, - plot_coexpression(cqn_counts) - ), - - targets::tar_target( - boxplots, - boxplot_vars(md = clean_md, - include_vars = !!continuous_input, - x_var = !!primary_variable) - ), - targets::tar_target( - sex_plot, - conditional_plot_sexcheck(clean_md, - counts, - biomart_results, - !!sex_var) - ), - targets::tar_target(sex_plot_pca, - plot_sexcheck_pca( - clean_md, - counts, - biomart_results, - !!sex_var) - ), - targets::tar_target( - correlation_plot, - get_association_statistics(clean_md) - ), - targets::tar_target( - significant_covariates_plot, - run_pca_and_plot_correlations(cqn_counts$E,clean_md) - ), - targets::tar_target( - outliers, - identify_outliers(filtered_counts, clean_md, !!color, !!shape, !!size) - ), - targets::tar_target( - model, - stepwise_regression( - clean_md, - primary_variable = !!primary_variable, - cqn_counts = cqn_counts, - skip = !!skip_model - ) - ), - targets::tar_target( - report, - rmarkdown::render( - #drake::knitr_in("sageseqr-report.Rmd"), - #output_file = drake::file_out( - #!!glue::glue("{getwd()}/{report_name}.html") - #), - input, - tarchetypes::tar_render(name="sageseqr-report.Rmd", - path=glue::glue("{getwd()}", - '/inst/sageseqr-report.Rmd') - ) - ) - ), - targets::tar_target( - document_inputs, - provenance_helper( - !!metadata_id, !!counts_id, - !!metadata_version, !!counts_version, - !!biomart_id, !!biomart_version - ) - ), - targets::tar_target( - Synapse, - store_results( - parent_id = !!parent_id, - cqn_counts = cqn_counts$counts, - clean_md = clean_md, - filtered_counts = filtered_counts, - biomart_results = biomart_results, - rownames = !!rownames, - syn_names = list("Covariates", "Filtered counts (greater than 1cpm)", - "BioMart query results", "Normalized counts (CQN)"), - data_names = list("clean_md", "filtered_counts", "biomart_results", - "cqn_counts"), - inputs = document_inputs, - activity_provenance = "Analyze RNA-seq data with sageseqr pkg", - config_file = !!config_file, - report_name = !!report_name - ) - ) - ) -} diff --git a/R/visualization-functions.R b/R/visualization-functions.R index a4318841..13e76362 100644 --- a/R/visualization-functions.R +++ b/R/visualization-functions.R @@ -767,7 +767,7 @@ plot_sexcheck <- function(clean_metadata, count_df, biomart_results, sex_var) { sex_specific_counts = results) p } -#' Conditionally wrap plot_sexcheck for drake +#' Conditionally wrap plot_sexcheck for targets #' #' Work around to expose plot_sexcheck to testing and export but also leverage #' targets function for skipping targets conditionally (see \code{"targets::tar_cancel()"}). @@ -775,7 +775,6 @@ plot_sexcheck <- function(clean_metadata, count_df, biomart_results, sex_var) { #' @inheritParams get_biomart #' @export conditional_plot_sexcheck <- function(clean_metadata, count_df, biomart_results, sex_var) { - #drake::cancel_if(is.null(sex_var)) targets::tar_cancel(is.null(sex_var)) plot_sexcheck(clean_metadata, count_df, biomart_results, sex_var) } diff --git a/README.md b/README.md index a93e753f..a0c4fac0 100644 --- a/README.md +++ b/README.md @@ -6,17 +6,17 @@ # RNA-seq normalization workflow in R -The `sageseqr` package integrates the [`drake` R package](https://github.com/ropensci/drake/), the [`config` package for R](https://cran.r-project.org/web/packages/config/vignettes/introduction.html), and [Synapse](https://www.synapse.org/). `drake` tracks dependency relationships in the workflow and only updates data when it has changed. A `config` file allows inputs and parameters to be explicitly defined in one location. Synapse is a data repository that allows sensitive data to be [stored and shared responsibly](https://docs.synapse.org/articles/article_index.html#governance). +The `sageseqr` package integrates the [`targets` R package](https://github.com/ropensci/targets/), the [`config` package for R](https://cran.r-project.org/web/packages/config/vignettes/introduction.html), and [Synapse](https://www.synapse.org/). `targets` tracks dependency relationships in the workflow and only updates data when it has changed. A `config` file allows inputs and parameters to be explicitly defined in one location. Synapse is a data repository that allows sensitive data to be [stored and shared responsibly](https://docs.synapse.org/articles/article_index.html#governance). The workflow takes RNA-seq gene counts and sample metadata as inputs, normalizes counts by conditional quantile normalization [(CQN)](https://bioconductor.org/packages/release/bioc/html/cqn.html), removes outliers based on a user-defined threshold, empirically selects meaningful covariates and returns differential expression analysis results. The data is also visualized in several ways to help you understand meaningful trends. The visualizations include a heatmap identifying highly correlated covariates, a sample-specific x and y marker gene check, boxplots visualizing the distribution of continuous variables and a principal component analysis (PCA) to visualize sample distribution. # The Targets -The series of steps that make up the workflow are called targets. The target objects are stored in a cache and can either be read or loaded into your environment with the `drake` functions `readd` or `loadd`. Source code for each target can be visualized by setting `show_source = TRUE` with `loadd` and `readd`. +The series of steps that make up the workflow are called targets. The target objects are stored in a cache and can either be read or loaded into your environment with the `targets` functions `tar_read` or `tar_load`. Source code for each target can be visualized by setting `show_source = TRUE` with `loadd` and `readd`. -Importantly, running `clean` will remove the data stored as targets ([but, the data is never completely gone!](https://books.ropensci.org/drake/walkthrough.html)). You may specific targets by name by passing them to the `clean` function. +Importantly, running `clean` will remove the data stored as targets ([but, the data is never completely gone!](https://books.ropensci.org/targets/walkthrough.html)). You may specific targets by name by passing them to the `tar_destroy()` function. -The targets are called by the `sageseqr` `rnaseq_plan()` function and are: +The targets are called by the `targets` `tar_make()` function and are: Raw data: - `import_metadata`- imports the raw metadata directly from synapse diff --git a/inst/sageseqr-report.Rmd b/inst/sageseqr-report.Rmd index 3fe22213..3f6517f2 100644 --- a/inst/sageseqr-report.Rmd +++ b/inst/sageseqr-report.Rmd @@ -37,7 +37,7 @@ library(sageseqr) # Explore Metadata ```{r sample_summary, message = FALSE} -drake::loadd(clean_md) +targets::tar_load(clean_md) var <- config::get("x_var") summary <- dplyr::group_by_at(clean_md, var) summary <- dplyr::summarise(summary, count = dplyr::n()) @@ -46,27 +46,27 @@ knitr::kable(summary) Visualize the distribution of data. ```{r boxplot} -drake::readd(boxplots) +targets::tar_read(boxplots) ``` ```{r boxplot_sex_chromosomes, results = "asis", fig.dim = c(5,5)} if (!is.null(config::get("sex check"))) { cat("Visualize gene expression of sex chromosomes using XIST as a X-chromosome marker and UTY as a Y-chromosome marker.") - drake::readd(sex_plot)$plot + targets::tar_read(sex_plot)$plot } ``` ```{r sex_chromosome_pca, results = "asis", fig.dim = c(10,5)} if (!is.null(config::get("sex check"))) { cat("Visualize gene expression across X and Y chromosomes by principal component analysis (PCA).") - drake::readd(sex_plot_pca)$plot + targets::tar_read(sex_plot)$plot } ``` ```{r outlier_list, results = "asis"} -if (!is.null(drake::readd(sex_plot_pca)$outliers)) { - list <- glue::glue_collapse(drake::readd(sex_plot_pca)$outliers, ", ", last = " and ") +if (!is.null(targets::tar_read(sex_plot_pca)$outliers)) { + list <- glue::glue_collapse(targets::tar_read(sex_plot_pca)$outliers, ", ", last = " and ") glue::glue("Possible outliers, identified as 2 standard deviations from the mean of samples n split by {config::get('sex check')} are {list}.") } else { cat("No outliers identified.") @@ -75,7 +75,7 @@ if (!is.null(drake::readd(sex_plot_pca)$outliers)) { Visualize the relationships between covariates. ```{r heatmap_covariates} -correlation_input <- drake::readd( +correlation_input <- targets::tar_read( correlation_plot )$plot col2 <- grDevices::colorRampPalette(rev(c("#67001F", "#B2182B", "#D6604D", "#F4A582", @@ -88,29 +88,29 @@ corrplot::corrplot(correlation_input, col = col2(200), tl.col = "#000000") Remove genes that have less than `r config::get("cpm threshold")` counts per million (CPM) in at least `r config::get("percent threshold")` of samples per specified condition. -`r dim(drake::readd(filtered_counts))[1]` genes are used in this analysis. +`r dim(targets::tar_read(filtered_counts))[1]` genes are used in this analysis. ```{r filter_genes} -knitr::kable(drake::readd(biotypes)) +knitr::kable(targets::tar_read(biotypes)) ``` Check distribution of correlation between genes. ```{r histogram} -drake::readd(gene_coexpression) +targets::tar_read(gene_coexpression) ``` # Identify Outliers ```{r plot_outliers} -drake::readd(outliers)$plot +targets::tar_read(outliers)$plot ``` -Outliers, based on logCPM expression, are `r glue::glue_collapse(drake::readd(outliers)$outliers, ", ", last = " and ")`. +Outliers, based on logCPM expression, are `r glue::glue_collapse(targets::tar_read(outliers)$outliers, ", ", last = " and ")`. # Significant Covariates -Significant covariates are identified by the pearson correlation (p-value of 1%) between principal component analysis (PCA) of normalized transcripts and variables that meet a 0.1 false discovery rate (FDR) threshold. Significant covariates to adjust for are `r glue::glue_collapse(drake::readd(significant_covariates_plot)$significant_covariates, ", ", last = " and ")`. +Significant covariates are identified by the pearson correlation (p-value of 1%) between principal component analysis (PCA) of normalized transcripts and variables that meet a 0.1 false discovery rate (FDR) threshold. Significant covariates to adjust for are `r glue::glue_collapse(targets::tar_read(significant_covariates_plot)$significant_covariates, ", ", last = " and ")`. ```{r pca_and_significant_covariates} -drake::readd( +targets::tar_read( significant_covariates_plot )$plot ``` @@ -120,7 +120,7 @@ if (!isTRUE(config::get("skip model"))) { cat("# Model Identification Covariates are added as fixed and random effects iteratively if model improvement by Bayesian Information Criteria (BIC) was observed.") -summary <- drake::readd(model) +summary <- targets::tar_read(model) knitr::kable(summary$to_visualize) as.character(summary$formula)[2] } diff --git a/vignettes/run-the-plan.Rmd b/vignettes/run-the-plan.Rmd index 462c9747..c4a7ad6f 100644 --- a/vignettes/run-the-plan.Rmd +++ b/vignettes/run-the-plan.Rmd @@ -1,8 +1,8 @@ --- -title: "Run the drake plan" +title: "Run the targets plan" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{Run the drake plan} + %\VignetteIndexEntry{Run the targets plan} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- @@ -24,7 +24,7 @@ profile_set <- "default" Sys.setenv(R_CONFIG_ACTIVE = profile_set) ``` -3. Load the `sageseqr` library and login to [Synapse](https://www.synapse.org/). `rnaseq_plan()` calls the arguments from the config file and creates the `drake` plan. Execute `drake::make(plan)` to compute. Run this code from your project root. +3. Load the `sageseqr` library and login to [Synapse](https://www.synapse.org/). `rnaseq_plan()` calls the arguments from the config file and creates the `targets` plan. Execute `targets::make()` to compute. Run this code from your project root. ```{r run-plan} library(sageseqr) @@ -37,7 +37,7 @@ replace_command <- paste('system( "sed ', profile_set, "/g" ), - " ../_targets_template.R > ../_targets.R\")", + " ../R/_targets_template.R > ../_targets.R\")", sep = "'" ) @@ -51,10 +51,6 @@ targets::tar_manifest(fields="command") targets::tar_make() -#drake::make( -# plan, -# envir = getNamespace("sageseqr") -# ) ``` 4. Visualize the results of your work. From 97696af51bb81cc4a2941545b6b8f68d48d783cd Mon Sep 17 00:00:00 2001 From: jgockley62 Date: Tue, 22 Jun 2021 14:30:36 -0700 Subject: [PATCH 10/64] Document changes --- DESCRIPTION | 1 - NAMESPACE | 1 - R/functions.R | 22 +++++-- man/conditional_plot_sexcheck.Rd | 4 +- man/rnaseq_plan.Rd | 110 ------------------------------- man/stepwise_regression.Rd | 2 +- man/store_results.Rd | 16 ++--- 7 files changed, 26 insertions(+), 130 deletions(-) delete mode 100644 man/rnaseq_plan.Rd diff --git a/DESCRIPTION b/DESCRIPTION index bd3a0b63..3c733602 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,7 +26,6 @@ Imports: config, cqn, data.table, - drake, dplyr, edgeR, foreach, diff --git a/NAMESPACE b/NAMESPACE index f71707d4..f62a26cc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,7 +26,6 @@ export(plot_sexcheck) export(plot_sexcheck_pca) export(prepare_results) export(provenance_helper) -export(rnaseq_plan) export(rq) export(run_pca) export(run_pca_and_plot_correlations) diff --git a/R/functions.R b/R/functions.R index f3340842..f37c4b7f 100644 --- a/R/functions.R +++ b/R/functions.R @@ -676,15 +676,15 @@ prepare_results <- function(target, data_name, rowname = NULL) { #' folder to store output. #' @param cqn_counts The targets target containing Conditional Quantile Normalized #' (CQN) counts. Defaults to target name constrained by -#' \code{"sageseqr::rnaseq_plan()"}. +#' \code{"targets::tar_make"}. #' @param clean_md The targets target containing the metadata data frame. -#' Defaults to target name constrained by \code{"sageseqr::rnaseq_plan()"}. +#' Defaults to target name constrained by \code{"targets::tar_make"}. #' @param filtered_counts The targets target containing counts after low gene #' expression has been removed. Defaults to target name constrained by -#' \code{"sageseqr::rnaseq_plan()"}. +#' \code{"targets::tar_make()"}. #' @param biomart_results The targets target containing gene annotations from #' biomart. Defaults to target name constrained by -#' \code{"sageseqr::rnaseq_plan()"}. +#' \code{"targets::tar_make"}. #' @param rownames A list of variables to store rownames ordered by `metadata`, #' `filtered_counts`, `biomart_results`, `cqn_counts`. If not applicable, #' set as NULL. @@ -698,7 +698,7 @@ prepare_results <- function(target, data_name, rowname = NULL) { #' @param data_names A list of identifiers to embed in the file name ordered #' by `metadata`, `filtered_counts`, `biomart_results`, `cqn_counts`. #' @param config_file Optional. Path to configuration file. -#' @inheritParams rnaseq_plan +#' @param report_name Name of output markdown file. #' @export store_results <- function(clean_md = clean_md, filtered_counts = filtered_counts, @@ -810,8 +810,16 @@ store_results <- function(clean_md = clean_md, #' Provenance helper #' #' Collapse Synapse Ids and version. -#' -#' @inheritParams rnaseq_plan +#' @param metadata_id Synapse ID to clean metadata file with sample identifiers +#' in a column and variables of interest as column names. There cannot be any +#' missing values. +#' @param counts_id Synapse ID to counts data frame with identifiers to the +#' metadata as column names and gene ids in a column. +#' @param metadata_version Optionally, include Synapse file version number. If +#' omitted, current version will be downloaded. +#' @param counts_version Optionally, include Synapse file version number. +#' @param biomart_id Synapse ID to biomart object. +#' @param biomart_version Optionally, include Synapse file version number. #' @export provenance_helper <- function(metadata_id, counts_id, metadata_version = NULL, counts_version = NULL, biomart_id = NULL, diff --git a/man/conditional_plot_sexcheck.Rd b/man/conditional_plot_sexcheck.Rd index 65e3309f..e1a41741 100644 --- a/man/conditional_plot_sexcheck.Rd +++ b/man/conditional_plot_sexcheck.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/visualization-functions.R \name{conditional_plot_sexcheck} \alias{conditional_plot_sexcheck} -\title{Conditionally wrap plot_sexcheck for drake} +\title{Conditionally wrap plot_sexcheck for targets} \usage{ conditional_plot_sexcheck(clean_metadata, count_df, biomart_results, sex_var) } @@ -20,5 +20,5 @@ stored as rownames.} } \description{ Work around to expose plot_sexcheck to testing and export but also leverage -drakes function for skipping targets conditionally (see \code{"drake::cancel_if()"}). +targets function for skipping targets conditionally (see \code{"targets::tar_cancel()"}). } diff --git a/man/rnaseq_plan.Rd b/man/rnaseq_plan.Rd deleted file mode 100644 index 8a1a46fb..00000000 --- a/man/rnaseq_plan.Rd +++ /dev/null @@ -1,110 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plan.R -\name{rnaseq_plan} -\alias{rnaseq_plan} -\title{Execute the drake RNA-seq plan} -\usage{ -rnaseq_plan( - metadata_id, - metadata_version, - counts_id, - counts_version, - gene_id_input, - sample_id_input, - factor_input, - continuous_input, - biomart_id, - biomart_version, - host, - filters, - organism, - conditions, - cpm_threshold = 1, - conditions_threshold = 0.5, - primary_variable, - sex_var, - color, - shape, - size, - report_name, - skip_model, - parent_id, - rownames, - config_file -) -} -\arguments{ -\item{metadata_id}{Synapse ID to clean metadata file with sample identifiers -in a column and variables of interest as column names. There cannot be any -missing values.} - -\item{metadata_version}{Optionally, include Synapse file version number. If -omitted, current version will be downloaded.} - -\item{counts_id}{Synapse ID to counts data frame with identifiers to the -metadata as column names and gene ids in a column.} - -\item{counts_version}{Optionally, include Synapse file version number.} - -\item{gene_id_input}{Column name of the gene ids in the counts_id file.} - -\item{sample_id_input}{Column name of the sample ids in the metadata_id file.} - -\item{factor_input}{Vector of factor variables. Variables must be present -in the metadata as column names.} - -\item{continuous_input}{Vector of continuous variables. Variables must be -present in the metadata as column names.} - -\item{biomart_id}{Synapse ID to biomart object.} - -\item{biomart_version}{Optionally, include Synapse file version number.} - -\item{host}{An optional character vector specifying the release version. -This specification is highly recommended for a reproducible workflow. -(see \code{"biomaRt::listEnsemblArchives()"})} - -\item{filters}{A character vector listing biomaRt query filters. -(For a list of filters see \code{"biomaRt::listFilters()"})} - -\item{organism}{A character vector of the organism name. This argument -takes partial strings. For example,"hsa" will match "hsapiens_gene_ensembl".} - -\item{conditions}{Optional. Conditions to bin gene counts that correspond to -variables in `md`.} - -\item{cpm_threshold}{The minimum number of CPM allowed.} - -\item{conditions_threshold}{Percentage of samples that should contain the minimum CPM.} - -\item{primary_variable}{Baseline variable for model selection and variable to -stratify groups in the boxplot.} - -\item{sex_var}{Column name of the sex or gender-specific metadata.} - -\item{color}{Discrete variable in `clean_metadata` differentiated -by color.} - -\item{shape}{Discrete variable in `clean_metadata` differentiated -by shape.} - -\item{size}{Continuous variable in `clean_metadata` differentiated -by size.} - -\item{report_name}{Name of output markdown file.} - -\item{skip_model}{If TRUE, does not run regression model.} - -\item{parent_id}{A Synapse Id that corresponds to a project or -folder to store output.} - -\item{rownames}{A list of variables to store rownames ordered by `metadata`, -`filtered_counts`, `biomart_results`, `cqn_counts`. If not applicable, -set as NULL.} - -\item{config_file}{Optional. Path to configuration file.} -} -\description{ -This function wraps the \code{"drake::plan()"} and copies the R markdown -report to the user's working directory. -} diff --git a/man/stepwise_regression.Rd b/man/stepwise_regression.Rd index 749c1ace..5e9a2c0f 100644 --- a/man/stepwise_regression.Rd +++ b/man/stepwise_regression.Rd @@ -24,7 +24,7 @@ fixed effect interaction term.} If not supplied, the model will include all variables in \code{md}.} \item{skip}{Defaults to NULL. If TRUE, this step will be skipped in the -Drake plan.} +targets plan.} } \description{ This function performs multivariate forward stepwise regression evaluated by multivariate Bayesian Information diff --git a/man/store_results.Rd b/man/store_results.Rd index 8d62b24d..a59bf0cc 100644 --- a/man/store_results.Rd +++ b/man/store_results.Rd @@ -20,20 +20,20 @@ store_results( ) } \arguments{ -\item{clean_md}{The drake target containing the metadata data frame. -Defaults to target name constrained by \code{"sageseqr::rnaseq_plan()"}.} +\item{clean_md}{The targets target containing the metadata data frame. +Defaults to target name constrained by \code{"targets::tar_make"}.} -\item{filtered_counts}{The drake target containing counts after low gene +\item{filtered_counts}{The targets target containing counts after low gene expression has been removed. Defaults to target name constrained by - \code{"sageseqr::rnaseq_plan()"}.} + \code{"targets::tar_make()"}.} -\item{biomart_results}{The drake target containing gene annotations from +\item{biomart_results}{The targets target containing gene annotations from biomart. Defaults to target name constrained by -\code{"sageseqr::rnaseq_plan()"}.} +\code{"targets::tar_make"}.} -\item{cqn_counts}{The drake target containing Conditional Quantile Normalized +\item{cqn_counts}{The targets target containing Conditional Quantile Normalized (CQN) counts. Defaults to target name constrained by -\code{"sageseqr::rnaseq_plan()"}.} +\code{"targets::tar_make"}.} \item{syn_names}{A list of human-readable names for the Synapse entities ordered From 78754603b2e539a2919298bf0d7ce22b7940b137 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Thu, 22 Jul 2021 16:59:39 -0500 Subject: [PATCH 11/64] remove redundant objects in ignore file --- .gitignore | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitignore b/.gitignore index 92ea4efc..acdd8f66 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,9 @@ _targets.R _targets/ -vignettes/sageseqr-report.Rmd sageseqr-report.Rmd .Rhistory -_targets.R .Rproj.user *.txt *.tsv *.csv -/.drake docs From a4001a172e9d0868866992b12037c4ecc4f155ab Mon Sep 17 00:00:00 2001 From: kelshmo Date: Thu, 22 Jul 2021 17:04:32 -0500 Subject: [PATCH 12/64] targets file in inst/ dir to be installed where pkg is installed --- .gitignore | 1 - R/_targets_template.R => inst/_targets.R | 21 +++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) rename R/_targets_template.R => inst/_targets.R (92%) diff --git a/.gitignore b/.gitignore index acdd8f66..32441bed 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -_targets.R _targets/ sageseqr-report.Rmd .Rhistory diff --git a/R/_targets_template.R b/inst/_targets.R similarity index 92% rename from R/_targets_template.R rename to inst/_targets.R index d630c668..610fd0d7 100644 --- a/R/_targets_template.R +++ b/inst/_targets.R @@ -1,6 +1,7 @@ library(sageseqr) -# Login to Synapse. Make a Synapse account and use synapser to login: https://r-docs.synapse.org/articles/manageSynapseCredentials.html +# Login to Synapse. Make a Synapse account and use synapser to login: +# https://r-docs.synapse.org/articles/manageSynapseCredentials.html synapser::synLogin() #Set the cofiguration profile here: @@ -14,7 +15,7 @@ if (!file.exists("sageseqr-report.Rmd")) { #Build Plan list( targets::tar_target( - import_metadata, + import_metadata, get_data(!!config::get("metadata")$synID, !!config::get("metadata")$version) ), @@ -64,7 +65,7 @@ list( gene_coexpression, plot_coexpression(cqn_counts) ), - + targets::tar_target( boxplots, boxplot_vars(md = clean_md, @@ -84,7 +85,7 @@ list( counts, biomart_results, !!config::get("sex check")) - ), + ), targets::tar_target( correlation_plot, get_association_statistics(clean_md) @@ -95,10 +96,10 @@ list( ), targets::tar_target( outliers, - identify_outliers(filtered_counts, - clean_md, - !!config::get("dimensions")$color, - !!config::get("dimensions")$shape, + identify_outliers(filtered_counts, + clean_md, + !!config::get("dimensions")$color, + !!config::get("dimensions")$shape, !!config::get("dimensions")$size ) ), @@ -114,7 +115,7 @@ list( targets::tar_target( report, tarchetypes::tar_render(name="sageseqr-report.Rmd", - path=glue::glue("{getwd()}", + path=glue::glue("{getwd()}", '/inst/sageseqr-report.Rmd') ) ), @@ -150,5 +151,5 @@ list( report_name = !!config::get("report") ) ) - + ) From ba351bc00d7c07593507322481bb937343ad4203 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Thu, 22 Jul 2021 17:59:48 -0500 Subject: [PATCH 13/64] function to load targets file to users working directory --- R/functions.R | 21 +++ inst/_targets.R | 260 ++++++++++++++++++++----------------- vignettes/run-the-plan.Rmd | 29 +---- 3 files changed, 171 insertions(+), 139 deletions(-) diff --git a/R/functions.R b/R/functions.R index f37c4b7f..90396e6a 100644 --- a/R/functions.R +++ b/R/functions.R @@ -846,3 +846,24 @@ provenance_helper <- function(metadata_id, counts_id, metadata_version = NULL, } ids } +#' Initialize differential expression analysis workflow +#' +#' The `sageseqr` package provides a `targets` workflow to string together the +#' data processing and computational steps of RNA-seq differential expression +#' analysis. This funciton copies a markdown document and _targets.R file to your +#' working directory. The _targets.R file in your working directory is required +#' for the workflow to run. +#' +start_de <- function() { + # copy sageseqr-report.Rmd markdown to working directory + if (!file.exists("sageseqr-report.Rmd")) { + fs::file_copy(system.file("sageseqr-report.Rmd", package = "sageseqr"), + new_path = getwd()) + } + + # copy _targets.R file to working directory + if (!file.exists("_targets.R")) { + fs::file_copy(system.file("_targets.R", package = "sageseqr"), + new_path = getwd()) + } +} diff --git a/inst/_targets.R b/inst/_targets.R index 610fd0d7..0eb03cc0 100644 --- a/inst/_targets.R +++ b/inst/_targets.R @@ -1,136 +1,159 @@ -library(sageseqr) - -# Login to Synapse. Make a Synapse account and use synapser to login: -# https://r-docs.synapse.org/articles/manageSynapseCredentials.html -synapser::synLogin() - -#Set the cofiguration profile here: -Sys.setenv(R_CONFIG_ACTIVE = "INSERT_CONFIG_PROFILE_HERE") - -if (!file.exists("sageseqr-report.Rmd")) { - fs::file_copy(system.file("sageseqr-report.Rmd", package = "sageseqr"), - new_path = getwd()) -} - -#Build Plan +library(config) +library(targets) +# build plan list( - targets::tar_target( + tar_target( import_metadata, - get_data(!!config::get("metadata")$synID, - !!config::get("metadata")$version) - ), - targets::tar_target( + get_data( + get("metadata")$synID, + get("metadata")$version + ) + ), + tar_target( import_counts, - get_data(!!config::get("counts")$synID, - !!config::get("counts")$version) - ), - targets::tar_target( + get_data( + get("counts")$synID, + get("counts")$version + ) + ), + tar_target( counts, - tibble::column_to_rownames(import_counts, - var = !!config::get("counts")$`gene id`) - ), - targets::tar_target( + tibble::column_to_rownames( + import_counts, + var = get("counts")$`gene id` + ) + ), + tar_target( clean_md, - clean_covariates(md = import_metadata, - factors = !!config::get("factors"), - continuous = !!config::get("continuous"), - sample_identifier = !!config::get("metadata")$`sample id`) - ), - targets::tar_target( + clean_covariates( + md = import_metadata, + factors = get("factors"), + continuous = get("continuous"), + sample_identifier = get("metadata")$`sample id` + ) + ), + tar_target( biomart_results, - get_biomart(count_df = counts, - synid = !!config::get("biomart")$synID, - version = !!config::get("biomart")$version, - filters = !!config::get("biomart")$filters, - host = !!config::get("biomart")$host, - organism = !!config::get("biomart")$organism) - ), - targets::tar_target( + get_biomart( + count_df = counts, + synid = get("biomart")$synID, + version = get("biomart")$version, + filters = get("biomart")$filters, + host = get("biomart")$host, + organism = get("biomart")$organism + ) + ), + tar_target( filtered_counts, - filter_genes(clean_metadata = clean_md, - count_df = counts, - conditions = !!config::get("conditions"), - cpm_threshold = !!config::get("cpm threshold"), - conditions_threshold = !!config::get("percent threshold")) - ), - targets::tar_target( + filter_genes( + clean_metadata = clean_md, + count_df = counts, + conditions = get("conditions"), + cpm_threshold = get("cpm threshold"), + conditions_threshold = get("percent threshold") + ) + ), + tar_target( biotypes, - summarize_biotypes(filtered_counts, biomart_results) - ), - targets::tar_target( + summarize_biotypes( + filtered_counts, + biomart_results + ) + ), + tar_target( cqn_counts, - cqn(filtered_counts, biomart_results) - ), - targets::tar_target( + cqn( + filtered_counts, + biomart_results + ) + ), + tar_target( gene_coexpression, - plot_coexpression(cqn_counts) - ), - - targets::tar_target( + plot_coexpression( + cqn_counts + ) + ), + tar_target( boxplots, - boxplot_vars(md = clean_md, - include_vars = !!config::get("continuous"), - x_var = !!config::get("x_var")) - ), - targets::tar_target( + boxplot_vars( + md = clean_md, + include_vars = get("continuous"), + x_var = get("x_var") + ) + ), + tar_target( sex_plot, - conditional_plot_sexcheck(clean_md, - counts, - biomart_results, - !!config::get("sex check")) - ), - targets::tar_target(sex_plot_pca, - plot_sexcheck_pca( - clean_md, - counts, - biomart_results, - !!config::get("sex check")) - ), - targets::tar_target( + conditional_plot_sexcheck( + clean_md, + counts, + biomart_results, + get("sex check") + ) + ), + tar_target( + sex_plot_pca, + plot_sexcheck_pca( + clean_md, + counts, + biomart_results, + get("sex check") + ) + ), + tar_target( correlation_plot, - get_association_statistics(clean_md) - ), - targets::tar_target( + get_association_statistics( + clean_md + ) + ), + tar_target( significant_covariates_plot, - run_pca_and_plot_correlations(cqn_counts$E,clean_md) - ), - targets::tar_target( + run_pca_and_plot_correlations( + cqn_counts$E,clean_md + ) + ), + tar_target( outliers, - identify_outliers(filtered_counts, - clean_md, - !!config::get("dimensions")$color, - !!config::get("dimensions")$shape, - !!config::get("dimensions")$size - ) - ), - targets::tar_target( + identify_outliers( + filtered_counts, + clean_md, + get("dimensions")$color, + get("dimensions")$shape, + get("dimensions")$size + ) + ), + tar_target( model, stepwise_regression( clean_md, - primary_variable = !!config::get("x_var"), + primary_variable = get("x_var"), cqn_counts = cqn_counts, - skip = !!config::get("skip model") - ) - ), - targets::tar_target( - report, - tarchetypes::tar_render(name="sageseqr-report.Rmd", - path=glue::glue("{getwd()}", - '/inst/sageseqr-report.Rmd') + skip = get("skip model") ) - ), - targets::tar_target( + ), + tar_target( + report, + tarchetypes::tar_render( + name = "sageseqr-report.Rmd", + path = glue::glue( + "{getwd()}/{get('report')}.Rmd" + ) + ) + ), + tar_target( document_inputs, provenance_helper( - !!config::get("metadata")$synID, !!config::get("counts")$synID, - !!config::get("metadata")$version, !!config::get("counts")$version, - !!config::get("biomart")$synID, !!config::get("biomart")$version - ) - ), - targets::tar_target( + get("metadata")$synID, + get("counts")$synID, + get("metadata")$version, + get("counts")$version, + get("biomart")$synID, + get("biomart")$version + ) + ), + tar_target( Synapse, store_results( - parent_id = !!config::get("store output"), + parent_id = get("store output"), cqn_counts = cqn_counts$counts, clean_md = clean_md, filtered_counts = filtered_counts, @@ -140,16 +163,19 @@ list( config::get("counts")$`gene id`, config::get("biomart")$filters, config::get("counts")$`gene id` - ), - syn_names = list("Covariates", "Filtered counts (greater than 1cpm)", - "BioMart query results", "Normalized counts (CQN)"), - data_names = list("clean_md", "filtered_counts", "biomart_results", - "cqn_counts"), + ), + syn_names = list( + "Covariates", "Filtered counts (greater than 1cpm)", + "BioMart query results", "Normalized counts (CQN)" + ), + data_names = list( + "clean_md", "filtered_counts", + "biomart_results", "cqn_counts" + ), inputs = document_inputs, activity_provenance = "Analyze RNA-seq data with sageseqr pkg", config_file = "config.yml", - report_name = !!config::get("report") + report_name = get("report") + ) ) ) - -) diff --git a/vignettes/run-the-plan.Rmd b/vignettes/run-the-plan.Rmd index c4a7ad6f..0270fd42 100644 --- a/vignettes/run-the-plan.Rmd +++ b/vignettes/run-the-plan.Rmd @@ -24,40 +24,25 @@ profile_set <- "default" Sys.setenv(R_CONFIG_ACTIVE = profile_set) ``` -3. Load the `sageseqr` library and login to [Synapse](https://www.synapse.org/). `rnaseq_plan()` calls the arguments from the config file and creates the `targets` plan. Execute `targets::make()` to compute. Run this code from your project root. +3. Load the `sageseqr` library and login to [Synapse](https://www.synapse.org/). `start_de()` copies the `targets` plan to your working directory. Execute `targets::make()` to compute. Run this code from your project root. ```{r run-plan} library(sageseqr) # Login to Synapse. Make a Synapse account and use synapser to login: https://r-docs.synapse.org/articles/manageSynapseCredentials.html synapser::synLogin() -# Run the analysis -replace_command <- paste('system( "sed ', - paste0('s/INSERT_CONFIG_PROFILE_HERE/', - profile_set, - "/g" - ), - " ../R/_targets_template.R > ../_targets.R\")", - sep = "'" - ) - -eval(parse( text= - glue::glue(replace_command) - ) -) - -setwd('../') -targets::tar_manifest(fields="command") - +# gather the dependencies +start_de() +# inspect the steps of the workflow +targets::tar_manifest() +# run the analysis targets::tar_make() - ``` 4. Visualize the results of your work. ```{r visualize} -setwd('../') targets::tar_glimpse( targets_only = TRUE ) -``` \ No newline at end of file +``` From 8ae34e8497ab935bf8de2f3bfb87498204eaf456 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Fri, 23 Jul 2021 18:23:23 -0500 Subject: [PATCH 14/64] point to mvIC pkg version until bug is fixed --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9d31fef4..a8169436 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -74,7 +74,7 @@ Imports: Additional_repositories: http://ran.synapse.org Remotes: - GabrielHoffman/mvIC@master, + GabrielHoffman/mvIC@v1.5.1, Sage-Bionetworks/sagethemes@main, GabrielHoffman/variancePartition VignetteBuilder: From d8396960aa2ef3a7fdf756241c2c35af71d2e037 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Fri, 23 Jul 2021 18:25:00 -0500 Subject: [PATCH 15/64] point to mvIC pkg version until bug is fixed --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index a8169436..74526b5b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -74,7 +74,7 @@ Imports: Additional_repositories: http://ran.synapse.org Remotes: - GabrielHoffman/mvIC@v1.5.1, + GabrielHoffman/mvIC@fb4f5e2, Sage-Bionetworks/sagethemes@main, GabrielHoffman/variancePartition VignetteBuilder: From 06e29c16cbf6f5ddd1e300d6b9e96967a905facb Mon Sep 17 00:00:00 2001 From: kelshmo Date: Fri, 23 Jul 2021 18:46:54 -0500 Subject: [PATCH 16/64] export start function --- R/functions.R | 1 + man/plot_volcano.Rd | 23 +++++++++++++++++++++++ man/start_de.Rd | 15 +++++++++++++++ 3 files changed, 39 insertions(+) create mode 100644 man/plot_volcano.Rd create mode 100644 man/start_de.Rd diff --git a/R/functions.R b/R/functions.R index 979f95f2..933e2b12 100644 --- a/R/functions.R +++ b/R/functions.R @@ -889,6 +889,7 @@ provenance_helper <- function(metadata_id, counts_id, metadata_version = NULL, #' working directory. The _targets.R file in your working directory is required #' for the workflow to run. #' +#' @export start_de <- function() { # copy sageseqr-report.Rmd markdown to working directory if (!file.exists("sageseqr-report.Rmd")) { diff --git a/man/plot_volcano.Rd b/man/plot_volcano.Rd new file mode 100644 index 00000000..7a256f1a --- /dev/null +++ b/man/plot_volcano.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/visualization-functions.R +\name{plot_volcano} +\alias{plot_volcano} +\title{Volcano plot differential expression results'} +\usage{ +plot_volcano(de, p_value_threshold, log_fold_threshold, gene_list) +} +\arguments{ +\item{p_value_threshold}{Numeric. P-values are adjusted by Benjamini and +Hochberg (BH) false discovery rate (FDR). Significant genes are those with an +adjusted p-value greater than this threshold.} + +\item{log_fold_threshold}{Numeric. Significant genes are those with a log +fold-change greater than this threshold} + +\item{gene_list}{A vector of genes to label in the volcano plot.} +} +\description{ +The plot colors genes where the adjusted p-value exceed the `p_value_threshold` +and the `log_fold_threshold`. Optionally, provide a list of genes to label in +the plot via `gene_list`. +} diff --git a/man/start_de.Rd b/man/start_de.Rd new file mode 100644 index 00000000..ab595aad --- /dev/null +++ b/man/start_de.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions.R +\name{start_de} +\alias{start_de} +\title{Initialize differential expression analysis workflow} +\usage{ +start_de() +} +\description{ +The `sageseqr` package provides a `targets` workflow to string together the +data processing and computational steps of RNA-seq differential expression +analysis. This funciton copies a markdown document and _targets.R file to your +working directory. The _targets.R file in your working directory is required +for the workflow to run. +} From 62d58b6c94d61409fa29b6684473cb29fb0de601 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Fri, 23 Jul 2021 18:50:48 -0500 Subject: [PATCH 17/64] add start_de to namespace --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index f62a26cc..fa9422e0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -29,6 +29,7 @@ export(provenance_helper) export(rq) export(run_pca) export(run_pca_and_plot_correlations) +export(start_de) export(stepwise_regression) export(store_results) export(summarize_biotypes) From dca31c213d9fde1b921071b8f91816fe4163d045 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Fri, 23 Jul 2021 18:51:04 -0500 Subject: [PATCH 18/64] targets script will install at root of users working directory --- inst/_targets.R | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/inst/_targets.R b/inst/_targets.R index 0eb03cc0..5f486672 100644 --- a/inst/_targets.R +++ b/inst/_targets.R @@ -130,6 +130,22 @@ list( skip = get("skip model") ) ), + tar_target( + selected_model, + if(is.null(get("force model with"))) {model$variables_in_model} else {get("force model with")} + ), + tar_target( + de, + wrap_de( + conditions = get("de contrasts"), + filtered_counts, + cqn_counts, + clean_md, + biomart_results, + p_value_threshold = get("de p-value threshold"), + log_fold_threshold = get("de FC") + ) + ), tar_target( report, tarchetypes::tar_render( @@ -158,6 +174,7 @@ list( clean_md = clean_md, filtered_counts = filtered_counts, biomart_results = biomart_results, + de_results = de, rownames = !!list( config::get("metadata")$`sample id`, config::get("counts")$`gene id`, @@ -170,7 +187,8 @@ list( ), data_names = list( "clean_md", "filtered_counts", - "biomart_results", "cqn_counts" + "biomart_results", "cqn_counts", + names(de) ), inputs = document_inputs, activity_provenance = "Analyze RNA-seq data with sageseqr pkg", From 2b2b0ad9f9cfb1126bba3205fb852a7fb4ac00a2 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Fri, 23 Jul 2021 18:52:41 -0500 Subject: [PATCH 19/64] update store_results documentation --- man/store_results.Rd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/man/store_results.Rd b/man/store_results.Rd index f9fae409..164f83d8 100644 --- a/man/store_results.Rd +++ b/man/store_results.Rd @@ -37,8 +37,7 @@ biomart. Defaults to target name constrained by \code{"targets::tar_make"}.} \item{de_results}{The drake target containing differential expression gene -lists. Defaults to target name constrained by -\code{"sageseqr::rnaseq_plan()"}.} +lists. Defaults to target name constrained by} \item{syn_names}{A list of human-readable names for the Synapse entities ordered From 440d2fc5b34c414dda67fd84f0b5bf29e215d547 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Thu, 19 Aug 2021 11:31:59 -0700 Subject: [PATCH 20/64] fix argument name --- inst/_targets.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/_targets.R b/inst/_targets.R index 5f486672..dbcad545 100644 --- a/inst/_targets.R +++ b/inst/_targets.R @@ -143,7 +143,7 @@ list( clean_md, biomart_results, p_value_threshold = get("de p-value threshold"), - log_fold_threshold = get("de FC") + fold_change_threshold = get("de FC") ) ), tar_target( From e24da77ea4fe30b8adeba078f7e069308e66e372 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Thu, 19 Aug 2021 15:31:45 -0700 Subject: [PATCH 21/64] force login within the download functions --- R/functions.R | 1 + vignettes/run-the-plan.Rmd | 12 +++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/R/functions.R b/R/functions.R index 4f5eb69c..3918c039 100644 --- a/R/functions.R +++ b/R/functions.R @@ -14,6 +14,7 @@ #' #'} get_data <- function(synid, version = NULL) { + synapser::synLogin() df <- tibble::as_tibble(data.table::fread(synapser::synGet(synid, version = as.numeric(version) )$path)) diff --git a/vignettes/run-the-plan.Rmd b/vignettes/run-the-plan.Rmd index 0270fd42..f0f01c17 100644 --- a/vignettes/run-the-plan.Rmd +++ b/vignettes/run-the-plan.Rmd @@ -14,22 +14,24 @@ knitr::opts_chunk$set( eval = FALSE ) ``` +1. This workflow requires file inputs to be stored on [Synapse](https://www.synapse.org). It is free to make an account and store +small files. Store your counts files and metadata on Synapse. -1. [Write your own config file](https://sage-bionetworks.github.io/sageseqr/articles/customize-config.html). +2. [Write your own config file](https://sage-bionetworks.github.io/sageseqr/articles/customize-config.html). -2. Specify the active configuration by setting `R_CONFIG_ACTIVE`. +3. Specify the active configuration by setting `R_CONFIG_ACTIVE`. ```{r config-setup} profile_set <- "default" Sys.setenv(R_CONFIG_ACTIVE = profile_set) ``` -3. Load the `sageseqr` library and login to [Synapse](https://www.synapse.org/). `start_de()` copies the `targets` plan to your working directory. Execute `targets::make()` to compute. Run this code from your project root. +4. Store your Synapse credentials locally. You can achieve this by either creating a `.synapseConfig` file or executing `synLogin()` with `rememberMe = True`. [This article](https://r-docs.synapse.org/articles/manageSynapseCredentials.html) provides more context to storing Synapse credentials. + +5. Load the `sageseqr` library. `start_de()` copies the `targets` plan to your working directory. Execute `targets::make()` to compute. Run this code from your project root. ```{r run-plan} library(sageseqr) -# Login to Synapse. Make a Synapse account and use synapser to login: https://r-docs.synapse.org/articles/manageSynapseCredentials.html -synapser::synLogin() # gather the dependencies start_de() From dda56e45326210cf92f477826df2b8b5a4a8aff0 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Thu, 19 Aug 2021 16:25:42 -0700 Subject: [PATCH 22/64] clarify langauge in vignette --- vignettes/run-the-plan.Rmd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vignettes/run-the-plan.Rmd b/vignettes/run-the-plan.Rmd index f0f01c17..37adc5f4 100644 --- a/vignettes/run-the-plan.Rmd +++ b/vignettes/run-the-plan.Rmd @@ -32,8 +32,7 @@ Sys.setenv(R_CONFIG_ACTIVE = profile_set) ```{r run-plan} library(sageseqr) - -# gather the dependencies +# gather the dependencies in your working directory by running this function: start_de() # inspect the steps of the workflow targets::tar_manifest() From 4daecf1fec9e4f88e532166d07fdf98d9ab44590 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Thu, 19 Aug 2021 17:01:04 -0700 Subject: [PATCH 23/64] fix syntax for tar_render --- inst/_targets.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/inst/_targets.R b/inst/_targets.R index dbcad545..b0ad2f68 100644 --- a/inst/_targets.R +++ b/inst/_targets.R @@ -1,4 +1,5 @@ library(config) +library(sageseqr) library(targets) # build plan list( @@ -149,10 +150,10 @@ list( tar_target( report, tarchetypes::tar_render( - name = "sageseqr-report.Rmd", - path = glue::glue( + name = glue::glue( "{getwd()}/{get('report')}.Rmd" - ) + ), + path = "sageseqr-report.Rmd" ) ), tar_target( From a1809539dc2f5500034dcba2ede33be9a9ffe806 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Fri, 20 Aug 2021 08:44:56 -0700 Subject: [PATCH 24/64] incorporate changes from pr 156 --- _targets.R | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 _targets.R diff --git a/_targets.R b/_targets.R new file mode 100644 index 00000000..13deb93d --- /dev/null +++ b/_targets.R @@ -0,0 +1,16 @@ +library(targets) +library(config) + list( + tar_target( + compute_model, + c("a list", "of", "vars"), + ), + tar_target( + model, + if(is.null(get("de log2F"))) {compute_model} else {get("de log2F")} + ), + tar_target( + store_output, + write.csv(model, "test.csv") + ) + ) From e80f2f06f4decec453bafaed0025c3326c82f901 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Fri, 20 Aug 2021 08:47:32 -0700 Subject: [PATCH 25/64] remove reference to arguments --- inst/_targets.R | 149 ++++++++++++++++++++++++++++++------------------ 1 file changed, 93 insertions(+), 56 deletions(-) diff --git a/inst/_targets.R b/inst/_targets.R index b0ad2f68..ad271459 100644 --- a/inst/_targets.R +++ b/inst/_targets.R @@ -8,22 +8,22 @@ list( get_data( get("metadata")$synID, get("metadata")$version - ) - ), + ) + ), tar_target( import_counts, get_data( get("counts")$synID, get("counts")$version - ) - ), + ) + ), tar_target( counts, tibble::column_to_rownames( import_counts, var = get("counts")$`gene id` - ) - ), + ) + ), tar_target( clean_md, clean_covariates( @@ -31,8 +31,8 @@ list( factors = get("factors"), continuous = get("continuous"), sample_identifier = get("metadata")$`sample id` - ) - ), + ) + ), tar_target( biomart_results, get_biomart( @@ -42,8 +42,8 @@ list( filters = get("biomart")$filters, host = get("biomart")$host, organism = get("biomart")$organism - ) - ), + ) + ), tar_target( filtered_counts, filter_genes( @@ -52,36 +52,36 @@ list( conditions = get("conditions"), cpm_threshold = get("cpm threshold"), conditions_threshold = get("percent threshold") - ) - ), + ) + ), tar_target( biotypes, summarize_biotypes( filtered_counts, biomart_results - ) - ), + ) + ), tar_target( cqn_counts, cqn( filtered_counts, biomart_results - ) - ), + ) + ), tar_target( gene_coexpression, plot_coexpression( cqn_counts - ) - ), + ) + ), tar_target( boxplots, boxplot_vars( md = clean_md, include_vars = get("continuous"), x_var = get("x_var") - ) - ), + ) + ), tar_target( sex_plot, conditional_plot_sexcheck( @@ -89,8 +89,8 @@ list( counts, biomart_results, get("sex check") - ) - ), + ) + ), tar_target( sex_plot_pca, plot_sexcheck_pca( @@ -98,20 +98,20 @@ list( counts, biomart_results, get("sex check") - ) - ), + ) + ), tar_target( correlation_plot, get_association_statistics( clean_md - ) - ), + ) + ), tar_target( significant_covariates_plot, run_pca_and_plot_correlations( cqn_counts$E,clean_md - ) - ), + ) + ), tar_target( outliers, identify_outliers( @@ -120,8 +120,8 @@ list( get("dimensions")$color, get("dimensions")$shape, get("dimensions")$size - ) - ), + ) + ), tar_target( model, stepwise_regression( @@ -129,33 +129,54 @@ list( primary_variable = get("x_var"), cqn_counts = cqn_counts, skip = get("skip model") - ) - ), + ) + ), tar_target( selected_model, - if(is.null(get("force model with"))) {model$variables_in_model} else {get("force model with")} + if(is.null(get("force model with"))) { + model$variables_in_model + } else { + get("force model with") + } ), tar_target( de, wrap_de( conditions = get("de contrasts"), filtered_counts, - cqn_counts, + cqn_counts$E, clean_md, biomart_results, p_value_threshold = get("de p-value threshold"), - fold_change_threshold = get("de FC") + fold_change_threshold = get("de FC"), + model_variables = selected_model + ) + ), + tar_target( + get_gene_list, + purrr::map(de, function(x) x$differential_expression) + ), + tar_target( + plot_de_volcano, + purrr::map( + get_gene_list, + function(x) plot_volcano( + x, + p_value_threshold = get("de p-value threshold"), + fold_change_threshold = get("de FC"), + gene_list = get_gene_list ) - ), + ) + ), tar_target( report, - tarchetypes::tar_render( - name = glue::glue( - "{getwd()}/{get('report')}.Rmd" - ), - path = "sageseqr-report.Rmd" - ) - ), + tarchetypes::tar_render( + name = glue::glue( + "{getwd()}/{get('report')}.Rmd" + ), + path = "sageseqr-report.Rmd" + ) + ), tar_target( document_inputs, provenance_helper( @@ -165,36 +186,52 @@ list( get("counts")$version, get("biomart")$synID, get("biomart")$version - ) - ), + ) + ), tar_target( Synapse, store_results( parent_id = get("store output"), - cqn_counts = cqn_counts$counts, + cqn_counts = cqn_counts$E, clean_md = clean_md, filtered_counts = filtered_counts, biomart_results = biomart_results, de_results = de, - rownames = !!list( + rownames = list( config::get("metadata")$`sample id`, config::get("counts")$`gene id`, config::get("biomart")$filters, config::get("counts")$`gene id` + ), + syn_names = append( + list( + "Covariates", + "Filtered counts (greater than 1cpm)", + "BioMart query results", + "Normalized counts (CQN)" ), - syn_names = list( - "Covariates", "Filtered counts (greater than 1cpm)", - "BioMart query results", "Normalized counts (CQN)" - ), - data_names = list( - "clean_md", "filtered_counts", - "biomart_results", "cqn_counts", - names(de) + as.list( + glue::glue( + "Differential Expression ({names(de)})") + ) + ), + data_names = append( + list( + "clean_md", + "filtered_counts", + "biomart_results", + "cqn_counts" ), + as.list( + names( + de + ) + ) + ), inputs = document_inputs, activity_provenance = "Analyze RNA-seq data with sageseqr pkg", config_file = "config.yml", report_name = get("report") - ) ) ) +) From c9aba29d5a0471d36edb2c4e6c002c5298dabd20 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Fri, 20 Aug 2021 13:24:28 -0700 Subject: [PATCH 26/64] login to Synapse from store function --- R/functions.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/functions.R b/R/functions.R index 3918c039..9a17abec 100644 --- a/R/functions.R +++ b/R/functions.R @@ -874,6 +874,9 @@ store_results <- function(clean_md = clean_md, parent = parent_id ) + # login to Synapse + synapser::synLogin() + markdown_provenance <- synapser::synStore( obj = file, used = used_ids, From 972700955397f0a6e8b99a7c2041317239c834f5 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Fri, 20 Aug 2021 13:27:45 -0700 Subject: [PATCH 27/64] login to Synapse from store function --- R/functions.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/functions.R b/R/functions.R index 9a17abec..e822cd2b 100644 --- a/R/functions.R +++ b/R/functions.R @@ -831,6 +831,9 @@ store_results <- function(clean_md = clean_md, activity_description = description ) + # login to Synapse + synapser::synLogin() + for_provenance <- purrr::pmap( mash, function( @@ -874,9 +877,6 @@ store_results <- function(clean_md = clean_md, parent = parent_id ) - # login to Synapse - synapser::synLogin() - markdown_provenance <- synapser::synStore( obj = file, used = used_ids, From 70e3e41c752acb231edbe2614aaeeb1bcdc247d5 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Mon, 23 Aug 2021 16:56:46 -0500 Subject: [PATCH 28/64] fix ref to drake --- inst/sageseqr-report.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/sageseqr-report.Rmd b/inst/sageseqr-report.Rmd index e899d84f..dea0a509 100644 --- a/inst/sageseqr-report.Rmd +++ b/inst/sageseqr-report.Rmd @@ -138,7 +138,7 @@ if (!isTRUE(config::get("skip model"))) { # Differential Expression ```{r differential_expression} -plots <- drake::readd(plot_de_volcano) +plots <- targets::tar_read(plot_de_volcano) for(p in plots) { print(p) } From b88f9cd5ebc39ec3e9afb7d21a8dd2d737472c8c Mon Sep 17 00:00:00 2001 From: kelshmo Date: Mon, 23 Aug 2021 16:57:11 -0500 Subject: [PATCH 29/64] fix dependency on report --- inst/_targets.R | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/inst/_targets.R b/inst/_targets.R index ad271459..0b2d4335 100644 --- a/inst/_targets.R +++ b/inst/_targets.R @@ -68,12 +68,6 @@ list( biomart_results ) ), - tar_target( - gene_coexpression, - plot_coexpression( - cqn_counts - ) - ), tar_target( boxplots, boxplot_vars( @@ -168,14 +162,10 @@ list( ) ) ), - tar_target( + tarchetypes::tar_render( report, - tarchetypes::tar_render( - name = glue::glue( - "{getwd()}/{get('report')}.Rmd" - ), - path = "sageseqr-report.Rmd" - ) + "sageseqr-report.Rmd", + output_file = glue::glue("{getwd()}/{config::get('report')}.html") ), tar_target( document_inputs, @@ -197,6 +187,7 @@ list( filtered_counts = filtered_counts, biomart_results = biomart_results, de_results = de, + report = report, rownames = list( config::get("metadata")$`sample id`, config::get("counts")$`gene id`, From 2316bf4d2d9af53dafdcdc5f42f8ace446b800c5 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Mon, 23 Aug 2021 17:00:30 -0500 Subject: [PATCH 30/64] first major version release --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0fff0a25..fc8a6b2d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: sageseqr Title: Identify differentially expressed genes counts data -Version: 0.0.0.9000 +Version: 0.1.0.0 Authors@R: c(person(given = "Kelsey", family = "Montgomery", From 54ee77154854e72294fda952821bfd3239431227 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Mon, 23 Aug 2021 17:06:03 -0500 Subject: [PATCH 31/64] load targets in working dir --- vignettes/run-the-plan.Rmd | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/vignettes/run-the-plan.Rmd b/vignettes/run-the-plan.Rmd index 37adc5f4..006dff8b 100644 --- a/vignettes/run-the-plan.Rmd +++ b/vignettes/run-the-plan.Rmd @@ -32,18 +32,17 @@ Sys.setenv(R_CONFIG_ACTIVE = profile_set) ```{r run-plan} library(sageseqr) +library(targets) # gather the dependencies in your working directory by running this function: start_de() # inspect the steps of the workflow -targets::tar_manifest() +tar_manifest() # run the analysis -targets::tar_make() +tar_make() ``` 4. Visualize the results of your work. ```{r visualize} -targets::tar_glimpse( - targets_only = TRUE -) +tar_visnetwork() ``` From bc4af71704f9d52573d6052d1b165d519133484f Mon Sep 17 00:00:00 2001 From: kelshmo Date: Tue, 24 Aug 2021 12:07:02 -0500 Subject: [PATCH 32/64] update missing documentation --- R/functions.R | 10 +++++----- man/store_results.Rd | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/R/functions.R b/R/functions.R index e822cd2b..739a9828 100644 --- a/R/functions.R +++ b/R/functions.R @@ -731,18 +731,18 @@ prepare_results <- function(target, data_name, rowname = NULL) { #' #' @param parent_id A Synapse Id that corresponds to a project or #' folder to store output. -#' @param cqn_counts The targets target containing Conditional Quantile Normalized +#' @param cqn_counts The target containing Conditional Quantile Normalized #' (CQN) counts. Defaults to target name constrained by #' \code{"targets::tar_make"}. -#' @param clean_md The targets target containing the metadata data frame. +#' @param clean_md The target containing the metadata data frame. #' Defaults to target name constrained by \code{"targets::tar_make"}. -#' @param filtered_counts The targets target containing counts after low gene +#' @param filtered_counts The target containing counts after low gene #' expression has been removed. Defaults to target name constrained by #' \code{"targets::tar_make()"}. -#' @param biomart_results The targets target containing gene annotations from +#' @param biomart_results The target containing gene annotations from #' biomart. Defaults to target name constrained by #' \code{"targets::tar_make"}. -#' @param de_results The drake target containing differential expression gene +#' @param de_results The target containing differential expression gene #' lists. Defaults to target name constrained in the _targets.R file. #' @param rownames A list of variables to store rownames ordered by `metadata`, #' `filtered_counts`, `biomart_results`, `cqn_counts`. If not applicable, diff --git a/man/store_results.Rd b/man/store_results.Rd index 2ab88622..d462ee35 100644 --- a/man/store_results.Rd +++ b/man/store_results.Rd @@ -22,22 +22,22 @@ store_results( ) } \arguments{ -\item{clean_md}{The targets target containing the metadata data frame. +\item{clean_md}{The target containing the metadata data frame. Defaults to target name constrained by \code{"targets::tar_make"}.} -\item{filtered_counts}{The targets target containing counts after low gene +\item{filtered_counts}{The target containing counts after low gene expression has been removed. Defaults to target name constrained by \code{"targets::tar_make()"}.} -\item{biomart_results}{The targets target containing gene annotations from +\item{biomart_results}{The target containing gene annotations from biomart. Defaults to target name constrained by \code{"targets::tar_make"}.} -\item{cqn_counts}{The targets target containing Conditional Quantile Normalized +\item{cqn_counts}{The target containing Conditional Quantile Normalized (CQN) counts. Defaults to target name constrained by \code{"targets::tar_make"}.} -\item{de_results}{The drake target containing differential expression gene +\item{de_results}{The target containing differential expression gene lists. Defaults to target name constrained in the _targets.R file.} \item{syn_names}{A list of human-readable names for the Synapse entities From fd42bd9495987d5e6523e1987200c3ca7d425abd Mon Sep 17 00:00:00 2001 From: kelshmo Date: Tue, 24 Aug 2021 12:07:15 -0500 Subject: [PATCH 33/64] remove pacakges no longer imported --- DESCRIPTION | 2 -- 1 file changed, 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index fc8a6b2d..d9fb61b6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -33,12 +33,10 @@ RoxygenNote: 7.1.1 biocViews: Imports: biomaRt, - config, cqn, data.table, dplyr, edgeR, - foreach, fs, graphics, GenomicRanges, From 7ec6a164e8e0c1474b48312043b155e78c9bb350 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Tue, 24 Aug 2021 12:23:44 -0500 Subject: [PATCH 34/64] add documentation for missing arg --- R/functions.R | 2 ++ man/store_results.Rd | 3 +++ 2 files changed, 5 insertions(+) diff --git a/R/functions.R b/R/functions.R index 739a9828..e15e75d2 100644 --- a/R/functions.R +++ b/R/functions.R @@ -744,6 +744,8 @@ prepare_results <- function(target, data_name, rowname = NULL) { #' \code{"targets::tar_make"}. #' @param de_results The target containing differential expression gene #' lists. Defaults to target name constrained in the _targets.R file. +#' @param report The target containing the rendered html document. Defaults +#' to target name constrained in the _targets.R file. #' @param rownames A list of variables to store rownames ordered by `metadata`, #' `filtered_counts`, `biomart_results`, `cqn_counts`. If not applicable, #' set as NULL. diff --git a/man/store_results.Rd b/man/store_results.Rd index d462ee35..6d289c80 100644 --- a/man/store_results.Rd +++ b/man/store_results.Rd @@ -40,6 +40,9 @@ biomart. Defaults to target name constrained by \item{de_results}{The target containing differential expression gene lists. Defaults to target name constrained in the _targets.R file.} +\item{report}{The target containing the rendered html document. Defaults +to target name constrained in the _targets.R file.} + \item{syn_names}{A list of human-readable names for the Synapse entities ordered by `metadata`, `filtered_counts`, `biomart_results`, `cqn_counts`.} From 408edda3ee992c5545444aa4b2807133316acfa4 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Wed, 25 Aug 2021 21:42:34 -0500 Subject: [PATCH 35/64] clarify documentation for primary or predictor variable --- vignettes/customize-config.Rmd | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vignettes/customize-config.Rmd b/vignettes/customize-config.Rmd index 28eab475..2e0f7dcd 100644 --- a/vignettes/customize-config.Rmd +++ b/vignettes/customize-config.Rmd @@ -45,8 +45,9 @@ factors: Required. List of factor variables in brackets. Variables must present in the metadata as column names (e.g. [ "donorid", "source"]). continuous: Required. List of continuous variables in brackets. Variables must be present in the metadata as column names (e.g. [ "rin", "rin2"]). -x_var: Required. A boxplot will visualize the distribution of continuous variables using - the x_var as a dimension. +x_var: Required. This is your predictor or primary variable of interest. + Additionally, a boxplot will visualize the distribution of + continuous variables using the x_var as a dimension. conditions: Optional. Filtering low-expression genes is a common practice to improve sensitivity in detection of differentially expressed genes. Low count genes that have less than a user-defined threshold of counts From 06303224993497dff0331de6097aaae8238c1628 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Wed, 25 Aug 2021 21:42:55 -0500 Subject: [PATCH 36/64] add step to compute residuals --- R/functions.R | 69 +++++++++++++++++++++++++++++++++++++++++++++++-- inst/_targets.R | 28 +++++++++++++++++--- 2 files changed, 92 insertions(+), 5 deletions(-) diff --git a/R/functions.R b/R/functions.R index e15e75d2..bc7b5dc0 100644 --- a/R/functions.R +++ b/R/functions.R @@ -767,6 +767,7 @@ store_results <- function(clean_md = clean_md, biomart_results = biomart_results, cqn_counts = cqn_counts$E, de_results = de, + residualized_counts = residualized_counts, report = report, syn_names, data_names, parent_id, inputs, activity_provenance, @@ -792,10 +793,10 @@ store_results <- function(clean_md = clean_md, de_results <- purrr::map(de_results, function(x) x$differential_expression) # append differential expression data frames already nested in list - targets <- append(targets, de_results) + targets <- append(targets, de_results, residualized_counts) # append null rownames for differential expression object - rownames <- append(rownames, rep(list(NULL), length(de_results))) + rownames <- append(rownames, rep(list(NULL), length(de_results) + length(residualized_counts))) mash <- list( target = targets, @@ -949,3 +950,67 @@ start_de <- function() { new_path = getwd()) } } +#' Compute residualized counts matrix +#' +#' Residuals of the best fit linear regression model are computed for each +#' observation. Batch effects are adjusted for in the returned counts matrix +#' while preserving the effect of the predictor variable. +#' +#' Counts are normalized prior to linear modeling to compute residuals. A +#' precision weight is assigned to each gene feature to estimate the +#' mean-variance relationship. Counts normalized by conditional quantile +#' normalization (CQN) are used in place of log2 normalized counts. +#' +#' @inheritParams cqn +#' @inheritParams differential_expression +#' @inheritParams filter_genes +compute_residuals <- function(clean_metadata, filtered_counts, + cqn_counts = cqn_counts$E, primary_variable, + model_variables) { + # force order of samples in metadata to match order of samples in counts. + # Required by variancePartition + clean_metadata <- clean_metadata[match(colnames(filtered_counts),rownames(clean_metadata)),] + + metadata_input <- build_formula(clean_metadata, primary_variable, model_variables) + # Estimate voom weights with DREAM + gene_expression <- edgeR::DGEList(filtered_counts) + gene_expression <- edgeR::calcNormFactors(gene_expression) + voom <- variancePartition::voomWithDreamWeights( + counts = gene_expression, + formula = metadata_input$formula, + data = metadata_input$metadata + ) + + # fit linear model using weights and best model + voom$E <- cqn_counts + adjusted_fit <- variancePartition::dream( + exprObj = voom, + formula = formula, + data = metadata_input$metadata, + computeResiduals = TRUE + ) + + # compute residual matrix + residual_gene_expression <- stats::residuals(adjusted_fit) + + # calculate weighted residuals and add back signal from predictor + variables_to_add_back <- grep( + metadata_input$primary_variable, + colnames(adjusted_fit$design), + value = TRUE + ) + output <- residual_gene_expression + + adjusted_fit$coefficients[ + ,variables_to_add_back + ] %*% t( + adjusted_fit$design[ + ,variables_to_add_back] + ) + + # save gene features + output <- rownames_to_column(as.data.frame(output), var = "feature") + + return(output) + + +} diff --git a/inst/_targets.R b/inst/_targets.R index 0b2d4335..9763e179 100644 --- a/inst/_targets.R +++ b/inst/_targets.R @@ -133,6 +133,18 @@ list( get("force model with") } ), + tar_target( + residualized_counts, + purrr::map( + get("de contrasts"), + function(x) compute_residuals( + clean_metadata, + filtered_counts, + cqn_counts = cqn_counts$E, + primary_variable = x, + model_variables = selected_model + ) + ), tar_target( de, wrap_de( @@ -187,6 +199,7 @@ list( filtered_counts = filtered_counts, biomart_results = biomart_results, de_results = de, + residualized_counts = residualized_counts, report = report, rownames = list( config::get("metadata")$`sample id`, @@ -201,11 +214,15 @@ list( "BioMart query results", "Normalized counts (CQN)" ), + as.list( + glue::glue("Residualized counts({names(residualized_counts)})") + ), as.list( glue::glue( - "Differential Expression ({names(de)})") - ) - ), + "Differential Expression ({names(de)})" + ) + ) + ), data_names = append( list( "clean_md", @@ -213,6 +230,11 @@ list( "biomart_results", "cqn_counts" ), + as.list( + names( + residualized_counts + ) + ), as.list( names( de From 724f443c51fa03d0dbd6c50681edc564464b8f0f Mon Sep 17 00:00:00 2001 From: kelshmo Date: Wed, 25 Aug 2021 22:18:58 -0500 Subject: [PATCH 37/64] test and document new function --- NAMESPACE | 1 + R/functions.R | 20 +++++++++----- inst/_targets.R | 2 +- man/store_results.Rd | 1 + tests/testthat/test-compute_residuals.R | 36 +++++++++++++++++++++++++ 5 files changed, 52 insertions(+), 8 deletions(-) create mode 100644 tests/testthat/test-compute_residuals.R diff --git a/NAMESPACE b/NAMESPACE index b7bcd043..d70c1fb4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,7 @@ export(check_mismatch) export(clean_covariates) export(collapse_duplicate_hgnc_symbol) export(complete_path) +export(compute_residuals) export(conditional_plot_sexcheck) export(convert_geneids) export(correlate_and_plot) diff --git a/R/functions.R b/R/functions.R index bc7b5dc0..08755d85 100644 --- a/R/functions.R +++ b/R/functions.R @@ -964,9 +964,10 @@ start_de <- function() { #' @inheritParams cqn #' @inheritParams differential_expression #' @inheritParams filter_genes +#' @export compute_residuals <- function(clean_metadata, filtered_counts, cqn_counts = cqn_counts$E, primary_variable, - model_variables) { + model_variables = NULL) { # force order of samples in metadata to match order of samples in counts. # Required by variancePartition clean_metadata <- clean_metadata[match(colnames(filtered_counts),rownames(clean_metadata)),] @@ -985,7 +986,7 @@ compute_residuals <- function(clean_metadata, filtered_counts, voom$E <- cqn_counts adjusted_fit <- variancePartition::dream( exprObj = voom, - formula = formula, + formula = metadata_input$formula, data = metadata_input$metadata, computeResiduals = TRUE ) @@ -1008,9 +1009,14 @@ compute_residuals <- function(clean_metadata, filtered_counts, ) # save gene features - output <- rownames_to_column(as.data.frame(output), var = "feature") - - return(output) - - + output <- tibble::rownames_to_column(as.data.frame(output), var = "feature") + + return( + list( + output = output, + signal = variables_to_add_back, + adjusted_fit = adjusted_fit, + formula = metadata_input$formula + ) + ) } diff --git a/inst/_targets.R b/inst/_targets.R index 9763e179..517194b5 100644 --- a/inst/_targets.R +++ b/inst/_targets.R @@ -199,7 +199,7 @@ list( filtered_counts = filtered_counts, biomart_results = biomart_results, de_results = de, - residualized_counts = residualized_counts, + residualized_counts = residualized_counts$output, report = report, rownames = list( config::get("metadata")$`sample id`, diff --git a/man/store_results.Rd b/man/store_results.Rd index 6d289c80..b900c7bc 100644 --- a/man/store_results.Rd +++ b/man/store_results.Rd @@ -10,6 +10,7 @@ store_results( biomart_results = biomart_results, cqn_counts = cqn_counts$E, de_results = de, + residualized_counts = residualized_counts, report = report, syn_names, data_names, diff --git a/tests/testthat/test-compute_residuals.R b/tests/testthat/test-compute_residuals.R new file mode 100644 index 00000000..62a7ff64 --- /dev/null +++ b/tests/testthat/test-compute_residuals.R @@ -0,0 +1,36 @@ +metadata <- data.frame( + batch = c("1", "2", "1", "2", "1", "2", "1", "2"), + diagnosis = c("dx", "dx", "ct", "ct", "dx", "dx", "ct", "ct"), + sex = c("M", "F", "F", "F", "M", "F", "F", "F"), + RIN = c(5, 5, 4, 5, 4, 4, 5, 5), + site = c("one", "one", "one", "two", "two", "three", "three", "three"), + row.names = c("S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8"), + stringsAsFactors = TRUE +) + +counts <- data.frame(matrix( + sample(0:100, size = 32), + ncol = 8, + dimnames = list(c("ENSG00000229807.12", "ENSG00000183878.12", + "ENSG00000239807.12", "ENSG00000259807.12"), + c("S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8")) +)) + +matrix <- compute_residuals( + metadata, + counts, + counts, + primary_variable = c("diagnosis", "sex"), +) + +test_that("gene features stored in column", { + expect_true("feature" %in% colnames(matrix$output)) +}) + +test_that("predictor variable preserved as signal", { + expect_true(all(grepl("diagnosis_sex", matrix$signal))) +}) + +test_that("output list meets expectations", { + expect_identical(names(matrix), c("output", "signal", "adjusted_fit", "formula")) +}) From 13014dd14da50a8f8c1a2cc8466eebed3cd0f3af Mon Sep 17 00:00:00 2001 From: kelshmo Date: Thu, 26 Aug 2021 11:48:09 -0500 Subject: [PATCH 38/64] document new function --- man/compute_residuals.Rd | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 man/compute_residuals.Rd diff --git a/man/compute_residuals.Rd b/man/compute_residuals.Rd new file mode 100644 index 00000000..e475dc94 --- /dev/null +++ b/man/compute_residuals.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions.R +\name{compute_residuals} +\alias{compute_residuals} +\title{Compute residualized counts matrix} +\usage{ +compute_residuals( + clean_metadata, + filtered_counts, + cqn_counts = cqn_counts$E, + primary_variable, + model_variables = NULL +) +} +\arguments{ +\item{clean_metadata}{A data frame with sample identifiers as rownames and variables as +factors or numeric as determined by \code{"sageseqr::clean_covariates()"}.} + +\item{filtered_counts}{A counts data frame with genes removed that have low expression.} + +\item{cqn_counts}{A counts data frame normalized by CQN.} + +\item{primary_variable}{Vector of variables that will be collapsed into a single +fixed effect interaction term.} + +\item{model_variables}{Optional. Vector of variables to include in the linear (mixed) model. +If not supplied, the model will include all variables in \code{md}.} +} +\description{ +Residuals of the best fit linear regression model are computed for each + observation. Batch effects are adjusted for in the returned counts matrix + while preserving the effect of the predictor variable. +} +\details{ +Counts are normalized prior to linear modeling to compute residuals. A +precision weight is assigned to each gene feature to estimate the +mean-variance relationship. Counts normalized by conditional quantile + normalization (CQN) are used in place of log2 normalized counts. +} From 4fceddcd75ca64c55c2ecd79bd4591b968115634 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Thu, 26 Aug 2021 11:49:34 -0500 Subject: [PATCH 39/64] document new target in store_results --- man/store_results.Rd | 3 +++ 1 file changed, 3 insertions(+) diff --git a/man/store_results.Rd b/man/store_results.Rd index b900c7bc..9f41964b 100644 --- a/man/store_results.Rd +++ b/man/store_results.Rd @@ -41,6 +41,9 @@ biomart. Defaults to target name constrained by \item{de_results}{The target containing differential expression gene lists. Defaults to target name constrained in the _targets.R file.} +\item{residualized_counts}{The target containing counts adjusted for batch +effects. Defaults to target name constrained in the _targets.R file.} + \item{report}{The target containing the rendered html document. Defaults to target name constrained in the _targets.R file.} From f5040c15cf5fbd68a9600fd9607a851461c12fdf Mon Sep 17 00:00:00 2001 From: kelshmo Date: Thu, 26 Aug 2021 13:21:19 -0500 Subject: [PATCH 40/64] document arg in store_results --- R/functions.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/functions.R b/R/functions.R index 08755d85..2177ebdf 100644 --- a/R/functions.R +++ b/R/functions.R @@ -744,6 +744,8 @@ prepare_results <- function(target, data_name, rowname = NULL) { #' \code{"targets::tar_make"}. #' @param de_results The target containing differential expression gene #' lists. Defaults to target name constrained in the _targets.R file. +#' @param residualized_counts The target containing counts adjusted for batch +#' effects. Defaults to target name constrained in the _targets.R file. #' @param report The target containing the rendered html document. Defaults #' to target name constrained in the _targets.R file. #' @param rownames A list of variables to store rownames ordered by `metadata`, From 65b7a81a2f60583b879ac25e87632491e5985f20 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Mon, 30 Aug 2021 14:59:05 -0500 Subject: [PATCH 41/64] remove _targets.R file at root --- _targets.R | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 _targets.R diff --git a/_targets.R b/_targets.R deleted file mode 100644 index 13deb93d..00000000 --- a/_targets.R +++ /dev/null @@ -1,16 +0,0 @@ -library(targets) -library(config) - list( - tar_target( - compute_model, - c("a list", "of", "vars"), - ), - tar_target( - model, - if(is.null(get("de log2F"))) {compute_model} else {get("de log2F")} - ), - tar_target( - store_output, - write.csv(model, "test.csv") - ) - ) From 8b155ff3e4925614b96b343a8e11aa71bd33e5b3 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Mon, 30 Aug 2021 17:37:57 -0500 Subject: [PATCH 42/64] clarify instructions --- vignettes/run-the-plan.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/run-the-plan.Rmd b/vignettes/run-the-plan.Rmd index 006dff8b..45774c96 100644 --- a/vignettes/run-the-plan.Rmd +++ b/vignettes/run-the-plan.Rmd @@ -26,7 +26,7 @@ profile_set <- "default" Sys.setenv(R_CONFIG_ACTIVE = profile_set) ``` -4. Store your Synapse credentials locally. You can achieve this by either creating a `.synapseConfig` file or executing `synLogin()` with `rememberMe = True`. [This article](https://r-docs.synapse.org/articles/manageSynapseCredentials.html) provides more context to storing Synapse credentials. +4. Store your Synapse credentials locally. You can achieve this by either creating a `.synapseConfig` file or executing `synLogin()` with `rememberMe = True`. If you are working inside a Docker, make a text file called `.synapseConfig` from the home directory inside your container. [This article](https://r-docs.synapse.org/articles/manageSynapseCredentials.html) describes the format of the `.synapseConfig` file. 5. Load the `sageseqr` library. `start_de()` copies the `targets` plan to your working directory. Execute `targets::make()` to compute. Run this code from your project root. From 312fe88c5e65454fb9aa0e6d0b2e8099b1d06028 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Thu, 2 Sep 2021 14:29:51 -0500 Subject: [PATCH 43/64] add missing paren --- inst/_targets.R | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/_targets.R b/inst/_targets.R index 517194b5..a3f2697e 100644 --- a/inst/_targets.R +++ b/inst/_targets.R @@ -143,6 +143,7 @@ list( cqn_counts = cqn_counts$E, primary_variable = x, model_variables = selected_model + ) ) ), tar_target( From c0a654fb07b1719bd544a8f2cc59bfc31e876438 Mon Sep 17 00:00:00 2001 From: Jake Gockley Date: Fri, 10 Sep 2021 15:05:11 -0700 Subject: [PATCH 44/64] Update _targets.R --- inst/_targets.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/_targets.R b/inst/_targets.R index a3f2697e..57b4a37c 100644 --- a/inst/_targets.R +++ b/inst/_targets.R @@ -138,7 +138,7 @@ list( purrr::map( get("de contrasts"), function(x) compute_residuals( - clean_metadata, + clean_md, filtered_counts, cqn_counts = cqn_counts$E, primary_variable = x, From 3be13a306ff52e714cc141214ccb9ed2caaf829a Mon Sep 17 00:00:00 2001 From: Jake Gockley Date: Fri, 10 Sep 2021 15:29:54 -0700 Subject: [PATCH 45/64] Update functions.R Added BPPARAM = BiocParallel::SnowParam(parallel::detectCores()-1) to variancePartition::dream and variancePartition::voomWithDreamWeights function calls --- R/functions.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/R/functions.R b/R/functions.R index 1bdaa103..bab1c9d2 100644 --- a/R/functions.R +++ b/R/functions.R @@ -549,7 +549,9 @@ differential_expression <- function(filtered_counts, cqn_counts, md, gene_expression <- edgeR::calcNormFactors(gene_expression) voom_gene_expression <- variancePartition::voomWithDreamWeights(counts = gene_expression, formula = metadata_input$formula, - data = metadata_input$metadata) + data = metadata_input$metadata, + BPPARAM = BiocParallel::SnowParam(parallel::detectCores()-1) + ) voom_gene_expression$E <- cqn_counts de_conditions <- levels(metadata_input$metadata[[metadata_input$primary_variable]]) @@ -580,7 +582,8 @@ differential_expression <- function(filtered_counts, cqn_counts, md, fit_contrasts = variancePartition::dream(exprObj = voom_gene_expression, formula = metadata_input$formula_non_intercept, data = metadata_input$metadata, - L = contrasts + L = contrasts, + BPPARAM = BiocParallel::SnowParam(parallel::detectCores()-1) ) de <- lapply(names(contrasts), function(i, fit){ @@ -986,7 +989,8 @@ compute_residuals <- function(clean_metadata, filtered_counts, voom <- variancePartition::voomWithDreamWeights( counts = gene_expression, formula = metadata_input$formula, - data = metadata_input$metadata + data = metadata_input$metadata, + BPPARAM = BiocParallel::SnowParam(parallel::detectCores()-1) ) # fit linear model using weights and best model @@ -995,7 +999,8 @@ compute_residuals <- function(clean_metadata, filtered_counts, exprObj = voom, formula = metadata_input$formula, data = metadata_input$metadata, - computeResiduals = TRUE + computeResiduals = TRUE, + BPPARAM = BiocParallel::SnowParam(parallel::detectCores()-1) ) # compute residual matrix From 241fe2f91fc68b6ace20891242f8f750f636c8ad Mon Sep 17 00:00:00 2001 From: kelshmo Date: Fri, 10 Sep 2021 17:31:50 -0500 Subject: [PATCH 46/64] does this fix build? --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index d9fb61b6..5ee834d2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,6 +27,7 @@ URL: https://sage-bionetworks.github.io/sageseqr, Encoding: UTF-8 LazyData: true Suggests: + pandoc, testthat (>= 2.1.0), markdown RoxygenNote: 7.1.1 From 15034709d5eb606c22c8e113f07fd4f2677d1cac Mon Sep 17 00:00:00 2001 From: kelshmo Date: Fri, 10 Sep 2021 17:43:05 -0500 Subject: [PATCH 47/64] be explicit about pandoc in r cmd check? --- .github/workflows/R-CMD-check.yaml | 2 +- DESCRIPTION | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 4ca9e43e..b1934423 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -12,7 +12,7 @@ jobs: - uses: r-lib/actions/setup-r@master - name: Install dependencies run: | - install.packages(c("remotes", "rcmdcheck")) + install.packages(c("pandoc", "remotes", "rcmdcheck")) remotes::install_deps(dependencies = TRUE) shell: Rscript {0} - name: Check diff --git a/DESCRIPTION b/DESCRIPTION index 5ee834d2..d9fb61b6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,6 @@ URL: https://sage-bionetworks.github.io/sageseqr, Encoding: UTF-8 LazyData: true Suggests: - pandoc, testthat (>= 2.1.0), markdown RoxygenNote: 7.1.1 From d721adf56953d48b1531cd73123e558b4fb5623f Mon Sep 17 00:00:00 2001 From: kelshmo Date: Fri, 10 Sep 2021 17:50:44 -0500 Subject: [PATCH 48/64] markdown is imported instead of suggested --- .github/workflows/R-CMD-check.yaml | 2 +- DESCRIPTION | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index b1934423..4ca9e43e 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -12,7 +12,7 @@ jobs: - uses: r-lib/actions/setup-r@master - name: Install dependencies run: | - install.packages(c("pandoc", "remotes", "rcmdcheck")) + install.packages(c("remotes", "rcmdcheck")) remotes::install_deps(dependencies = TRUE) shell: Rscript {0} - name: Check diff --git a/DESCRIPTION b/DESCRIPTION index d9fb61b6..f98efedf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,7 +28,6 @@ Encoding: UTF-8 LazyData: true Suggests: testthat (>= 2.1.0), - markdown RoxygenNote: 7.1.1 biocViews: Imports: @@ -51,6 +50,7 @@ Imports: knitr, limma, magrittr, + markdown, mclust, mvIC, plyr, From 31951f9500b125f99d8246a39dbacd4c3c7c8453 Mon Sep 17 00:00:00 2001 From: Jake Gockley Date: Fri, 10 Sep 2021 15:51:49 -0700 Subject: [PATCH 49/64] Update DESCRIPTION --- DESCRIPTION | 2 ++ 1 file changed, 2 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index d9fb61b6..f295fc35 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,6 +32,7 @@ Suggests: RoxygenNote: 7.1.1 biocViews: Imports: + BiocParallel, biomaRt, cqn, data.table, @@ -53,6 +54,7 @@ Imports: magrittr, mclust, mvIC, + parallel, plyr, psych, purrr, From 8b142ca64c5282eb7b961f7be07a151342407e2e Mon Sep 17 00:00:00 2001 From: jgockley62 Date: Mon, 13 Sep 2021 17:21:49 -0700 Subject: [PATCH 50/64] Specify cores command for parallel backend --- R/functions.R | 23 ++++++++++++++++------- config.yml | 1 + inst/_targets.R | 6 ++++-- vignettes/customize-config.Rmd | 5 +++++ 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/R/functions.R b/R/functions.R index bab1c9d2..025bccca 100644 --- a/R/functions.R +++ b/R/functions.R @@ -515,13 +515,14 @@ build_formula <- function(md, primary_variable, model_variables = NULL, #' Differential expression testing is performed with \code{"variancePartition::dream()"} to increase power #' and decrease false positives for repeated measure designs. The `primary_variable` is modeled as a fixed #' effect. If you wish to test an interaction term, `primary_variable` can take multiple variable names. -#' +#' #' @param cqn_counts A counts data frame normalized by CQN. #' @param p_value_threshold Numeric. P-values are adjusted by Benjamini and #' Hochberg (BH) false discovery rate (FDR). Significant genes are those with an #' adjusted p-value greater than this threshold. #' @param fold_change_threshold Numeric. Significant genes are those with a #' fold-change greater than this threshold. +#' @param cores An integer of cores to specify in the parallel backend (eg. 4) #' @inheritParams cqn #' @inheritParams coerce_factors #' @inheritParams build_formula @@ -539,9 +540,13 @@ differential_expression <- function(filtered_counts, cqn_counts, md, primary_variable, biomart_results, p_value_threshold, fold_change_threshold, model_variables = NULL, - exclude_variables = NULL) { + exclude_variables = NULL, + cores = NULL) { # force order of samples in metadata to match order of samples in counts. # Required by variancePartition + if(is.null(cores)){ + cores = parallel::detectCores()-1 + } md <- md[match(colnames(filtered_counts),rownames(md)),] metadata_input <- build_formula(md, primary_variable, model_variables) @@ -550,7 +555,7 @@ differential_expression <- function(filtered_counts, cqn_counts, md, voom_gene_expression <- variancePartition::voomWithDreamWeights(counts = gene_expression, formula = metadata_input$formula, data = metadata_input$metadata, - BPPARAM = BiocParallel::SnowParam(parallel::detectCores()-1) + BPPARAM = BiocParallel::SnowParam(cores) ) voom_gene_expression$E <- cqn_counts @@ -583,7 +588,7 @@ differential_expression <- function(filtered_counts, cqn_counts, md, formula = metadata_input$formula_non_intercept, data = metadata_input$metadata, L = contrasts, - BPPARAM = BiocParallel::SnowParam(parallel::detectCores()-1) + BPPARAM = BiocParallel::SnowParam(cores) ) de <- lapply(names(contrasts), function(i, fit){ @@ -977,7 +982,11 @@ start_de <- function() { #' @export compute_residuals <- function(clean_metadata, filtered_counts, cqn_counts = cqn_counts$E, primary_variable, - model_variables = NULL) { + model_variables = NULL, cores = NULL) { + # set the number of cores if not specified in the config + if(is.null(cores)){ + cores = parallel::detectCores()-1 + } # force order of samples in metadata to match order of samples in counts. # Required by variancePartition clean_metadata <- clean_metadata[match(colnames(filtered_counts),rownames(clean_metadata)),] @@ -990,7 +999,7 @@ compute_residuals <- function(clean_metadata, filtered_counts, counts = gene_expression, formula = metadata_input$formula, data = metadata_input$metadata, - BPPARAM = BiocParallel::SnowParam(parallel::detectCores()-1) + BPPARAM = BiocParallel::SnowParam(cores) ) # fit linear model using weights and best model @@ -1000,7 +1009,7 @@ compute_residuals <- function(clean_metadata, filtered_counts, formula = metadata_input$formula, data = metadata_input$metadata, computeResiduals = TRUE, - BPPARAM = BiocParallel::SnowParam(parallel::detectCores()-1) + BPPARAM = BiocParallel::SnowParam(cores) ) # compute residual matrix diff --git a/config.yml b/config.yml index 0ee28a10..0bb350a6 100644 --- a/config.yml +++ b/config.yml @@ -20,6 +20,7 @@ default: cpm threshold: 1 percent threshold: 0.5 sex check: sex + cores: dimensions: color: "diagnosis" shape: "batch" diff --git a/inst/_targets.R b/inst/_targets.R index 57b4a37c..390900ef 100644 --- a/inst/_targets.R +++ b/inst/_targets.R @@ -142,7 +142,8 @@ list( filtered_counts, cqn_counts = cqn_counts$E, primary_variable = x, - model_variables = selected_model + model_variables = selected_model, + cores = get("cores") ) ) ), @@ -156,7 +157,8 @@ list( biomart_results, p_value_threshold = get("de p-value threshold"), fold_change_threshold = get("de FC"), - model_variables = selected_model + model_variables = selected_model, + cores = get("cores") ) ), tar_target( diff --git a/vignettes/customize-config.Rmd b/vignettes/customize-config.Rmd index 2e0f7dcd..4c8bccae 100644 --- a/vignettes/customize-config.Rmd +++ b/vignettes/customize-config.Rmd @@ -68,6 +68,11 @@ skip model: Optional. If TRUE, the exploratory data report is run. Model s is not computed. force model with: Optional. Force differential expression with this user defined model instead of the output of stepwise regression. +cores: Optional. Specify an integer of cores to use with a BiocParallel + parallel backend. Null value results in the number of available + cores minus one being used. Parallel backend ccurrently only supports + BiocParallel::SnowParam(). BatchtoolsParam, MulticoreParam, + BiocParallelDoparParam, and SerialParam not currently supported. de FC: The fold-change (FC) of significant differentially expressed (de) genes must exceed this value. This value will be transformed into log2 FC. From d397aa855db6cb7043e9d46ed528e7ca38223e94 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Tue, 14 Sep 2021 12:30:24 -0500 Subject: [PATCH 51/64] pandoc as soft dependency --- DESCRIPTION | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f98efedf..8888cd74 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,9 @@ URL: https://sage-bionetworks.github.io/sageseqr, Encoding: UTF-8 LazyData: true Suggests: - testthat (>= 2.1.0), + markdown, + pandoc, + testthat (>= 2.1.0) RoxygenNote: 7.1.1 biocViews: Imports: @@ -50,7 +52,6 @@ Imports: knitr, limma, magrittr, - markdown, mclust, mvIC, plyr, From ee213b8dffeeeeaa2acdd14f38054b089b4a27f6 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Tue, 14 Sep 2021 12:45:29 -0500 Subject: [PATCH 52/64] vignette builder is rmarkdown --- DESCRIPTION | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8888cd74..a584a7ac 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,7 @@ URL: https://sage-bionetworks.github.io/sageseqr, Encoding: UTF-8 LazyData: true Suggests: - markdown, + rmarkdown, pandoc, testthat (>= 2.1.0) RoxygenNote: 7.1.1 @@ -60,7 +60,6 @@ Imports: RColorBrewer, reshape2, rlang, - rmarkdown, sagethemes, stats, stringr, From f55e0d00197755e8aba40c532e0c049780854849 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Tue, 14 Sep 2021 13:20:42 -0500 Subject: [PATCH 53/64] run jobs in r+pandoc environment --- .github/workflows/R-CMD-check.yaml | 2 +- DESCRIPTION | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 4ca9e43e..e2f9995f 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -9,7 +9,7 @@ jobs: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - uses: actions/checkout@v1 - - uses: r-lib/actions/setup-r@master + - uses: r-lib/actions/setup-pandoc@v1 - name: Install dependencies run: | install.packages(c("remotes", "rcmdcheck")) diff --git a/DESCRIPTION b/DESCRIPTION index a584a7ac..ab79d72f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,7 +28,6 @@ Encoding: UTF-8 LazyData: true Suggests: rmarkdown, - pandoc, testthat (>= 2.1.0) RoxygenNote: 7.1.1 biocViews: From 14e8932f4d10602142e8f5ddbcf456d8a6b6d835 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Tue, 14 Sep 2021 13:40:35 -0500 Subject: [PATCH 54/64] preserve setup-r --- .github/workflows/R-CMD-check.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index e2f9995f..4bc13990 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -9,6 +9,7 @@ jobs: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - uses: actions/checkout@v1 + - uses: r-lib/actions/setup-r@v1 - uses: r-lib/actions/setup-pandoc@v1 - name: Install dependencies run: | From aaeadf8d52b316ba86722af4b0aa1aa1aa0bc6f3 Mon Sep 17 00:00:00 2001 From: jgockley62 Date: Tue, 14 Sep 2021 20:33:41 -0700 Subject: [PATCH 55/64] suggestions and document --- R/functions.R | 4 ++-- man/compute_residuals.Rd | 5 ++++- man/differential_expression.Rd | 5 ++++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/R/functions.R b/R/functions.R index 025bccca..753e5275 100644 --- a/R/functions.R +++ b/R/functions.R @@ -515,14 +515,14 @@ build_formula <- function(md, primary_variable, model_variables = NULL, #' Differential expression testing is performed with \code{"variancePartition::dream()"} to increase power #' and decrease false positives for repeated measure designs. The `primary_variable` is modeled as a fixed #' effect. If you wish to test an interaction term, `primary_variable` can take multiple variable names. -#' +#' #' @param cqn_counts A counts data frame normalized by CQN. #' @param p_value_threshold Numeric. P-values are adjusted by Benjamini and #' Hochberg (BH) false discovery rate (FDR). Significant genes are those with an #' adjusted p-value greater than this threshold. #' @param fold_change_threshold Numeric. Significant genes are those with a #' fold-change greater than this threshold. -#' @param cores An integer of cores to specify in the parallel backend (eg. 4) +#' @param cores An integer of cores to specify in the parallel backend (eg. 4). #' @inheritParams cqn #' @inheritParams coerce_factors #' @inheritParams build_formula diff --git a/man/compute_residuals.Rd b/man/compute_residuals.Rd index e475dc94..5f19eb40 100644 --- a/man/compute_residuals.Rd +++ b/man/compute_residuals.Rd @@ -9,7 +9,8 @@ compute_residuals( filtered_counts, cqn_counts = cqn_counts$E, primary_variable, - model_variables = NULL + model_variables = NULL, + cores = NULL ) } \arguments{ @@ -25,6 +26,8 @@ fixed effect interaction term.} \item{model_variables}{Optional. Vector of variables to include in the linear (mixed) model. If not supplied, the model will include all variables in \code{md}.} + +\item{cores}{An integer of cores to specify in the parallel backend (eg. 4).} } \description{ Residuals of the best fit linear regression model are computed for each diff --git a/man/differential_expression.Rd b/man/differential_expression.Rd index 95c974c7..934ced93 100644 --- a/man/differential_expression.Rd +++ b/man/differential_expression.Rd @@ -13,7 +13,8 @@ differential_expression( p_value_threshold, fold_change_threshold, model_variables = NULL, - exclude_variables = NULL + exclude_variables = NULL, + cores = NULL ) } \arguments{ @@ -40,6 +41,8 @@ fold-change greater than this threshold.} If not supplied, the model will include all variables in \code{md}.} \item{exclude_variables}{Vector of variables to exclude from testing.} + +\item{cores}{An integer of cores to specify in the parallel backend (eg. 4).} } \value{ A named list with \code{"variancePartition::voomWithDreamWeights()"} From 1aaf2b34c65ad08182d18d0371e67d4679fbac45 Mon Sep 17 00:00:00 2001 From: jgockley62 Date: Tue, 14 Sep 2021 20:35:37 -0700 Subject: [PATCH 56/64] suggestions --- vignettes/customize-config.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/customize-config.Rmd b/vignettes/customize-config.Rmd index 4c8bccae..69fa48dd 100644 --- a/vignettes/customize-config.Rmd +++ b/vignettes/customize-config.Rmd @@ -72,7 +72,7 @@ cores: Optional. Specify an integer of cores to use with a BiocParall parallel backend. Null value results in the number of available cores minus one being used. Parallel backend ccurrently only supports BiocParallel::SnowParam(). BatchtoolsParam, MulticoreParam, - BiocParallelDoparParam, and SerialParam not currently supported. + BiocParallelDoparParam, and SerialParam are not currently supported. de FC: The fold-change (FC) of significant differentially expressed (de) genes must exceed this value. This value will be transformed into log2 FC. From 6b17a209519018188d51d2b2dd001755ba999f3a Mon Sep 17 00:00:00 2001 From: kelshmo Date: Wed, 15 Sep 2021 11:41:02 -0500 Subject: [PATCH 57/64] get_data requires synLogin within the function due to targets constrained env --- R/functions.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/functions.R b/R/functions.R index 1bdaa103..a9f30d29 100644 --- a/R/functions.R +++ b/R/functions.R @@ -14,6 +14,9 @@ #' #'} get_data <- function(synid, version = NULL) { + # login to Synapse + synapser::synLogin() + df <- tibble::as_tibble( data.table::fread( synapser::synGet( From e47438c4275a2993b4a0a3c7de257975c152ff8f Mon Sep 17 00:00:00 2001 From: jgockley62 Date: Wed, 15 Sep 2021 10:45:52 -0700 Subject: [PATCH 58/64] add cores to wrap_de --- R/functions.R | 5 +++-- man/wrap_de.Rd | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/R/functions.R b/R/functions.R index 753e5275..f4b741c9 100644 --- a/R/functions.R +++ b/R/functions.R @@ -633,7 +633,7 @@ differential_expression <- function(filtered_counts, cqn_counts, md, #' @export wrap_de <- function(conditions, filtered_counts, cqn_counts, md, biomart_results, p_value_threshold, fold_change_threshold, - model_variables = names(md)) { + model_variables = names(md), cores = NULL) { purrr::map( conditions, function(x) differential_expression( @@ -644,7 +644,8 @@ wrap_de <- function(conditions, filtered_counts, cqn_counts, md, biomart_results, p_value_threshold, fold_change_threshold, - model_variables + model_variables, + cores = cores ) ) } diff --git a/man/wrap_de.Rd b/man/wrap_de.Rd index 7c4374af..c9acdc23 100644 --- a/man/wrap_de.Rd +++ b/man/wrap_de.Rd @@ -12,7 +12,8 @@ wrap_de( biomart_results, p_value_threshold, fold_change_threshold, - model_variables = names(md) + model_variables = names(md), + cores = NULL ) } \arguments{ @@ -37,6 +38,8 @@ fold-change greater than this threshold.} \item{model_variables}{Optional. Vector of variables to include in the linear (mixed) model. If not supplied, the model will include all variables in \code{md}.} + +\item{cores}{An integer of cores to specify in the parallel backend (eg. 4).} } \description{ This function will pass multiple conditions to test to \code{"sagseqr::differential_expression()"}. From 84aba484a5f579d2372294541020e37096309d89 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Thu, 16 Sep 2021 12:04:57 -0500 Subject: [PATCH 59/64] instructions for alpha version --- vignettes/sagseqr-alpha-steps.Rmd | 156 ++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) create mode 100644 vignettes/sagseqr-alpha-steps.Rmd diff --git a/vignettes/sagseqr-alpha-steps.Rmd b/vignettes/sagseqr-alpha-steps.Rmd new file mode 100644 index 00000000..0e104642 --- /dev/null +++ b/vignettes/sagseqr-alpha-steps.Rmd @@ -0,0 +1,156 @@ +--- +title: "Run the drake plan in version 0.0.0.9-alpha" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Run the drake plan in version 0.0.0.9-alpha} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + eval = FALSE +) +``` +In `sageseqr` version 0.0.0.9-alpha, `sageseqr` was built with the workflow R package called [`drake`](https://books.ropensci.org/drake/). There are differences between the latest version of `sageseqr` and the alpha version when it comes to executing the workflow. As new features are always being released, the latest release of the package is recommended! However, if you are reproducing an analysis that depends on the alpha version of the package, follow these instructions. + +1. Install the package `remotes::install_github("Sage-Bionetworks/sageseqr@v0.0.0.9-alpha")`. + +2. Write your own config file. + +``` +counts: + synID: Required. Synapse ID to counts data frame with identifiers to + the metadata as column names and gene ids in a column. + version: Optional. Include Synapse file version number (e.g. 3). + gene id: Required. Column name that corresponds to the gene ids (e.g. feature). +metadata: + synID: Required. Synapse ID to cleaned metadata file with sample + identifiers in a column and variables of interest as column names. + version: Optional. Include Synapse file version number (e.g. 3). + sample id: Required. Column name that corresponds to the sample ids (e.g. donorid). +biomart: + synID: Optional. If left blank, Ensembl will be queried with the gene + ids provided in the counts. Otherwise, you may provide the + Synapse ID to gene metadata from Ensembl. This must include gene + length and GC content in order to implement Conditional Quantile + Normalization. + version: Optional. Include Synapse file version number (e.g. 3). + filters: Required. Column name that corresponds to the gene ids (e.g. + ensembl_gene_id). + host: Optional. A character vector specifying the BioMart database release + version. This specification is highly recommended for a reproducible + workflow. Defaults to ensembl.org. + organism: Required. A character vector of the organism name. This argument + takes partial strings. For example,"hsa" will match "hsapiens_gene_ensembl". +factors: Required. List of factor variables in brackets. Variables must be + present in the metadata as column names (e.g. [ "donorid", "source"]). +continuous: Required. List of continuous variables in brackets. Variables must + be present in the metadata as column names (e.g. [ "rin", "rin2"]). +x_var: Required. A boxplot will visualize the distribution of continuous variables using + the x_var as a dimension. +conditions: Optional. Filtering low-expression genes is a common practice to improve + sensitivity in detection of differentially expressed genes. Low + count genes that have less than a user-defined threshold of counts + per million (CPM) in a user-defined percentage of samples per the + conditions provided here will be removed. (e.g. ["diagnosis", "sex"]). +cpm threshold: Optional. The minium allowable CPM to keep a gene in the analysis. +percent threshold:Optional. The percentage of samples that should contain the minimum number + of CPM. If a condition is passed, the percentage will apply to + the samples in that sub-population. +sex check: Optional. The exact variable name that corresponds to reported gender or sex + to check the distribution of x and y marker expression across + samples. +dimensions: Required. Specify the PCA dimensions by variable name. + color: + shape: + size: +skip model: Optional. If TRUE, the exploratory data report is run. Model selection + is not computed. +force model with: Optional. Force differential expression with this user defined model + instead of the output of stepwise regression. +de FC: The fold-change (FC) of significant differentially expressed + (de) genes must exceed this value. This value will be transformed + into log2 FC. +de p-value threshold: The adjusted p-value of significant differentially expressed + (de) genes must exceed this value. +de contrasts: Required. Variable in the metadata to define comparisons between + groups. + If there are multiple comparisons, set them up as nested lists. +visualization gene list: Label this list of genes in the volcano plot. +report: Required. The name of your project. This will become the name of your + output html file. +store output: Required. Folder Synapse Id to store output on Synapse. +``` + +3. Specify the active configuration by setting `R_CONFIG_ACTIVE`. + +```{r config-setup} +Sys.setenv(R_CONFIG_ACTIVE = "default") +``` + +4. Load the `sageseqr` library and login to [Synapse](https://www.synapse.org/). `rnaseq_plan()` calls the arguments from the config file and creates the `drake` plan. Execute `drake::make(plan)` to compute. Run this code from your project root. + +```{r run-plan} +library(sageseqr) + +# Login to Synapse. Make a Synapse account and use synapser to login: https://r-docs.synapse.org/articles/manageSynapseCredentials.html +synapser::synLogin() + +# Run the analysis +plan <- sageseqr::rnaseq_plan( + metadata_id = config::get("metadata")$synID, + metadata_version = config::get("metadata")$version, + counts_id = config::get("counts")$synID, + counts_version = config::get("counts")$version, + gene_id_input = config::get("counts")$`gene id`, + sample_id_input = config::get("metadata")$`sample id`, + factor_input = config::get("factors"), + continuous_input = config::get("continuous"), + biomart_id = config::get("biomart")$synID, + biomart_version = config::get("biomart")$version, + filters = config::get("biomart")$filters, + host = config::get("biomart")$host, + organism = config::get("biomart")$organism, + conditions = config::get("conditions"), + cpm_threshold = config::get("cpm threshold"), + conditions_threshold = config::get("percent threshold"), + primary_variable = config::get("x_var"), + de_contrasts = config::get("de contrasts"), + fold_change_threshold = config::get("de FC"), + p_value_threshold = config::get("de p-value threshold"), + sex_var = config::get("sex check"), + color = config::get("dimensions")$color, + shape = config::get("dimensions")$shape, + size = config::get("dimensions")$size, + report_name = config::get("report"), + skip_model = config::get("skip model"), + parent_id = config::get("store output"), + rownames = list( + config::get("metadata")$`sample id`, + config::get("counts")$`gene id`, + config::get("biomart")$filters, + config::get("counts")$`gene id` + ), + config_file = "config.yml", + force_model = config::get("force model with"), + gene_list = config::get("visualization gene list") +) + +drake::make( + plan, + envir = getNamespace("sageseqr") + ) +``` + +4. Visualize the results of your work. + +```{r visualize} +drake::vis_drake_graph( + plan, + envir = getNamespace("sageseqr"), + targets_only = TRUE + ) +``` From 3c0743f51d4bf7d3eab9801800d1446c525c12fa Mon Sep 17 00:00:00 2001 From: kelshmo Date: Thu, 16 Sep 2021 12:31:32 -0500 Subject: [PATCH 60/64] add missing targets check and gene_coexpression --- inst/_targets.R | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/inst/_targets.R b/inst/_targets.R index 57b4a37c..564850a4 100644 --- a/inst/_targets.R +++ b/inst/_targets.R @@ -33,6 +33,13 @@ list( sample_identifier = get("metadata")$`sample id` ) ), + tar_target( + check, + check_mismatch( + md = clean_md, + count_df = counts + ) + ), tar_target( biomart_results, get_biomart( @@ -68,6 +75,12 @@ list( biomart_results ) ), + tar_target( + gene_coexpression, + plot_coexpression( + cqn_counts + ) + ), tar_target( boxplots, boxplot_vars( From 08cdfef7b9e7c48aeaf03b9790c182f65ece16a5 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Fri, 17 Sep 2021 10:32:20 -0500 Subject: [PATCH 61/64] fix store residual counts --- R/functions.R | 8 ++++++-- inst/_targets.R | 14 +++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/R/functions.R b/R/functions.R index 84b89b7b..dd731652 100644 --- a/R/functions.R +++ b/R/functions.R @@ -591,7 +591,7 @@ differential_expression <- function(filtered_counts, cqn_counts, md, formula = metadata_input$formula_non_intercept, data = metadata_input$metadata, L = contrasts, - BPPARAM = BiocParallel::SnowParam(cores) + BPPARAM = BiocParallel::SnowParam(cores) ) de <- lapply(names(contrasts), function(i, fit){ @@ -811,8 +811,12 @@ store_results <- function(clean_md = clean_md, # parse differential expression gene list de_results <- purrr::map(de_results, function(x) x$differential_expression) + # parse residualized counts + parse_residual_matrix <- purrr::map(residualized_counts, function(x) x$output) + # append differential expression data frames already nested in list - targets <- append(targets, de_results, residualized_counts) + targets <- append(targets, de_results) + targets <- append(targets, parse_residual_matrix) # append null rownames for differential expression object rownames <- append(rownames, rep(list(NULL), length(de_results) + length(residualized_counts))) diff --git a/inst/_targets.R b/inst/_targets.R index f0008060..e3263d3c 100644 --- a/inst/_targets.R +++ b/inst/_targets.R @@ -236,9 +236,9 @@ list( as.list( glue::glue( "Differential Expression ({names(de)})" - ) ) - ), + ) + ), data_names = append( list( "clean_md", @@ -247,13 +247,9 @@ list( "cqn_counts" ), as.list( - names( - residualized_counts - ) - ), - as.list( - names( - de + c( + names(residualized_counts), + names(de) ) ) ), From 1cb4aa5ca70f4ef9713307591f1477466c4b0578 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Mon, 20 Sep 2021 14:41:57 -0500 Subject: [PATCH 62/64] coerce vector to list to make list of x length --- inst/_targets.R | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/inst/_targets.R b/inst/_targets.R index e3263d3c..2307ae99 100644 --- a/inst/_targets.R +++ b/inst/_targets.R @@ -223,36 +223,26 @@ list( config::get("biomart")$filters, config::get("counts")$`gene id` ), - syn_names = append( - list( + syn_names = as.list( + c( "Covariates", "Filtered counts (greater than 1cpm)", "BioMart query results", - "Normalized counts (CQN)" - ), - as.list( - glue::glue("Residualized counts({names(residualized_counts)})") - ), - as.list( - glue::glue( - "Differential Expression ({names(de)})" + "Normalized counts (CQN)", + glue::glue("Residualized counts ({names(residualized_counts)})"), + glue::glue("Differential Expression ({names(de)})") ) - ) - ), - data_names = append( - list( + ), + data_names = as.list( + c( "clean_md", "filtered_counts", "biomart_results", - "cqn_counts" - ), - as.list( - c( - names(residualized_counts), - names(de) + "cqn_counts", + names(residualized_counts), + names(de) ) - ) - ), + ), inputs = document_inputs, activity_provenance = "Analyze RNA-seq data with sageseqr pkg", config_file = "config.yml", From 5b5f6cbe59bc760b1255ab3a375adcde2bcf9cba Mon Sep 17 00:00:00 2001 From: kelshmo Date: Mon, 20 Sep 2021 15:41:13 -0500 Subject: [PATCH 63/64] fix reversed ordere --- R/functions.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/functions.R b/R/functions.R index dd731652..6e76d7e3 100644 --- a/R/functions.R +++ b/R/functions.R @@ -815,11 +815,11 @@ store_results <- function(clean_md = clean_md, parse_residual_matrix <- purrr::map(residualized_counts, function(x) x$output) # append differential expression data frames already nested in list - targets <- append(targets, de_results) targets <- append(targets, parse_residual_matrix) + targets <- append(targets, de_results) # append null rownames for differential expression object - rownames <- append(rownames, rep(list(NULL), length(de_results) + length(residualized_counts))) + rownames <- append(rownames, rep(list(NULL), length(residualized_counts) + length(de_results))) mash <- list( target = targets, From e4c554ac2b65cef9980c09287f4ab5342fc4a1c4 Mon Sep 17 00:00:00 2001 From: kelshmo Date: Mon, 20 Sep 2021 16:54:47 -0500 Subject: [PATCH 64/64] fix targets.R --- inst/_targets.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/_targets.R b/inst/_targets.R index 2307ae99..5de34ddf 100644 --- a/inst/_targets.R +++ b/inst/_targets.R @@ -215,7 +215,7 @@ list( filtered_counts = filtered_counts, biomart_results = biomart_results, de_results = de, - residualized_counts = residualized_counts$output, + residualized_counts = residualized_counts, report = report, rownames = list( config::get("metadata")$`sample id`,