From 08f10ff09425b0b7c49dd3dd87dae9fd2573e7ec Mon Sep 17 00:00:00 2001 From: richelbilderbeek Date: Wed, 2 Sep 2020 10:41:06 +0200 Subject: [PATCH] 'get_aa_transition_matrix' returns either BLOSUM62 or (the untested) FLU. Use EpitopePrediction as the IC50 prediction tool in tests --- R/are_detected.R | 10 +++++--- R/create_consensus_topology_conservation.R | 8 +++--- R/get_aa_transition_matrix.R | 9 ++++++- R/is_detected.R | 25 +++++++++---------- man/are_detected.Rd | 18 +++++++++++-- man/create_consensus_topology_conservation.Rd | 5 +++- man/get_aa_transition_matrix.Rd | 2 +- man/is_detected.Rd | 18 ++++++++++--- tests/testthat/test-are_detected.R | 9 ++++--- ...t-create_consensus_topology_conservation.R | 1 + .../testthat/test-get_aa_transition_matrix.R | 4 +++ tests/testthat/test-is_detected.R | 13 ++++++---- 12 files changed, 86 insertions(+), 36 deletions(-) diff --git a/R/are_detected.R b/R/are_detected.R index e5f587a..d9221d7 100644 --- a/R/are_detected.R +++ b/R/are_detected.R @@ -4,8 +4,10 @@ #' @export are_detected <- function( protein_sequences, - mhc_haplotype = "HLA-A*02-01", - percentile = get_ic50_percentile_binder() + mhc_haplotype, + peptide_length, + percentile = get_ic50_percentile_binder(), + ic50_prediction_tool ) { bbbq::check_mhc_haplotype_name(mhc_haplotype) results <- rep(NA, length(protein_sequences)) @@ -13,7 +15,9 @@ are_detected <- function( results[i] <- bbbq::is_detected( protein_sequence = protein_sequences[i], mhc_haplotype = mhc_haplotype, - percentile = percentile + peptide_length = peptide_length, + percentile = percentile, + ic50_prediction_tool = ic50_prediction_tool ) } results diff --git a/R/create_consensus_topology_conservation.R b/R/create_consensus_topology_conservation.R index 92d0ec6..936c0f1 100644 --- a/R/create_consensus_topology_conservation.R +++ b/R/create_consensus_topology_conservation.R @@ -10,7 +10,8 @@ #' @inheritParams default_params_doc #' @export create_consensus_topology_conservation <- function( # nolint indeed a long function name - protein_sequences + protein_sequences, + transition_matrix = get_aa_transition_matrix("BLOSUM62") ) { protein_sequences_aass <- Biostrings::AAStringSet(protein_sequences) @@ -18,9 +19,10 @@ create_consensus_topology_conservation <- function( # nolint indeed a long funct protein_alignment <- msa::msa(protein_sequences_aass) sink() - utils::data(BLOSUM62) conservation_scores <- msa::msaConservationScore( - protein_alignment, BLOSUM62) + protein_alignment, + transition_matrix + ) t <- tibble::tibble( aa = names(conservation_scores), score = as.numeric(conservation_scores), diff --git a/R/get_aa_transition_matrix.R b/R/get_aa_transition_matrix.R index 5cca301..2fc04e3 100644 --- a/R/get_aa_transition_matrix.R +++ b/R/get_aa_transition_matrix.R @@ -15,7 +15,14 @@ #' amino acid name #' @author Richèl J.C. Bilderbeek #' @export -get_aa_transition_matrix <- function() { +get_aa_transition_matrix <- function( + transition_matrix_name = "FLU" +) { + if (transition_matrix_name == "BLOSUM62") { + library(Biostrings, warn.conflicts = FALSE, quietly = TRUE) + utils::data(BLOSUM62) + return(BLOSUM62) + } t <- as.matrix( readr::read_csv( system.file("extdata", "flu_transitions.csv", package = "bbbq"), diff --git a/R/is_detected.R b/R/is_detected.R index 093ba4e..3026b30 100644 --- a/R/is_detected.R +++ b/R/is_detected.R @@ -1,30 +1,29 @@ #' Determine if the haplotype has at least one binder to the peptide #' @inheritParams default_params_doc -#' @param fragment_length the length of the peptide fragments -#' the peptide will be cut into #' @author Richèl J.C. Bilderbeek #' @export is_detected <- function( protein_sequence, mhc_haplotype, - fragment_length = 9, - percentile = get_ic50_percentile_binder() + peptide_length, + percentile = get_ic50_percentile_binder(), + ic50_prediction_tool ) { bbbq::check_mhc_haplotype_name(mhc_haplotype) - ic50_threshold <- mhcnpreds::get_ic50_threshold( - peptide_length = fragment_length, - mhc_haplotype = mhcnuggetsr::to_mhcnuggets_name(mhc_haplotype), - percentile = percentile + ic50_threshold <- bbbq::get_ic50_threshold( + peptide_length = peptide_length, + haplotype = mhc_haplotype, + percentile = percentile, + ic50_prediction_tool = ic50_prediction_tool ) - results <- mhcnuggetsr::predict_ic50s( - mhcnuggets_options = mhcnuggetsr::create_mhcnuggets_options( - mhc = mhcnuggetsr::to_mhcnuggets_name(mhc_haplotype) - ), + results <- bbbq::predict_ic50s( protein_sequence = protein_sequence, - peptide_length = fragment_length + peptide_length = peptide_length, + haplotype = mhc_haplotype, + ic50_prediction_tool = ic50_prediction_tool ) sum(results$ic50 < ic50_threshold) > 0 } diff --git a/man/are_detected.Rd b/man/are_detected.Rd index f51c157..14365a1 100644 --- a/man/are_detected.Rd +++ b/man/are_detected.Rd @@ -6,8 +6,10 @@ \usage{ are_detected( protein_sequences, - mhc_haplotype = "HLA-A*02-01", - percentile = get_ic50_percentile_binder() + mhc_haplotype, + peptide_length, + percentile = get_ic50_percentile_binder(), + ic50_prediction_tool ) } \arguments{ @@ -17,10 +19,22 @@ are_detected( Use \link{get_mhc1_haplotypes} to get a list of all MHC-I haplotypes. Use \link{get_mhc2_haplotypes} to get a list of all MHC-II haplotypes.} +\item{peptide_length}{length of the peptide in amino acids} + \item{percentile}{how low the IC50 must be for the protein to be considered a binder. For example, 0.02 denotes that the protein must have an IC50 in the lowest 2 percent range. The default value is returned by \link{get_ic50_percentile_binder}.} + +\item{ic50_prediction_tool}{tool to predict the IC50 from a + peptide. Possible values are:\cr +\itemize{ + \item mhcnuggetsr \link[mhcnuggetsr]{mhcnuggetsr}, + which uses MHCnuggets + \item mhcnuggetsr \link[netmhc2pan]{netmhc2pan}, + which uses NetMHC2pam + \item EpitopePrediction uses \code{EpitopePrediction} +}} } \description{ Are the protein sequences detected by either MHC-I or MHC-II? diff --git a/man/create_consensus_topology_conservation.Rd b/man/create_consensus_topology_conservation.Rd index a129a90..463bc1c 100644 --- a/man/create_consensus_topology_conservation.Rd +++ b/man/create_consensus_topology_conservation.Rd @@ -4,7 +4,10 @@ \alias{create_consensus_topology_conservation} \title{Create a consensus-topology-conservation table} \usage{ -create_consensus_topology_conservation(protein_sequences) +create_consensus_topology_conservation( + protein_sequences, + transition_matrix = get_aa_transition_matrix("BLOSUM62") +) } \arguments{ \item{protein_sequences}{one or more protein sequences} diff --git a/man/get_aa_transition_matrix.Rd b/man/get_aa_transition_matrix.Rd index 0237dcc..2e7ac48 100644 --- a/man/get_aa_transition_matrix.Rd +++ b/man/get_aa_transition_matrix.Rd @@ -4,7 +4,7 @@ \alias{get_aa_transition_matrix} \title{Get an amino acid transition matrix} \usage{ -get_aa_transition_matrix() +get_aa_transition_matrix(transition_matrix_name = "FLU") } \value{ a tibble, with column names equal to the uppercase diff --git a/man/is_detected.Rd b/man/is_detected.Rd index 9f2d5b9..99ff729 100644 --- a/man/is_detected.Rd +++ b/man/is_detected.Rd @@ -7,8 +7,9 @@ is_detected( protein_sequence, mhc_haplotype, - fragment_length = 9, - percentile = get_ic50_percentile_binder() + peptide_length, + percentile = get_ic50_percentile_binder(), + ic50_prediction_tool ) } \arguments{ @@ -18,13 +19,22 @@ is_detected( Use \link{get_mhc1_haplotypes} to get a list of all MHC-I haplotypes. Use \link{get_mhc2_haplotypes} to get a list of all MHC-II haplotypes.} -\item{fragment_length}{the length of the peptide fragments -the peptide will be cut into} +\item{peptide_length}{length of the peptide in amino acids} \item{percentile}{how low the IC50 must be for the protein to be considered a binder. For example, 0.02 denotes that the protein must have an IC50 in the lowest 2 percent range. The default value is returned by \link{get_ic50_percentile_binder}.} + +\item{ic50_prediction_tool}{tool to predict the IC50 from a + peptide. Possible values are:\cr +\itemize{ + \item mhcnuggetsr \link[mhcnuggetsr]{mhcnuggetsr}, + which uses MHCnuggets + \item mhcnuggetsr \link[netmhc2pan]{netmhc2pan}, + which uses NetMHC2pam + \item EpitopePrediction uses \code{EpitopePrediction} +}} } \description{ Determine if the haplotype has at least one binder to the peptide diff --git a/tests/testthat/test-are_detected.R b/tests/testthat/test-are_detected.R index 582a28e..1aa61a8 100644 --- a/tests/testthat/test-are_detected.R +++ b/tests/testthat/test-are_detected.R @@ -1,11 +1,14 @@ test_that("use", { - skip("Not now") expect_equal( are_detected( - c( + protein_sequences = c( "VVIILTIAGNILVIMAVSLE", "AAAARTIAGRILVIMARSAA" - ) + ), + mhc_haplotype = bbbq::get_mhc1_haplotypes()[1], + peptide_length = 9, + percentile = 0.02, + ic50_prediction_tool = "EpitopePrediction" ), c(TRUE, FALSE) ) diff --git a/tests/testthat/test-create_consensus_topology_conservation.R b/tests/testthat/test-create_consensus_topology_conservation.R index d6449d3..744e3e0 100644 --- a/tests/testthat/test-create_consensus_topology_conservation.R +++ b/tests/testthat/test-create_consensus_topology_conservation.R @@ -10,6 +10,7 @@ test_that("use, simple data", { "IMPRESSIVELYFLILAWAYFANSWALKWEETMARKETTRIVIALLYNAILIDENTIFY", "IMPRESSIVELYFLIYAWAYFANSWALKSWEETMARKETRIVIALLYNAILIDENTIFY" ) + library(msa) t <- create_consensus_topology_conservation(protein_sequences) expect_true(tibble::is_tibble(t)) expect_equal(60, nrow(t)) diff --git a/tests/testthat/test-get_aa_transition_matrix.R b/tests/testthat/test-get_aa_transition_matrix.R index ad8f176..35f9595 100644 --- a/tests/testthat/test-get_aa_transition_matrix.R +++ b/tests/testthat/test-get_aa_transition_matrix.R @@ -1,4 +1,8 @@ test_that("use", { + # Follow the BioStrings BLOSUM62 matrix + #utils::data(BLOSUM62); + #expect_equal(rownames(BLOSUM62), rownames(m)) + m <- get_aa_transition_matrix() expect_equal(20, nrow(m)) expect_equal(20, ncol(m)) diff --git a/tests/testthat/test-is_detected.R b/tests/testthat/test-is_detected.R index 5e0d5fc..2cb8cd6 100644 --- a/tests/testthat/test-is_detected.R +++ b/tests/testthat/test-is_detected.R @@ -1,18 +1,21 @@ test_that("use", { - if (!mhcnuggetsr::is_mhcnuggets_installed()) return() - expect_true( is_detected( "VVIILTIAGN", - mhc_haplotype = mhcnuggetsr::get_mhc_1_haplotypes()[4], - percentile = 0.5 + mhc_haplotype = bbbq::get_mhc1_haplotypes()[1], + peptide_length = 9, + percentile = 0.5, + ic50_prediction_tool = "EpitopePrediction" ) ) expect_false( is_detected( "AAAARTIAGR", - mhc_haplotype = mhcnuggetsr::get_mhc_1_haplotypes()[20] + mhc_haplotype = bbbq::get_mhc1_haplotypes()[1], + peptide_length = 9, + percentile = 0.01, + ic50_prediction_tool = "EpitopePrediction" ) ) })