From 77c7bcb36680d83b41890640bcd12ae87bc31df1 Mon Sep 17 00:00:00 2001 From: Vlad Petyuk Date: Mon, 6 May 2024 12:16:29 -0700 Subject: [PATCH] resolving gene-site ambiguity --- R/read_FragPipe_TMT.R | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/R/read_FragPipe_TMT.R b/R/read_FragPipe_TMT.R index 017ed2b..609292b 100644 --- a/R/read_FragPipe_TMT.R +++ b/R/read_FragPipe_TMT.R @@ -59,12 +59,21 @@ read_FragPipe_TMT <- function(path = NULL, org_to_retain = NULL, use_gene_as_pro else if (grepl("single-site", basename(path_to_file))) { if(use_gene_as_prot_id){ df <- df %>% + filter(Gene != "") %>% mutate(rowname = paste0(Gene, "-", sub("[^_]*_([A-Z]\\d+)","\\1",Index))) if(anyDuplicated(df$rowname)){ - stop("Duplicates in the gene-based site names. Switch to use_gene_as_prot_id = FALSE.") - } + # let's try to resolve by ReferenceIntensity + if(!("ReferenceIntensity" %in% colnames(df))){ + stop("Duplicates in the gene-based site names. Can't resolve ambiguity. + Switch to use_gene_as_prot_id = FALSE.") + } else { + df <- df %>% + group_by(rowname) %>% + slice_max(ReferenceIntensity) + } + } }else{ df <- df %>% mutate(rowname = Index) }