Skip to content

Commit

Permalink
fix bugs and add biomarkers to Excel output
Browse files Browse the repository at this point in the history
  • Loading branch information
sigven committed Apr 22, 2024
1 parent 8d291a6 commit e75e8fa
Show file tree
Hide file tree
Showing 14 changed files with 269 additions and 146 deletions.
6 changes: 1 addition & 5 deletions pcgr/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,7 +646,7 @@ def run_pcgr(input_data, output_data,conf_options):
outfile.close()

# PCGR|Report - Generation of Excel workbooks and integrative HTML reports for molecular data interpretation
## SNVs/InDels, CNAs, expression, TMB, MSI, Mutational signatures
## SNVs/InDels, CNAs, expression, TMB, MSI, mutational signatures
if not conf_options['other']['no_reporting'] and not input_vcf == 'None':
logger = getlogger('pcgr-writer')
logger.info('PCGR - STEP 6: Generation of output files - molecular interpretation report for precision cancer medicine')
Expand All @@ -658,10 +658,6 @@ def run_pcgr(input_data, output_data,conf_options):
pcgr_report_command = (
f"{rscript} {pcgrr_script} {yaml_fname}")

print('pcgrr_conda: ' + str(pcgrr_conda))
print('pcgr_conda: ' + str(pcgr_conda))
print(pcgr_report_command)
#exit(0)
if debug:
print(pcgr_report_command)
check_subprocess(logger, pcgr_report_command, debug)
Expand Down
237 changes: 128 additions & 109 deletions pcgrr/R/biomarkers.R
Original file line number Diff line number Diff line change
Expand Up @@ -1135,127 +1135,139 @@ get_dt_tables <- function(
if(!"callset" %in% names(report$content[[variant_class]])){
stop("report$content$variant_class object does not contain 'callset'")
}

biomarker_assoc_summary <- data.frame()
biomarker_top_resolution <- data.frame()
biomarker_context <- data.frame()

var_eitems <-
report$content[[variant_class]]$callset$variant_display |>
dplyr::filter(
!is.na(.data$ACTIONABILITY_TIER) &
.data$ACTIONABILITY_TIER == tier) |>
dplyr::select(-c("ACTIONABILITY_FRAMEWORK")) |>
dplyr::inner_join(
report$content[[variant_class]]$callset$biomarker_evidence$items,
by = c("VAR_ID","ACTIONABILITY_TIER",
"VARIANT_CLASS","ENTREZGENE")
) |>
dplyr::distinct()
dplyr::select(-c("ACTIONABILITY_FRAMEWORK"))

if(NROW(var_eitems) > 0){
var_eitems <- var_eitems |>
dplyr::inner_join(
report$content[[variant_class]]$callset$biomarker_evidence$items,
by = c("VAR_ID","ACTIONABILITY_TIER",
"VARIANT_CLASS","ENTREZGENE")
) |>
dplyr::distinct()

biomarker_top_resolution <-
var_eitems |>
dplyr::select(
c("VAR_ID",
"VARIANT_CLASS",
"ENTREZGENE",
"BM_RESOLUTION")
) |>
dplyr::group_by(
.data$VAR_ID,
.data$VARIANT_CLASS,
.data$ENTREZGENE
) |>
dplyr::summarise(
BM_RESOLUTION = paste(
.data$BM_RESOLUTION,
collapse = ","),
.groups = "drop"
) |>
dplyr::mutate(
BM_TOP_RESOLUTION = dplyr::case_when(
stringr::str_detect(
.data$BM_RESOLUTION, "genomic|hgvsp|codon") ~ "high",
!stringr::str_detect(
.data$BM_RESOLUTION, "genomic|hgvsp|codon") &
biomarker_top_resolution <-
var_eitems |>
dplyr::select(
c("VAR_ID",
"VARIANT_CLASS",
"ENTREZGENE",
"BM_RESOLUTION")
) |>
dplyr::group_by(
.data$VAR_ID,
.data$VARIANT_CLASS,
.data$ENTREZGENE
) |>
dplyr::summarise(
BM_RESOLUTION = paste(
.data$BM_RESOLUTION,
collapse = ","),
.groups = "drop"
) |>
dplyr::mutate(
BM_TOP_RESOLUTION = dplyr::case_when(
stringr::str_detect(
.data$BM_RESOLUTION, "exon|gene") ~ "low",
TRUE ~ "low"
)
) |>
dplyr::select(-c("BM_RESOLUTION")) |>
dplyr::distinct()

biomarker_context <- var_eitems |>
dplyr::mutate(BM_CONTEXT = dplyr::case_when(
.data$BM_EVIDENCE_TYPE == "Predictive" ~
paste0(.data$BM_CLINICAL_SIGNIFICANCE, " - ",
.data$BM_THERAPEUTIC_CONTEXT),
.data$BM_EVIDENCE_TYPE == "Prognostic" |
.data$BM_EVIDENCE_TYPE == "Diagnostic" ~
paste0(.data$BM_EVIDENCE_TYPE, " - ",
.data$BM_CLINICAL_SIGNIFICANCE),
TRUE ~ as.character(.data$BM_CLINICAL_SIGNIFICANCE)
)) |>
dplyr::mutate(BM_CONTEXT = stringr::str_replace_all(
.data$BM_CONTEXT, ",", ", "
)) |>
dplyr::select(
c("VAR_ID",
"VARIANT_CLASS",
"ENTREZGENE",
"BM_EVIDENCE_ID",
"BM_CONTEXT")
) |>
dplyr::distinct()
.data$BM_RESOLUTION, "genomic|hgvsp|codon") ~ "high",
!stringr::str_detect(
.data$BM_RESOLUTION, "genomic|hgvsp|codon") &
stringr::str_detect(
.data$BM_RESOLUTION, "exon|gene") ~ "low",
TRUE ~ "low"
)
) |>
dplyr::select(-c("BM_RESOLUTION")) |>
dplyr::distinct()

biomarker_assoc_summary <-
var_eitems |>
dplyr::select(
c("VAR_ID",
"VARIANT_CLASS",
"ENTREZGENE",
"BM_EVIDENCE_TYPE",
"BM_CLINICAL_SIGNIFICANCE",
"BM_PRIMARY_SITE")
) |>
dplyr::distinct() |>
dplyr::mutate(BM_PRIMARY_SITE = dplyr::if_else(
.data$BM_PRIMARY_SITE == "Any",
"Any tumor type",
.data$BM_PRIMARY_SITE
)) |>

dplyr::group_by(
.data$VAR_ID,
.data$VARIANT_CLASS,
.data$ENTREZGENE,
.data$BM_EVIDENCE_TYPE,
.data$BM_CLINICAL_SIGNIFICANCE
) |>
dplyr::summarise(
BM_PRIMARY_SITE = paste(
sort(.data$BM_PRIMARY_SITE),
collapse = ", "),
.groups = "drop") |>
dplyr::mutate(BM_ASSOC = paste0(
" - ",
.data$BM_PRIMARY_SITE, ": ",
.data$BM_EVIDENCE_TYPE, " | ",
.data$BM_CLINICAL_SIGNIFICANCE)
biomarker_context <-
var_eitems |>
dplyr::mutate(BM_CONTEXT = dplyr::case_when(
.data$BM_EVIDENCE_TYPE == "Predictive" ~
paste0(.data$BM_CLINICAL_SIGNIFICANCE, " - ",
.data$BM_THERAPEUTIC_CONTEXT),
.data$BM_EVIDENCE_TYPE == "Prognostic" |
.data$BM_EVIDENCE_TYPE == "Diagnostic" ~
paste0(.data$BM_EVIDENCE_TYPE, " - ",
.data$BM_CLINICAL_SIGNIFICANCE),
TRUE ~ as.character(.data$BM_CLINICAL_SIGNIFICANCE)
)) |>
dplyr::mutate(BM_CONTEXT = stringr::str_replace_all(
.data$BM_CONTEXT, ",", ", "
)) |>
dplyr::select(
c("VAR_ID",
"VARIANT_CLASS",
"ENTREZGENE",
"BM_EVIDENCE_ID",
"BM_CONTEXT")
) |>
dplyr::distinct()

dplyr::group_by(
.data$VAR_ID,
.data$VARIANT_CLASS,
.data$ENTREZGENE
) |>
dplyr::summarise(
BIOMARKER_EVIDENCE = paste(
.data$BM_ASSOC, collapse="<br><br>"),
.groups = "drop"
)
biomarker_assoc_summary <-
var_eitems |>
dplyr::select(
c("VAR_ID",
"VARIANT_CLASS",
"ENTREZGENE",
"BM_EVIDENCE_TYPE",
"BM_CLINICAL_SIGNIFICANCE",
"BM_PRIMARY_SITE")
) |>
dplyr::distinct() |>
dplyr::mutate(BM_PRIMARY_SITE = dplyr::if_else(
.data$BM_PRIMARY_SITE == "Any",
"Any tumor type",
.data$BM_PRIMARY_SITE
)) |>

dplyr::group_by(
.data$VAR_ID,
.data$VARIANT_CLASS,
.data$ENTREZGENE,
.data$BM_EVIDENCE_TYPE,
.data$BM_CLINICAL_SIGNIFICANCE
) |>
dplyr::summarise(
BM_PRIMARY_SITE = paste(
sort(.data$BM_PRIMARY_SITE),
collapse = ", "),
.groups = "drop") |>
dplyr::mutate(BM_ASSOC = paste0(
" - ",
.data$BM_PRIMARY_SITE, ": ",
.data$BM_EVIDENCE_TYPE, " | ",
.data$BM_CLINICAL_SIGNIFICANCE)
) |>

dplyr::group_by(
.data$VAR_ID,
.data$VARIANT_CLASS,
.data$ENTREZGENE
) |>
dplyr::summarise(
BIOMARKER_EVIDENCE = paste(
.data$BM_ASSOC, collapse="<br><br>"),
.groups = "drop"
)
}

dt <- list()
dt[['by_eitem']] <- data.frame()
dt[['by_gene']] <- data.frame()

if(variant_class == "cna"){
if(variant_class == "cna" &
NROW(var_eitems) > 0 &
NROW(biomarker_context) > 0){
dt[['by_eitem']] <-
var_eitems |>
dplyr::left_join(
Expand All @@ -1272,7 +1284,9 @@ get_dt_tables <- function(
)
)
}
if(variant_class == "snv_indel"){
if(variant_class == "snv_indel" &
NROW(var_eitems) > 0 &
NROW(biomarker_context) > 0){
dt[['by_eitem']] <-
var_eitems |>
dplyr::left_join(
Expand All @@ -1290,7 +1304,9 @@ get_dt_tables <- function(
)
}

if(variant_class == "cna"){
if(variant_class == "cna" &
NROW(var_eitems) > 0 &
NROW(biomarker_assoc_summary) > 0){
dt[['by_gene']] <-
var_eitems |>
dplyr::left_join(
Expand All @@ -1306,7 +1322,10 @@ get_dt_tables <- function(
dplyr::distinct()
}

if(variant_class == "snv_indel"){
if(variant_class == "snv_indel" &
NROW(var_eitems) > 0 &
NROW(biomarker_assoc_summary) > 0 &
NROW(biomarker_top_resolution) > 0){
dt[['by_gene']] <-
var_eitems |>
dplyr::left_join(
Expand Down
8 changes: 4 additions & 4 deletions pcgrr/R/main.R
Original file line number Diff line number Diff line change
Expand Up @@ -968,18 +968,18 @@ write_report_excel <- function(report = NULL){
pcgrr::log4r_info("------")
pcgrr::log4r_info(
paste0("Generating Excel workbook (.xlsx) with ",
"variant findings"))
"key findings"))
workbook <- openxlsx2::wb_workbook()

i <- 15
for(elem in c('SAMPLE_ASSAY',
'SNV_INDEL',
'SNV_INDEL_BIOMARKERS',
'SNV_INDEL_BIOMARKER',
'CNA',
'CNA_BIOMARKERS',
'CNA_BIOMARKER',
'TMB',
'MSI',
'MUTATIONAL_SIGNATURES',
'MUTATIONAL_SIGNATURE',
'KATAEGIS',
'IMMUNE_CONTEXTURE')){
if(elem %in% names(excel_output)){
Expand Down
Loading

0 comments on commit e75e8fa

Please sign in to comment.