Skip to content

Commit

Permalink
Merge pull request #156 from umccr/piedb-comp
Browse files Browse the repository at this point in the history
PieDB comparison
  • Loading branch information
skanwal authored Jul 16, 2024
2 parents a395332 + 45ebb0d commit d2c49fc
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 3 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ dataset is used as reference cohort (`--dataset TEST`).
rnasum.R \
--sample_name test_sample_WTS \
--dataset TEST \
--dragen_rnaseq inst/rawdata/test_data/dragen \
--dragen_wts_dir inst/rawdata/test_data/dragen \
--report_dir inst/rawdata/test_data/dragen/RNAsum \
--umccrise inst/rawdata/test_data/umccrised/test_sample_WGS \
--save_tables FALSE
Expand All @@ -396,7 +396,7 @@ pancreatic adenocarcinoma dataset is used as the reference cohort
rnasum.R \
--sample_name test_sample_WTS \
--dataset TEST \
--dragen_rnaseq inst/rawdata/test_data/dragen \
--dragen_wts_dir inst/rawdata/test_data/dragen \
--report_dir inst/rawdata/test_data/dragen/RNAsum \
--save_tables FALSE
```
Expand All @@ -422,7 +422,7 @@ dataset is used as the reference cohort (`--dataset TEST`).
rnasum.R \
--sample_name test_sample_WTS \
--dataset TEST \
--dragen_rnaseq $(pwd)/../rawdata/test_data/dragen \
--dragen_wts_dir $(pwd)/../rawdata/test_data/dragen \
--report_dir $(pwd)/../rawdata/test_data/dragen/RNAsum \
--umccrise $(pwd)/../rawdata/test_data/umccrised/test_sample_WGS \
--save_tables FALSE \
Expand Down
50 changes: 50 additions & 0 deletions inst/scripts/compare_piedb.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Attach dependencies with library() so that a missing package stops the
# script right here; require() only warns and returns FALSE, which lets the
# script run on and fail later with a less helpful error.
# NOTE(review): here, glue, tibble and tidyr are not referenced in the visible
# part of this script - confirm whether they are still needed.
library(dplyr)
library(readr)
library(here)
library(glue)
library(tibble)
library(tidyr)
library(DT)

# Input tables: the dev and pro expression tables of each subject, plus the
# somatic cancer gene panel. All paths are relative to the working directory.
comp_dir <- "../../data/wts/RNAsum/reference_update_comp"
expr_file <- "genes.expr.perc.csv"

SBJ04426_dev <- readr::read_csv(
  file.path(comp_dir, "SBJ04426", "dev", expr_file)
)
SBJ04426_pro <- readr::read_csv(
  file.path(comp_dir, "SBJ04426", "pro", expr_file)
)
SBJ04187_dev <- readr::read_csv(
  file.path(comp_dir, "SBJ04187", "dev", expr_file)
)
SBJ04187_pro <- readr::read_csv(
  file.path(comp_dir, "SBJ04187", "pro", expr_file)
)

cancer_genes <- readr::read_tsv(
  file = "../../research/data/cancer_gene_list/somatic_panel-v24.03.0.tsv"
)

# Explore how the reference-cohort and patient expression columns differ
# between the dev and prod runs of the same sample.

#' Compare dev and prod expression tables for one sample
#'
#' Left-joins the dev table to the prod table on `Gene`, adds equality flags
#' and absolute differences for the reference column and the `Patient`
#' column, keeps only panel genes whose patient value changed, and wraps the
#' result in an interactive table.
#'
#' @param dev,pro Data frames holding the dev and prod versions of
#'   `genes.expr.perc.csv`. Both must contain `Gene`, `Patient` and the
#'   column named by `ref_col`.
#' @param panel_genes Vector of gene symbols; rows whose `Gene` is not in it
#'   are dropped.
#' @param ref_col Name of the reference-cohort column to compare. Defaults to
#'   `"BRCA (TCGA)"`; pass e.g. `"PANCAN (TCGA)"` to compare that column
#'   instead.
#' @return A `DT::datatable()` widget, ordered by decreasing `Ref_diff` with
#'   decreasing `Pat_diff` as the tie-breaker.
compare_expr_perc <- function(dev, pro, panel_genes,
                              ref_col = "BRCA (TCGA)") {
  # Fail early with a clear message. A column that is missing from one side
  # would not receive a suffix in the join, and the mutate() below would then
  # fail with an "object not found" error instead.
  needed <- c("Gene", "Patient", ref_col)
  inputs <- list(dev = dev, pro = pro)
  for (label in names(inputs)) {
    absent <- setdiff(needed, names(inputs[[label]]))
    if (length(absent) > 0) {
      stop(
        "`", label, "` is missing column(s): ",
        paste(absent, collapse = ", "),
        call. = FALSE
      )
    }
  }

  ref_dev <- paste0(ref_col, ".dev")
  ref_pro <- paste0(ref_col, ".pro")

  dplyr::left_join(dev, pro, by = "Gene", suffix = c(".dev", ".pro")) |>
    dplyr::mutate(
      Ref_equal = .data[[ref_dev]] == .data[[ref_pro]],
      Pat_equal = Patient.dev == Patient.pro,
      Ref_diff = abs(.data[[ref_dev]] - .data[[ref_pro]]),
      Pat_diff = abs(Patient.dev - Patient.pro)
    ) |>
    dplyr::select(
      Gene,
      dplyr::contains("BRCA"),
      dplyr::contains("PANCAN"),
      # No-op for the default ref_col (already matched above); guarantees the
      # compared columns are shown for any other reference column.
      dplyr::all_of(c(ref_dev, ref_pro)),
      Ref_diff,
      dplyr::contains("Patient"),
      Pat_diff,
      dplyr::contains("equal")
    ) |>
    dplyr::filter(Pat_diff > 0, Gene %in% panel_genes) |>
    # One arrange() call states the key priority explicitly: Ref_diff is the
    # primary key, Pat_diff breaks ties.
    dplyr::arrange(dplyr::desc(Ref_diff), dplyr::desc(Pat_diff)) |>
    DT::datatable()
}

SBJ04426_df <- compare_expr_perc(
  SBJ04426_dev, SBJ04426_pro, cancer_genes$ensembl_gene_symbol
)

SBJ04187_df <- compare_expr_perc(
  SBJ04187_dev, SBJ04187_pro, cancer_genes$ensembl_gene_symbol
)

# Plot the distribution of Ref_diff for SBJ04426. `SBJ04426_df` is the widget
# returned by datatable(), so the underlying table is read back from its
# `x$data` slot by name rather than by list position. An explicit title and
# axis label replace the default ones, which hist() derives from the deparsed
# argument expression.
hist(
  SBJ04426_df$x$data$Ref_diff,
  breaks = 100,
  main = "SBJ04426: Ref_diff between dev and prod",
  xlab = "Ref_diff"
)


0 comments on commit d2c49fc

Please sign in to comment.