Skip to content

Commit

Permalink
add comparison script
Browse files Browse the repository at this point in the history
  • Loading branch information
pdiakumis committed Jan 26, 2024
1 parent 7c4c8e0 commit 4502602
Showing 1 changed file with 65 additions and 0 deletions.
65 changes: 65 additions & 0 deletions inst/scripts/compare.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
require(dplyr)
require(readr)
require(dracarys)
require(here)
require(glue)
require(tibble)
require(tidyr)

# gds results per subject
# `o` is the local root directory that per-subject outputs are placed under.
o <- here("nogit/comparisons")
# Access tokens come from the environment, one per namespace.
tokpro <- Sys.getenv("ICA_ACCESS_TOKEN_PRO")
tokdev <- Sys.getenv("ICA_ACCESS_TOKEN_DEV")

# One row per subject, holding the run-specific path suffix for each namespace.
s <- tibble::tribble(
  ~sbj, ~pro, ~dev,
  "SBJ04468", "20231212b3b22850/SBJ04468__L2301504/PRJ231328.results", "output/SBJ04468__L2301504/PRJ231328.results",
  "SBJ04469", "20231209eeca1fe7/SBJ04469__L2301505/MDX230556.results", "output/SBJ04469__L2301505/MDX230556.results",
  "SBJ04578", "2024011447748eb1/SBJ04578__L2400043/MDX230546.results", "output_test/SBJ04578__L2400043/MDX230546.results"
)

# Expand each suffix into a full gds:// directory URI.
s <- dplyr::mutate(
  s,
  pro = glue::glue("gds://production/analysis_data/{sbj}/rnasum/{pro}"),
  dev = glue::glue("gds://development/sehrish/rnasum_0.5.0/{sbj}/{dev}")
)

# Long format: one row per subject and namespace ("pro" or "dev").
s <- tidyr::pivot_longer(
  s,
  cols = c("pro", "dev"),
  names_to = "namespace",
  values_to = "gdsdir"
)

# Attach the local output directory and the token matching each namespace.
s <- dplyr::mutate(
  s,
  outdir = file.path(o, sbj, namespace),
  token = ifelse(namespace == "pro", tokpro, tokdev)
)

# download results
# File-name patterns passed to the downloader via `regexes`; each pattern is
# paired with a label in the `fun` column.
regex <- tibble::tribble(
  ~regex, ~fun,
  "^genes\\.expr\\.perc\\.html$", "foo1",
  "^genes\\.expr\\.z\\.html$", "foo2",
)
# dracarys::dr_gds_download(gdsdir = s$gdsdir[1], outdir = s$outdir[1], token = s$token[1], page_size = 200, dryrun = TRUE, regexes = regex)

# Call the downloader once per row of `s`, in row order, and keep whatever
# each call returns in the list column `dl`.
x <- s
x$dl <- lapply(
  seq_len(nrow(s)),
  function(i) {
    dracarys::dr_gds_download(
      gdsdir = s$gdsdir[[i]],
      outdir = s$outdir[[i]],
      token = s$token[[i]],
      page_size = 200,
      dryrun = FALSE,
      regexes = regex
    )
  }
)

# after download, open html and export csv
# Table of the CSV paths expected under each subject/namespace output
# directory: one row per (sbj, namespace, ftype), with the path in `fpath`.
d <- s |>
  dplyr::select(sbj, namespace, outdir) |>
  dplyr::mutate(
    exprp = file.path(outdir, "perc.csv"),
    exprz = file.path(outdir, "z.csv"),
    # outdir was only needed to build the two paths above
    outdir = NULL
  ) |>
  tidyr::pivot_longer(
    cols = c("exprp", "exprz"),
    names_to = "ftype",
    values_to = "fpath"
  )

# Compare the per-gene tables exported for the dev and pro runs.
# NOTE(review): both paths are relative, so they resolve against the current
# working directory rather than the `here()` root used earlier — confirm.
dev <- readr::read_csv("dev/dev_datatables.csv")
pro <- readr::read_csv("prod/pro_datatables.csv")

# Join on Gene (keeping every dev row), compute per-gene differences for the
# reference and patient columns, and list the genes that differ. The result is
# left unassigned so it is printed when run at top level.
dplyr::left_join(dev, pro, by = "Gene", suffix = c(".dev", ".pro")) |>
  dplyr::mutate(
    # Exact `==` comparison: any non-zero difference counts as "not equal",
    # consistent with the `> 0` filter below.
    Ref_equal = `KIRP (TCGA).dev` == `KIRP (TCGA).pro`,
    Pat_equal = Patient.dev == Patient.pro,
    Ref_diff = abs(`KIRP (TCGA).dev` - `KIRP (TCGA).pro`),
    Pat_diff = abs(Patient.dev - Patient.pro)
  ) |>
  dplyr::select(
    Gene,
    dplyr::contains("PANCAN"),
    dplyr::contains("Patient"),
    dplyr::contains("diff"),
    dplyr::everything()
  ) |>
  # filter() keeps only rows where the condition is TRUE, so rows where it
  # evaluates to NA (e.g. a dev gene with no match in pro) are dropped here.
  dplyr::filter(Pat_diff > 0 | Ref_diff > 0) |>
  # Single sort: largest reference difference first, ties broken by the
  # largest patient difference. Replaces two back-to-back arrange() calls
  # where the second re-sorted the output of the first.
  dplyr::arrange(dplyr::desc(Ref_diff), dplyr::desc(Pat_diff))

0 comments on commit 4502602

Please sign in to comment.