# inst/scripts/compare.R
#
# Compare rnasum results for the same subjects between the production ("pro")
# and development ("dev") GDS namespaces:
#   1. build a table of GDS result directories per subject and namespace,
#   2. download the files matching `regex` from each directory,
#   3. (manual) open the downloaded HTML and export the tables to CSV,
#   4. join the dev and pro CSV exports by gene and list the differences.

# library() errors immediately when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with an obscure message.
library(dplyr)
library(readr)
library(dracarys)
library(here)
library(glue)
library(tibble)
library(tidyr)

# gds results per subject ----
o <- here("nogit/comparisons")
tokpro <- Sys.getenv("ICA_ACCESS_TOKEN_PRO")
tokdev <- Sys.getenv("ICA_ACCESS_TOKEN_DEV")
# Sys.getenv() returns "" for unset variables; stop here with a clear message
# instead of passing an empty token to the download below.
if (!nzchar(tokpro) || !nzchar(tokdev)) {
  stop(
    "ICA_ACCESS_TOKEN_PRO and ICA_ACCESS_TOKEN_DEV must both be set.",
    call. = FALSE
  )
}

# One row per subject and namespace, with the GDS source directory, the local
# output directory and the access token to use for that namespace.
s <- tibble::tribble(
  ~sbj, ~pro, ~dev,
  "SBJ04468", "20231212b3b22850/SBJ04468__L2301504/PRJ231328.results", "output/SBJ04468__L2301504/PRJ231328.results",
  "SBJ04469", "20231209eeca1fe7/SBJ04469__L2301505/MDX230556.results", "output/SBJ04469__L2301505/MDX230556.results",
  "SBJ04578", "2024011447748eb1/SBJ04578__L2400043/MDX230546.results", "output_test/SBJ04578__L2400043/MDX230546.results"
) |>
  mutate(
    pro = glue("gds://production/analysis_data/{sbj}/rnasum/{pro}"),
    dev = glue("gds://development/sehrish/rnasum_0.5.0/{sbj}/{dev}")
  ) |>
  pivot_longer(
    cols = c("pro", "dev"),
    names_to = "namespace",
    values_to = "gdsdir"
  ) |>
  mutate(
    outdir = file.path(o, sbj, namespace),
    token = ifelse(namespace == "pro", tokpro, tokdev)
  )

# download results ----
# File name patterns to fetch. NOTE(review): "foo1"/"foo2" look like
# placeholder values for the `fun` column -- confirm what dracarys expects.
regex <- tibble::tribble(
  ~regex, ~fun,
  "^genes\\.expr\\.perc\\.html$", "foo1",
  "^genes\\.expr\\.z\\.html$", "foo2"
)
# Dry run for a single row:
# dracarys::dr_gds_download(gdsdir = s$gdsdir[1], outdir = s$outdir[1], token = s$token[1], page_size = 200, dryrun = TRUE, regexes = regex)
# rowwise() so the download is called once per row with scalar arguments; the
# value returned by each call is kept in the `dl` list-column.
x <- s |>
  rowwise() |>
  mutate(
    dl = list(
      dracarys::dr_gds_download(
        gdsdir = gdsdir,
        outdir = outdir,
        token = token,
        page_size = 200,
        dryrun = FALSE,
        regexes = regex
      )
    )
  ) |>
  ungroup()

# after download, open html and export csv ----
# Paths of the per-subject/namespace CSV exports, in long format.
# NOTE(review): `d` is not used by the comparison below, which reads two
# hard-coded CSV files instead.
d <- s |>
  mutate(
    exprp = file.path(outdir, "perc.csv"),
    exprz = file.path(outdir, "z.csv")
  ) |>
  select(sbj, namespace, exprp, exprz) |>
  pivot_longer(
    cols = c("exprp", "exprz"),
    names_to = "ftype",
    values_to = "fpath"
  )

# compare dev vs pro ----
# These paths are relative to the current working directory.
dev <- readr::read_csv("dev/dev_datatables.csv")
pro <- readr::read_csv("prod/pro_datatables.csv")

# Join by gene, flag/measure differences in the reference (`KIRP (TCGA)`) and
# `Patient` columns, and show the genes that differ.
dplyr::left_join(dev, pro, by = "Gene", suffix = c(".dev", ".pro")) |>
  dplyr::mutate(
    # Exact comparison: any difference, however small, counts. Use
    # dplyr::near() instead if floating-point noise should be ignored.
    Ref_equal = `KIRP (TCGA).dev` == `KIRP (TCGA).pro`,
    Pat_equal = Patient.dev == Patient.pro,
    Ref_diff = abs(`KIRP (TCGA).dev` - `KIRP (TCGA).pro`),
    Pat_diff = abs(Patient.dev - Patient.pro)
  ) |>
  # NOTE(review): no column visible in this script contains "PANCAN", so this
  # selector may match nothing -- confirm against the CSV headers.
  dplyr::select(
    Gene,
    contains("PANCAN"),
    contains("Patient"),
    contains("diff"),
    everything()
  ) |>
  # filter() drops rows whose condition evaluates to NA, e.g. genes in `dev`
  # with no match in `pro`.
  dplyr::filter(Pat_diff > 0 | Ref_diff > 0) |>
  # Largest reference difference first; ties broken by patient difference.
  dplyr::arrange(desc(Ref_diff), desc(Pat_diff))