diff --git a/.gitignore b/.gitignore
index 88c5a3d..e2fec9e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@
__pycache__/
*.py[cod]
.Rproj.user
+.Rhistory
/nogit
/docs
diff --git a/NAMESPACE b/NAMESPACE
index 2291398..688287c 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -62,6 +62,8 @@ export(multiqc_tidy_json)
export(portal_meta_read)
export(rdf2tab)
export(read)
+export(s3_files_list_filter_relevant)
+export(s3_search)
export(session_info_kable)
export(time_metrics_process)
export(tso_rmd)
diff --git a/R/s3.R b/R/s3.R
new file mode 100644
index 0000000..c798a70
--- /dev/null
+++ b/R/s3.R
@@ -0,0 +1,101 @@
+#' List Relevant Files In AWS S3 Directory
+#'
+#' Lists relevant files in an AWS S3 directory.
+#'
+#' @param s3dir S3 directory.
+#' @param pattern Regex pattern to further filter the returned tibble by file type.
+#' @param page_size The size of each page to get in the AWS service call (def: 1000).
+#' @param max_items The total number of items to return in the command’s output (def: 1000).
+#' @param presign Include presigned URLs (def: FALSE).
+#' @param expiry_sec Number of seconds the presigned URL remains valid (if generated) (def: 43200 = 12hrs).
+#'
+#' @return A tibble with path, date, file size, file type, and presigned URL if requested.
+#' @examples
+#' \dontrun{
+#' s3dir <- "s3://umccr-primary-data-prod/Accreditation/ALLOCATE-134131/WGS/2021-07-26/umccrised/ALLOCATE-134131__ALLOCATE-134131_MDx150892_Missing/cancer_report_tables"
+#' s3_files_list_filter_relevant(s3dir = s3dir, presign = TRUE)
+#' }
+#' @export
+s3_files_list_filter_relevant <- function(s3dir, pattern = NULL, page_size = 1000, max_items = 1000, presign = FALSE, expiry_sec = 43200) {
+ assertthat::assert_that(grepl("^s3://", s3dir), rlang::is_logical(presign))
+ pattern <- pattern %||% ".*" # keep all recognisable files by default
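+ # split the "s3://bucket/prefix" URI into bucket (b) and object key prefix (p)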
+ b <- sub("s3://(.*?)/.*", "\\1", s3dir)
+ p <- sub("s3://(.*?)/(.*)", "\\2", s3dir)
+ cmd <- glue(
+ "aws --output json s3api list-objects-v2 --bucket {b} --prefix {p} ",
+ "--max-items {max_items} --page-size {page_size}"
+ )
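+ # shell out to the AWS CLI s3api (assumes `aws` is on PATH and credentials are configured)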
+ l <- system(cmd, intern = TRUE)
+ j <- jsonlite::fromJSON(l)
+ assertthat::assert_that("Contents" %in% names(j))
+ d <- j[["Contents"]] |>
+ tibble::as_tibble() |>
+ dplyr::mutate(
+ path = glue("s3://{b}/{.data$Key}"),
+ date_utc = .data$LastModified,
+ size = fs::as_fs_bytes(.data$Size)
+ ) |>
+ dplyr::rowwise() |>
+ dplyr::mutate(
+ bname = basename(.data$path),
+ type = purrr::map_chr(.data$bname, match_regex)
+ ) |>
+ dplyr::ungroup() |>
+ dplyr::filter(!is.na(.data$type), grepl(pattern, .data$type)) |>
+ dplyr::select("path", "date_utc", "size", "type")
+
+ if (presign) {
+ d <- d |>
+ dplyr::rowwise() |>
+ dplyr::mutate(presigned_url = s3_file_presignedurl(.data$path, expiry_seconds = expiry_sec)) |>
+ dplyr::ungroup()
+ }
+ d
+}
+
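+# presign a single S3 object via the AWS CLI; the URL stays valid for expiry_seconds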
+s3_file_presignedurl <- function(s3path, expiry_seconds = 3600) {
+ p <- system(glue("aws s3 presign {s3path} --expires-in {expiry_seconds}"), intern = TRUE)
+ p
+}
+
+#' Search AWS S3 Objects
+#'
+#' Searches for the given pattern in the UMCCR `umccr-primary-data-prod` AWS S3
+#' bucket.
+#'
+#' @param pat Pattern to search for (e.g. 'multiqc_data.json').
+#' @param rows Max number of rows to return.
+#'
+#' @return Tibble with S3 path, object size, date modified, id, and unique hash.
+#'
+#' @examples
+#' \dontrun{
+#' pat <- "qc_summary.tsv.gz"
+#' s3_search(pat, 10)
+#' }
+#' @export
+s3_search <- function(pat, rows) {
+ au_tz <- "Australia/Melbourne"
+ utc_tz <- "UTC"
+ base_url <- "https://api.portal.prod.umccr.org/iam/s3"
+ url1 <- utils::URLencode(glue("{base_url}?rowsPerPage={rows}&search={pat}"))
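+ # hit the portal /iam/s3 endpoint with a signed request (assumes awscurl is installed and AWS credentials are set)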
+ awscurl_cmd <- glue(
+ "awscurl '{url1}' ",
+ "--header 'Accept: application/json'"
+ )
+ message(glue("Running {awscurl_cmd}"))
+ j <- system(awscurl_cmd, intern = TRUE)
+ date_fmt <- "%Y-%m-%dT%H:%M:%S"
+ d <- j |>
+ jsonlite::fromJSON() |>
+ purrr::pluck("results") |>
+ tibble::as_tibble()
+ d |>
+ dplyr::mutate(
+ date1 = as.POSIXct(.data$last_modified_date, tz = utc_tz, format = date_fmt),
+ date_aest = lubridate::with_tz(.data$date1, tz = au_tz),
+ path = glue("s3://{.data$bucket}/{.data$key}"),
+ size = fs::as_fs_bytes(.data$size)
+ ) |>
+ dplyr::select("path", "size", "date_aest", "id", "unique_hash")
+}
diff --git a/inst/rmd/umccr_workflows/umccrise/multi.Rmd b/inst/rmd/umccr_workflows/umccrise/multi.Rmd
index 4689ad5..21b7bb2 100644
--- a/inst/rmd/umccr_workflows/umccrise/multi.Rmd
+++ b/inst/rmd/umccr_workflows/umccrise/multi.Rmd
@@ -47,10 +47,10 @@ knitr::opts_chunk$set(
```{r load_pkgs}
{
- require(dplyr)
+ require(dplyr) # import all dplyr funcs
require(readr, include.only = c("read_rds"))
require(purrr, include.only = c("map"))
- require(tidyr, include.only = c("unnest", "unnest_wider"))
+ require(tidyr, include.only = c("unnest"))
require(dracarys)
require(glue, include.only = "glue")
require(here, include.only = "here")
@@ -60,11 +60,53 @@ knitr::opts_chunk$set(
require(ggplot2, include.only = c("ggplot", "aes"))
require(lubridate, include.only = c("as_datetime"))
require(plotly, include.only = c("ggplotly"))
+ require(openssl, include.only = c("sha256"))
}
```
```{r data_setup, eval=FALSE}
-options(width = 150)
+#---- S3 ----#
+s3 <- here::here("nogit/umccrise/rds/portal_meta/2023-09-12_pmeta_s3.rds") |>
+ readr::read_rds()
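+# presign S3 results for a slice of rows at a time (roughly 2 seconds per row, hence the ~100-row batches below)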
+s3_get_presigned1 <- function(x, row_slice) {
+ start_time <- Sys.time()
+ s3_map <- x |>
+ slice(row_slice) |>
+ rowwise() |>
+ mutate(
+ s3_contents = list(s3_files_list_filter_relevant(
+ s3dir = .data$dir1, presign = TRUE
+ ))
+ ) |>
+ ungroup() |>
+ tidyr::unnest("s3_contents") |>
+ select(
+ "SubjectID", "LibraryID_tumor", "SampleID_tumor",
+ "date_utc", "type", "size", "path", "presigned_url"
+ )
+ end_time <- Sys.time()
+ total_time <- end_time - start_time
+ print(total_time)
+ s3_map
+}
+# 2 seconds per row
+s3_map1 <- s3_get_presigned1(s3, 1:100)
+s3_map2 <- s3_get_presigned1(s3, 101:200)
+s3_map3 <- s3_get_presigned1(s3, 201:300)
+s3_map4 <- s3_get_presigned1(s3, 301:400)
+s3_map5 <- s3_get_presigned1(s3, 401:449)
+
+saveRDS(s3_map1, here("nogit/umccrise/rds/s3/map1_2023-09-12.rds"))
+saveRDS(s3_map2, here("nogit/umccrise/rds/s3/map2_2023-09-12.rds"))
+saveRDS(s3_map3, here("nogit/umccrise/rds/s3/map3_2023-09-12.rds"))
+saveRDS(s3_map4, here("nogit/umccrise/rds/s3/map4_2023-09-12.rds"))
+saveRDS(s3_map5, here("nogit/umccrise/rds/s3/map5_2023-09-12.rds"))
+s3_map <- fs::dir_ls(here("nogit/umccrise/rds/s3"), regexp = "map.*rds") |>
+ purrr::map(readr::read_rds) |>
+ bind_rows()
+saveRDS(s3_map, here("nogit/umccrise/rds/s3_map_2023-09-12.rds"))
+
+#---- GDS ----#
token <- dracarys::ica_token_validate(Sys.getenv("ICA_ACCESS_TOKEN_PRO"))
pmeta <- here("nogit/umccrise/rds/portal_meta/2023-09-04_pmeta_final.rds") |>
readr::read_rds()
@@ -84,26 +126,38 @@ gds_map <- pmeta |>
filter(type != "MultiqcFile")
saveRDS(gds_map, here("nogit/umccrise/rds/gds_map_2023-09-05.rds"))
+```
-parse_files <- function(gds_map, row_slice, rds_out) {
+```{r data_parse, eval=FALSE}
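+# look up the dracarys parser for each file type via dr_func_eval(), construct it with the presigned URL, then read/parse the object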
+parse_files <- function(x, row_slice, rds_out) {
start_time <- Sys.time()
- dat1 <- gds_map |>
- dplyr::slice(row_slice) |>
- dplyr::rowwise() |>
- dplyr::mutate(
+ dat1 <- x |>
+ slice(row_slice) |>
+ rowwise() |>
+ mutate(
gen = list(dracarys::dr_func_eval(.data$type)),
obj = list(.data$gen$new(.data$presigned_url)),
objp = list(.data$obj$read())
) |>
- dplyr::ungroup()
+ ungroup()
end_time <- Sys.time()
total_time <- end_time - start_time
print(total_time)
readr::write_rds(x = dat1, file = rds_out)
}
+rds_path_out <- here::here("nogit/umccrise/rds/results")
+#---- S3 ----#
+s3_map <- readr::read_rds(here("nogit/umccrise/rds/s3_map_2023-09-12.rds"))
+s0 <- parse_files(s3_map, 1:10, file.path(rds_path_out, "s0.rds"))
+s1 <- parse_files(s3_map, 1:500, file.path(rds_path_out, "s1.rds"))
+s2 <- parse_files(s3_map, 501:1000, file.path(rds_path_out, "s2.rds"))
+s3 <- parse_files(s3_map, 1001:1500, file.path(rds_path_out, "s3.rds"))
+s4 <- parse_files(s3_map, 1501:2000, file.path(rds_path_out, "s4.rds"))
+s5 <- parse_files(s3_map, 2001:2245, file.path(rds_path_out, "s5.rds"))
+
+#---- GDS ----#
gds_map <- readr::read_rds(here("nogit/umccrise/rds/gds_map_2023-09-05.rds"))
-rds_path_out <- here("nogit/umccrise/rds/results")
x0 <- parse_files(gds_map, 1:10, file.path(rds_path_out, "x0.rds"))
x1 <- parse_files(gds_map, 1:500, file.path(rds_path_out, "x1.rds"))
x2 <- parse_files(gds_map, 501:1000, file.path(rds_path_out, "x2.rds"))
@@ -115,11 +169,38 @@ x5 <- parse_files(gds_map, 2001:2245, file.path(rds_path_out, "x5.rds"))
```{r data_load}
lims_raw <- here("nogit/umccrise/rds/lims/2023-09-04_lims_raw.rds") |>
readr::read_rds()
-dat1 <- fs::dir_ls(here("nogit/umccrise/rds/results")) |>
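+# s0/x0 were 10-row test slices; combine only the full batches (s1-s5 / x1-x5)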
+dat_s3_raw <- fs::dir_ls(here("nogit/umccrise/rds/results"), regexp = "s[1-5]{1}.rds") |>
+ purrr::map(readr::read_rds) |>
+ bind_rows()
+# create sha256 for umccrise directory to distinguish between runs
+# keep first 8 digits and append to umccrise date folder.
+dat_s3 <- dat_s3_raw |>
+ mutate(
+ um_dir = sub("s3://umccr-primary-data-prod/(.*)/cancer_report_tables/.*", "\\1", path),
+ date_dir = basename(dirname(dirname(um_dir))),
+ date_dir = gsub("-", "", date_dir),
+ hash256 = openssl::sha256(um_dir),
+ hash256 = substr(hash256, 1, 8),
+ portal_run_id = glue("fake.{date_dir}{hash256}")
+ ) |>
+ select(-c(um_dir, date_dir, hash256, SampleID_tumor))
+dat_gds <- fs::dir_ls(here("nogit/umccrise/rds/results"), regexp = "x[1-5]{1}.rds") |>
purrr::map(readr::read_rds) |>
- dplyr::bind_rows()
+ bind_rows()
-o <- dat1 |>
+dat_s3_res <- dat_s3 |>
+ mutate(
+ type = case_when(
+ grepl("snv_2015.tsv.gz", path) ~ "UmSigsSnvFile2015",
+ grepl("snv_2020.tsv.gz", path) ~ "UmSigsSnvFile2020",
+ .default = .data$type
+ ),
+ date_utc2 = lubridate::as_datetime(.data$date_utc, format = "%Y-%m-%dT%H:%M:%S+00:00"),
+ date_analysed_aest = lubridate::with_tz(.data$date_utc2, tz = "Australia/Melbourne"),
+ date_analysed_aest = as.character(.data$date_analysed_aest)
+ ) |>
+ select(date_analysed_aest, SubjectID, LibraryID_tumor, type, objp, portal_run_id)
+dat_gds_res <- dat_gds |>
mutate(
type = case_when(
grepl("snv_2015.tsv.gz", bname) ~ "UmSigsSnvFile2015",
@@ -128,35 +209,42 @@ o <- dat1 |>
),
date_analysed_aest = as.character(.data$end),
) |>
- select(
- date_analysed_aest,
- SubjectID,
- LibraryID_tumor,
- LibraryID_normal,
- type,
- objp,
- portal_run_id
- )
-
-lims <- lims_raw |>
- dplyr::filter(LibraryID %in% c(o$LibraryID_tumor)) |>
- dplyr::select(SubjectID, LibraryID, ExternalSubjectID, ProjectOwner, ProjectName, Type, Workflow) |>
- dplyr::distinct()
-
-
-o2 <- o |>
- dplyr::left_join(lims, by = c("SubjectID", "LibraryID_tumor" = "LibraryID")) |>
- dplyr::mutate(
+ select(date_analysed_aest, SubjectID, LibraryID_tumor, type, objp, portal_run_id)
+
+lims_s3 <- lims_raw |>
+ filter(LibraryID %in% dat_s3_res$LibraryID_tumor) |>
+ select(SubjectID, LibraryID, ExternalSubjectID, ProjectOwner, ProjectName, Type, Workflow) |>
+ distinct()
+lims_gds <- lims_raw |>
+ filter(LibraryID %in% c(dat_gds_res$LibraryID_tumor)) |>
+ select(SubjectID, LibraryID, ExternalSubjectID, ProjectOwner, ProjectName, Type, Workflow) |>
+ distinct()
+
+o1 <- dat_s3_res |>
+ left_join(lims_s3, by = c("SubjectID", "LibraryID_tumor" = "LibraryID")) |>
+ mutate(
+ url = glue("https://portal.umccr.org/subjects/{.data$SubjectID}/overview"),
+ sbj_url = glue("{.data$SubjectID}"),
+ url = glue("{.data$url}")
+ ) |>
+ rename(portal_url = url)
+o2 <- dat_gds_res |>
+ left_join(lims_gds, by = c("SubjectID", "LibraryID_tumor" = "LibraryID")) |>
+ mutate(
url = glue("https://portal.umccr.org/subjects/{.data$SubjectID}/overview"),
sbj_url = glue("{.data$SubjectID}"),
url = glue("{.data$url}"),
portal_run_id = glue("dr.{portal_run_id}")
) |>
- dplyr::rename(portal_url = url)
+ rename(portal_url = url)
+
+d <- list(s3 = o1, gds = o2) |>
+ bind_rows(.id = "s3_or_gds")
dt_view <- function(x, scroll_y = 1000, ...) {
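+ # serialise NAs as the string "NA" so they remain visible and filterable in the table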
+ options(DT.TOJSON_ARGS = list(na = "string"))
x |>
- dplyr::mutate(across(where(is.character), as.factor)) |>
+ mutate(across(where(is.character), as.factor)) |>
DT::datatable(
filter = list(position = "top", clear = FALSE, plain = TRUE),
class = "cell-border display compact",
@@ -173,12 +261,12 @@ dt_view <- function(x, scroll_y = 1000, ...) {
)
}
-qcsum <- o2 |>
+qcsum <- d |>
filter(type == "UmQcSumFile") |>
- unnest_wider(objp)
-hrd_chord <- o2 |>
+ tidyr::unnest_wider(objp)
+hrd_chord <- d |>
filter(type == "UmChordTsvFile") |>
- unnest_wider(objp) |>
+ tidyr::unnest_wider(objp) |>
select(portal_run_id,
# chord_p_hrd = p_hrd,
chord_hr_status = hr_status,
@@ -191,22 +279,24 @@ hrd_chord <- o2 |>
# filter(type == "UmHrdetectTsvFile") |>
# unnest_wider(objp) |>
# select(portal_run_id, hrdetect_prob = Probability)
-sigs_2015 <- o2 |>
+sigs_2015 <- d |>
filter(type == "UmSigsSnvFile2015") |>
- unnest_wider(objp) |>
- select(-c(type))
-sigs_2020 <- o2 |>
+ tidyr::unnest_wider(objp) |>
+ select(-c(type)) |>
+ tidyr::unnest_longer(col = c(Rank, Signature, Contribution, RelFreq))
+sigs_2020 <- d |>
filter(type == "UmSigsSnvFile2020") |>
- unnest_wider(objp) |>
- select(-c(type))
+ tidyr::unnest_wider(objp) |>
+ select(-c(type)) |>
+ tidyr::unnest_longer(col = c(Rank, Signature, Contribution, RelFreq))
```
## umccrise Results
```{r final_tab}
-cols_select <- c(
+cols_select1 <- c(
"date_analysed_aest", "SubjectID", "sbj_url", "LibraryID_tumor", "ExternalSubjectID",
- "ProjectOwner", "ProjectName", "Type", "Workflow", "LibraryID_normal",
+ "ProjectOwner", "ProjectName", "Type", "Workflow",
"hrd_chord", "hrd_hrdetect",
"chord_hr_status", "chord_hrd_type", "chord_p_BRCA1", "chord_p_BRCA2",
"qc_status_hmf", "sex_hmf", "purity_hmf", "ploidy_hmf", "msi_hmf",
@@ -214,28 +304,65 @@ cols_select <- c(
"deleted_genes_hmf", "tmb_hmf", "tml_hmf", "wgd_hmf", "hypermutated",
"bpi_enabled", "portal_run_id", "portal_url"
)
-d <- qcsum |>
- dplyr::left_join(hrd_chord, by = "portal_run_id") |>
- dplyr::select(dplyr::all_of(cols_select), dplyr::everything(), -c("type"))
-dt_view(d, caption = "umccrise Results Summary")
+# signatures
+dsig <- bind_rows(list(s2015 = sigs_2015, s2020 = sigs_2020), .id = "Sig_group") |>
+ select(portal_run_id, Sig_group, Rank, Signature, Contribution, RelFreq)
+
+# keep top two ranked sigs from 2015
+dsig_filt <- dsig |>
+ filter(
+ Sig_group == "s2015"
+ ) |>
+ group_by(portal_run_id) |>
+ mutate(tot_sig_vars = sum(Contribution)) |>
+ arrange(Rank) |>
+ slice_head(n = 2) |>
+ # some sigs have same Rank so use explicit sig_rank
+ mutate(sig_rank = row_number()) |>
+ ungroup() |>
+ mutate(
+ sig_summary = glue("{Signature} ({RelFreq} = {Contribution} / {tot_sig_vars})")
+ ) |>
+ select(portal_run_id, sig_rank, sig_summary) |>
+ tidyr::pivot_wider(names_from = sig_rank, values_from = sig_summary, names_prefix = "rank") |>
+ mutate(sig_top2 = paste(rank1, rank2, sep = ", ")) |>
+ select(portal_run_id, sig_top2)
+
+dall <- qcsum |>
+ left_join(hrd_chord, by = "portal_run_id") |>
+ select(all_of(cols_select1), everything(), -c("type")) |>
+ left_join(dsig_filt, by = "portal_run_id") |>
+ relocate(sig_top2, .before = "hrd_chord") |>
+ relocate(s3_or_gds, .after = "SubjectID")
+dt_view(dall)
```
+```{r join_excel_layla, eval=FALSE}
+excel_all <- here("nogit/umccrise/Combined analysis Jan22_Aug23.xlsx") |>
+ readxl::read_xlsx(sheet = "All")
+excel_all |>
+ select("...1", portal_run_id) |>
+ left_join(dall |> select(portal_run_id, sig_top2), by = "portal_run_id") |>
+ rename(N = "...1") |>
+ readr::write_csv("sigs_top2_2023-09-08.csv")
+```
+
+
### HRD Results
-```{r hrd_plot, fig.width=15, fig.height = 10}
-p <- d |>
- dplyr::mutate(
+```{r hrd_plot, fig.width=15, fig.height = 15}
+p1 <- dall |>
+ mutate(
sbj = glue("{SubjectID}_{LibraryID_tumor}"),
date = lubridate::as_datetime(date_analysed_aest, format = "%Y-%m-%d %H:%M:%S")
) |>
- dplyr::select(
+ select(
date,
sbj,
chord = hrd_chord, hrdetect = hrd_hrdetect,
) |>
- tidyr::pivot_longer(chord:hrdetect, names_to = "method", values_to = "probability")
-p1 <- p |>
- ggplot(aes(x = date, y = probability, label = sbj)) +
+ tidyr::pivot_longer(chord:hrdetect, names_to = "method", values_to = "probability") |>
+ ggplot2::ggplot(aes(x = date, y = probability, label = sbj)) +
ggplot2::geom_point(aes(colour = method)) +
ggplot2::geom_line(aes(group = sbj), linewidth = 0.05) +
ggplot2::theme_bw() +
@@ -244,31 +371,79 @@ p1 <- p |>
plotly::ggplotly(p1)
```
+### Signature Results
+
+```{r fig.width = 15, fig.height=65, eval=TRUE}
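+# factor level order for signatures: Sig1-30 for the 2015 catalogue, and SBS names assumed to follow the COSMIC v3 ordering for 2020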
+sig_order2015 <- paste0("Sig", 1:30)
+sig_order2020 <- paste0(
+ "SBS",
+ c(
+ 1:6,
+ paste0(7, c("a", "b", "c", "d")),
+ 8:9,
+ paste0(10, c("a", "b", "c", "d")),
+ 11:16,
+ paste0(17, c("a", "b")),
+ 18:60,
+ 84:94
+ )
+)
+
+p2_prep <- dsig |>
+ filter(
+ Sig_group == "s2015",
+ Rank %in% c(1:3)
+ ) |>
+ left_join(dall |> select(portal_run_id, date_analysed_aest, SubjectID, LibraryID_tumor), by = "portal_run_id") |>
+ mutate(
+ sbj = as.character(glue("{SubjectID}_{LibraryID_tumor}")),
+ date = lubridate::as_datetime(date_analysed_aest, format = "%Y-%m-%d %H:%M:%S")
+ ) |>
+ select(
+ date, sbj, Sig_group, Rank, Signature, Contribution, RelFreq
+ ) |>
+ mutate(Signature = factor(Signature, levels = c(sig_order2015, sig_order2020)))
+p2 <- p2_prep |>
+ filter(!grepl("ALLOCATE", sbj)) |> # get rid of ALLOCATE subject
+ ggplot2::ggplot(aes(x = Contribution, y = sbj, fill = Signature, text = sbj)) +
+ ggplot2::geom_bar(position = "fill", stat = "identity") +
+ ggplot2::theme_bw(base_size = 7)
+# ggplot2::facet_wrap(~Sig_group, ncol = 1)
+
+plotly::ggplotly(p2, tooltip = c("x", "text", "fill"))
+```
+
## Metadata Summary {.tabset .tabset-pills}
### ProjectOwner
```{r ProjectOwner}
-count(d, ProjectOwner) |> dt_view(scroll_y = 400)
+count(dall, ProjectOwner) |> dt_view(scroll_y = 400)
```
### ProjectName
```{r ProjectName}
-count(d, ProjectName) |> dt_view(scroll_y = 400)
+count(dall, ProjectName) |> dt_view(scroll_y = 400)
```
### Type
```{r Type}
-count(d, Type) |> dt_view(scroll_y = 400)
+count(dall, Type) |> dt_view(scroll_y = 400)
```
### Workflow
```{r Workflow}
-count(d, Workflow) |> dt_view(scroll_y = 400)
+count(dall, Workflow) |> dt_view(scroll_y = 400)
+```
+
+### S3orGDS
+
+```{r s3orgds}
+count(dall, s3_or_gds) |> dt_view(scroll_y = 400)
```
diff --git a/inst/scripts/umccrise_run.R b/inst/scripts/umccrise_run.R
index 8c40a6d..6602d87 100644
--- a/inst/scripts/umccrise_run.R
+++ b/inst/scripts/umccrise_run.R
@@ -4,6 +4,7 @@ require(glue)
require(dplyr)
require(readr)
+#---- GDS ----#
# read last 1000 umccrise runs from portal
# 475 from 2022-01-24 until 2023-09-03, of which 449 Succeeded
date1 <- "2023-09-04"
@@ -43,4 +44,31 @@ d <- pmeta |>
d
# final portal meta for umccrise runs
+# columns:
+# "id", "wfr_name", "wfr_id", "version", "end_status", "start", "end", "portal_run_id",
+# "SubjectID", "LibraryID_tumor", "LibraryID_normal", "SampleID_tumor", "SampleID_normal",
+# "gds_outdir_umccrise", "gds_indir_dragen_somatic", "gds_indir_dragen_germline", "gds_infile_genomes_tar"
saveRDS(d, file = here(glue("nogit/umccrise/rds/portal_meta/{date1}_pmeta_final.rds")))
+
+#---- S3 ----#
+pat <- "qc_summary.tsv.gz"
+rows <- 1000
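+# search the portal S3 index for umccrise qc_summary files (up to 1000 hits)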
+d_s3_raw <- dracarys::s3_search(pat = pat, rows = rows)
+
+d_s3 <- d_s3_raw |>
+ arrange(desc(date_aest)) |>
+ mutate(
+ bname = basename(path),
+ dir1 = dirname(path), # path/to/dirA/cancer_report_tables
+ dir2 = basename(dirname(dir1)), # dirA
+ sbj_samp_lib = sub(".*__(.*)", "\\1", dir2),
+ SubjectID = sub("(SBJ[0-9]{5})_.*", "\\1", sbj_samp_lib),
+ SampleID_tumor = sub("SBJ.*?_(.*?)_.*", "\\1", sbj_samp_lib),
+ LibraryID_tumor = sub("SBJ.*?_.*?_(.*)", "\\1", sbj_samp_lib),
+ rerun = grepl("rerun", .data$LibraryID_tumor)
+ ) |>
+ select(dir1, SubjectID, LibraryID_tumor, SampleID_tumor, date = date_aest, rerun)
+
+date2 <- "2023-09-12"
+saveRDS(d_s3, file = here(glue("nogit/umccrise/rds/portal_meta/{date2}_pmeta_s3.rds")))
+# now we have S3 paths and metadata, so all we need is to generate presigned URLs and read the data
diff --git a/man/s3_files_list_filter_relevant.Rd b/man/s3_files_list_filter_relevant.Rd
new file mode 100644
index 0000000..1194eea
--- /dev/null
+++ b/man/s3_files_list_filter_relevant.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/s3.R
+\name{s3_files_list_filter_relevant}
+\alias{s3_files_list_filter_relevant}
+\title{List Relevant Files In AWS S3 Directory}
+\usage{
+s3_files_list_filter_relevant(
+ s3dir,
+ pattern = NULL,
+ page_size = 1000,
+ max_items = 1000,
+ presign = FALSE,
+ expiry_sec = 43200
+)
+}
+\arguments{
+\item{s3dir}{S3 directory.}
+
+\item{pattern}{Regex pattern to further filter the returned tibble by file type.}
+
+\item{page_size}{The size of each page to get in the AWS service call (def: 1000).}
+
+\item{max_items}{The total number of items to return in the command’s output (def: 1000).}
+
+\item{presign}{Include presigned URLs (def: FALSE).}
+
+\item{expiry_sec}{Number of seconds the presigned URL remains valid (if generated) (def: 43200 = 12hrs).}
+}
+\value{
+A tibble with path, date, file size, file type, and presigned URL if requested.
+}
+\description{
+Lists relevant files in an AWS S3 directory.
+}
+\examples{
+\dontrun{
+s3dir <- "s3://umccr-primary-data-prod/Accreditation/ALLOCATE-134131/WGS/2021-07-26/umccrised/ALLOCATE-134131__ALLOCATE-134131_MDx150892_Missing/cancer_report_tables"
+s3_files_list_filter_relevant(s3dir = s3dir, presign = TRUE)
+}
+}
diff --git a/man/s3_search.Rd b/man/s3_search.Rd
new file mode 100644
index 0000000..c0d9f64
--- /dev/null
+++ b/man/s3_search.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/s3.R
+\name{s3_search}
+\alias{s3_search}
+\title{Search AWS S3 Objects}
+\usage{
+s3_search(pat, rows)
+}
+\arguments{
+\item{pat}{Pattern to search for (e.g. 'multiqc_data.json').}
+
+\item{rows}{Max number of rows to return.}
+}
+\value{
+Tibble with S3 path, object size, date modified, id, and unique hash.
+}
+\description{
+Searches for the given pattern in the UMCCR \code{umccr-primary-data-prod} AWS S3
+bucket.
+}
+\examples{
+\dontrun{
+pat <- "qc_summary.tsv.gz"
+s3_search(pat, 10)
+}
+}