diff --git a/NAMESPACE b/NAMESPACE index 4eb4d8d..b10ca1f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,10 +28,11 @@ export(TsoSampleAnalysisResultsFile) export(TsoTargetRegionCoverageFile) export(TsoTmbFile) export(TsoTmbTraceTsvFile) -export(UmccriseCanRepTables) export(VCMetricsFile) export(Wf) export(Wf_tso_ctdna_tumor_only) +export(Wf_umccrise) +export(Wf_umccrise_download_tidy_write) export(WgsContigMeanCovFile) export(WgsCoverageMetricsFile) export(WgsFineHistFile) @@ -45,14 +46,17 @@ export(dr_output_format_valid) export(dr_s3_download) export(empty_tbl) export(file_regex_getter) -export(gds_file_download) export(gds_file_download_api) +export(gds_file_download_cli) export(gds_file_presignedurl) -export(gds_files_list) export(gds_files_list_fastq) -export(gds_files_list_filter_relevant) +export(gds_list_files_dir) +export(gds_list_files_filter_relevant) export(gds_volumes_list) +export(grep_file) export(ica_token_validate) +export(local_list_files_dir) +export(local_list_files_filter_relevant) export(match_regex) export(multiqc_column_map_append) export(multiqc_date_fmt) @@ -66,11 +70,15 @@ export(multiqc_parse_xyline_plot_contig_cvg) export(multiqc_tidy_json) export(rdf2tab) export(read) -export(s3_files_list_filter_relevant) +export(s3_file_presignedurl) +export(s3_list_files_dir) +export(s3_list_files_filter_relevant) export(s3_search) export(session_info_kable) +export(tidy_files) export(time_metrics_process) export(umccr_tidy) +export(write_dracarys_list_of_tbls) importFrom(R6,R6Class) importFrom(ggplot2,ggplot) importFrom(ggrepel,geom_text_repel) diff --git a/R/Wf.R b/R/Wf.R index 317100f..aa5bfae 100644 --- a/R/Wf.R +++ b/R/Wf.R @@ -1,47 +1,82 @@ -#' Workflow R6 Class +#' @title Workflow #' #' @description Workflow is a base R6 class representing a bioinformatic #' workflow run from a UMCCR workflow manager. 
 #'
+#' A workflow has:
+#'
+#' - a directory path with all the raw output files (either on GDS, S3 or
+#'   local filesystem)
+#' - a subset of files that are of interest for ingestion
+#'   - tibble with full path and basename columns
+#' - a set of parsers that can parse and tidy those files
+#'   - each parser takes a path and returns a tidy tibble
+#' - a list of tidy tibbles (or a tibble with nested tibbles)
+#'
 #' @examples
-#' p1 <- system.file("extdata/portaldb_workflow_top4.rds", package = "rportal") |>
-#'   readRDS() |>
-#'   dplyr::filter(type_name == "umccrise") |>
-#'   dplyr::slice(1)
-#' w <- Wf$new(
-#'   prid = p1$portal_run_id, type = p1$type_name, start = p1$start, end = p1$end,
-#'   status = p1$end_status, input = p1$input, output = p1$output
+#' \dontrun{
+#' regexes <- tibble::tribble(
+#'   ~regex, ~fun,
+#'   "-chord\\.tsv\\.gz$", "UmChordTsvFile",
+#'   "-hrdetect\\.tsv\\.gz$", "UmHrdetectTsvFile",
+#'   "-snv_2015\\.tsv\\.gz$", "UmSigsSnvFile",
+#'   "-snv_2020\\.tsv\\.gz$", "UmSigsSnvFile",
+#'   "-dbs\\.tsv\\.gz$", "UmSigsDbsFile",
+#'   "-indel\\.tsv\\.gz$", "UmSigsIndelFile",
+#'   "-qc_summary\\.tsv\\.gz$", "UmQcSumFile",
+#' )
+#'
+#' #---- LOCAL ----#
+#' p1_local <- "~/icav1/g/production/analysis_data"
+#' p <- file.path(p1_local, "SBJ01155/umccrise/202408300c218043/L2101566__L2101565")
+#' um1 <- Wf$new(path = p, wname = "umccrise", regexes = regexes)
+#' um1$list_files(max_files = 10)
+#' um1$list_files_filter_relevant(max_files = 10)
+#'
+#' #---- GDS ----#
+#' p1_gds <- "gds://production/analysis_data"
+#' p <- file.path(p1_gds, "SBJ03043/umccrise/20240830ec648f40/L2300064__L2300063")
+#' outdir <- file.path(sub("gds:/", "~/icav1/g", p))
+#' token <- Sys.getenv("ICA_ACCESS_TOKEN")
+#' um2 <- Wf$new(path = p, wname = "umccrise", regexes = regexes)
+#' um2$list_files(max_files = 10)
+#' um2$list_files_filter_relevant(ica_token = token, max_files = 500)
+#' d <- um2$download_files(
+#'   outdir = outdir, ica_token = token, max_files = 1000, dryrun = FALSE,
+#'   list_filter_fun = gds_list_files_filter_relevant
 #' )
-#' w
+#' d_tidy <- um2$tidy_files(d)
+#'
+#' #---- S3 ----#
+#' p1_s3 <- "s3://org.umccr.data.oncoanalyser/analysis_data/SBJ05570/sash/202408275fce06c3"
+#' p2_s3 <- "L2401304_L2401303/SBJ05570_MDX240299/cancer_report/cancer_report_tables"
+#' p <- file.path(p1_s3, p2_s3)
+#' outdir <- sub("s3:/", "~/s3", p)
+#' um3 <- Wf$new(path = p, wname = "sash", regexes = regexes)
+#' um3$list_files(max_files = 10)
+#' um3$list_files_filter_relevant(max_files = 50)
+#' d <- um3$download_files(
+#'   outdir = outdir, max_files = 50, dryrun = FALSE,
+#'   list_filter_fun = s3_list_files_filter_relevant
+#' )
+#' }
+#'
 #' @export
 Wf <- R6::R6Class(
   "Wf",
   public = list(
-    #' @field prid Portal run ID.
-    #' @field type Workflow type.
-    #' @field start Workflow start datetime.
-    #' @field end Workflow end datetime.
-    #' @field status Workflow end status.
-    #' @field input Workflow input JSON string.
-    #' @field output Workflow output JSON string.
-    prid = NULL,
-    type = NULL,
-    start = NULL,
-    end = NULL,
-    status = NULL,
-    input = NULL,
-    output = NULL,
+    #' @field path Path to directory with raw workflow results (from GDS, S3, or
+    #' local filesystem).
+    #' @field wname Name of workflow (e.g. umccrise, sash).
+    #' @field filesystem Filesystem of `path` (gds/s3/local).
+    #' @field regexes Tibble with file `regex` and `fun`ction to parse it.
+    path = NULL,
+    wname = NULL,
+    filesystem = NULL,
+    regexes = NULL,
     #' @description Create a new Workflow object.
-    #' @param prid Portal run ID.
-    #' @param type Workflow type.
-    #' @param start Workflow start datetime.
- #' @param end Workflow end datetime. - #' @param status Workflow end status. - #' @param input Workflow input JSON string. - #' @param output Workflow output JSON string. - initialize = function(prid = NULL, type = NULL, start = NULL, end = NULL, - status = NULL, input = NULL, output = NULL) { - types <- c( + #' @param path Path to directory with raw workflow results. + #' @param wname Name of workflow. + #' @param regexes Tibble with file `regex` and `fun`ction to parse it. + initialize = function(path = NULL, wname = NULL, regexes = NULL) { + wnames <- c( "bcl_convert", "tso_ctdna_tumor_only", "wgs_alignment_qc", @@ -56,30 +91,140 @@ Wf <- R6::R6Class( "oncoanalyser_wgts_existing_both", "sash" ) - assertthat::assert_that( - type %in% types + assertthat::assert_that(wname %in% wnames) + self$path <- path + self$wname <- wname + self$filesystem <- dplyr::case_when( + grepl("^gds://", path) ~ "gds", + grepl("^s3://", path) ~ "s3", + .default = "local" ) - self$prid <- prid - self$type <- type - self$start <- start - self$end <- end - self$status <- status - self$input <- input - self$output <- output + self$regexes <- regexes }, #' @description Print details about the Workflow. #' @param ... (ignored). print = function(...) { res <- tibble::tribble( ~var, ~value, - "prid", self$prid, - "type", self$type, - "start", as.character(self$start), - "end", as.character(self$end), - "status", self$status, + "path", self$path, + "wname", self$wname, + "filesystem", self$filesystem ) print(res) invisible(self) + }, + #' @description List all files under given path. + #' @param path Path with raw results. + #' @param max_files Max number of files to list (for gds/s3 only). + #' @param ica_token ICA access token (def: $ICA_ACCESS_TOKEN env var). + #' @param ... Passed on to `gds_list_files_dir` function. + list_files = function(path = self$path, max_files = 1000, + ica_token = Sys.getenv("ICA_ACCESS_TOKEN"), ...) { + if (self$filesystem == "gds") { + d <- gds_list_files_dir( + gdsdir = path, token = ica_token, page_size = max_files, ... + ) + } else if (self$filesystem == "s3") { + d <- s3_list_files_dir(s3dir = path, max_objects = max_files) + } else { + d <- local_list_files_dir(localdir = path, max_files = max_files) + } + return(d) + }, + #' @description List dracarys files under given path + #' @param path Path with raw results. + #' @param max_files Max number of files to list (for gds/s3 only). + #' @param ica_token ICA access token (def: $ICA_ACCESS_TOKEN env var). + #' @param ... Passed on to the `gds_list_files_filter_relevant` or + #' the `s3_list_files_filter_relevant` function. + list_files_filter_relevant = function(path = self$path, max_files = 1000, + ica_token = Sys.getenv("ICA_ACCESS_TOKEN"), ...) { + regexes <- self$regexes + assertthat::assert_that(!is.null(regexes)) + if (self$filesystem == "gds") { + d <- gds_list_files_filter_relevant( + gdsdir = path, regexes = regexes, token = ica_token, page_size = max_files, ... + ) + } else if (self$filesystem == "s3") { + d <- s3_list_files_filter_relevant( + s3dir = path, regexes = regexes, max_objects = max_files, ... + ) + } else { + d <- local_list_files_filter_relevant( + localdir = path, regexes = regexes, max_files = max_files + ) + } + d + }, + #' @description Download files from GDS/S3 to local filesystem. + #' @param path Path with raw results. + #' @param outdir Path to output directory. + #' @param ica_token ICA access token (def: $ICA_ACCESS_TOKEN env var). + #' @param max_files Max number of files to list. 
+ #' @param dryrun If TRUE, just list the files that will be downloaded (don't + #' download them). + #' @param recursive Should files be returned recursively _in and under_ the specified + #' GDS directory, or _only directly in_ the specified GDS directory (def: TRUE via ICA API). + #' @param list_filter_fun Function to filter relevant files. + download_files = function(path = self$path, outdir, ica_token = Sys.getenv("ICA_ACCESS_TOKEN"), + max_files = 1000, dryrun = FALSE, recursive = NULL, + list_filter_fun = NULL) { + # TODO: add envvar checker + regexes <- self$regexes + assertthat::assert_that(!is.null(regexes), !is.null(list_filter_fun)) + if (self$filesystem == "gds") { + d <- dr_gds_download( + gdsdir = path, outdir = outdir, regexes = regexes, token = ica_token, + page_size = max_files, dryrun = dryrun, recursive = recursive, + list_filter_fun = list_filter_fun + ) + if (!dryrun) { + self$filesystem <- "local" + self$path <- outdir + } + } else if (self$filesystem == "s3") { + d <- dr_s3_download( + s3dir = path, outdir = outdir, regexes = regexes, + max_objects = max_files, dryrun = dryrun, + list_filter_fun = list_filter_fun + ) + if (!dryrun) { + self$filesystem <- "local" + self$path <- outdir + } + } else { + d <- self$list_files_filter_relevant(regexes = regexes, max_files = max_files) + } + return(d) + }, + #' @description Tidy given files. + #' @param x Tibble with `localpath` to file and the function `type` to parse it. + tidy_files = function(x) { + # awesomeness + tidy_files(x, envir = self) + }, + #' @description Write tidy data. + #' @param x Tibble with tidy `data` and file `type`. + #' @param outdir Directory path to output tidy files. + #' @param prefix Prefix of output files. + #' @param format Format of output files. + #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`). + write = function(x, outdir = NULL, prefix = NULL, format = "tsv", drid = NULL) { + assertthat::assert_that(!is.null(prefix)) + if (!is.null(outdir)) { + prefix <- file.path(outdir, prefix) + } + d_write <- x |> + dplyr::rowwise() |> + dplyr::mutate( + section = sub("read_", "", .data$type), + p = glue("{prefix}_{.data$section}"), + out = list(write_dracarys(obj = .data$data, prefix = .data$p, out_format = format, drid = drid)) + ) |> + dplyr::ungroup() |> + dplyr::select("section", "data") |> + tibble::deframe() + invisible(d_write) } ) # end public ) diff --git a/R/fs_icav1.R b/R/fs_icav1.R new file mode 100644 index 0000000..16df2c9 --- /dev/null +++ b/R/fs_icav1.R @@ -0,0 +1,268 @@ +#' List Files in ICAv1 GDS Directory +#' +#' Lists files in a GDS directory. +#' +#' @param gdsdir Full path to GDS directory. +#' @param token ICA access token (def: $ICA_ACCESS_TOKEN env var). +#' @param page_size Page size (def: 10 via ICA API). +#' @param include_url Include presigned URLs to all files within the GDS directory (def: FALSE via ICA API). +#' @param page_token Page token (def: NULL). Used internally for recursion. +#' @param no_recurse Do not recurse through the file list i.e. just give the first items +#' without recursing further down the list using . +#' @param recursive Should files be returned recursively _in and under_ the specified +#' GDS directory, or _only directly in_ the specified GDS directory (def: TRUE via ICA API). +#' +#' @return A tibble with file ID, basename, size, last modified timestamp, +#' full GDS path, and presigned URL if requested. 
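+#' @note Listing is paginated by the ICA API; when `no_recurse = FALSE` the function
+#' follows the returned `nextPageToken` to fetch all remaining pages.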
+#' @examples +#' \dontrun{ +#' gdsdir <- file.path( +#' "gds://production/analysis_data/SBJ00699/umccrise", +#' "202203277dcf8562/L2200352__L2100146/SBJ00699__MDX220105/coverage" +#' ) +#' token <- ica_token_validate() +#' page_size <- 11 +#' include_url <- F +#' page_token <- NULL +#' no_recurse <- TRUE +#' recursive <- NULL +#' gds_list_files_dir(gdsdir, token, page_size, include_url, no_recurse, page_token, recursive) +#' } +#' @export +gds_list_files_dir <- function(gdsdir, token = Sys.getenv("ICA_ACCESS_TOKEN"), page_size = NULL, + include_url = FALSE, no_recurse = TRUE, page_token = NULL, + recursive = NULL) { + assertthat::assert_that(is.logical(no_recurse), is.logical(include_url)) + assertthat::assert_that(is.null(recursive) || is.logical(recursive)) + token <- ica_token_validate(token) + assertthat::assert_that(grepl("^gds://", gdsdir)) + gdsdir_original <- gdsdir + if (!grepl("/$", gdsdir)) { + gdsdir <- glue("{gdsdir}/") + } + base_url <- "https://aps2.platform.illumina.com/v1" + volname <- sub("gds://(.*?)/.*", "\\1", gdsdir) + path2 <- sub("gds://(.*?)/(.*)", "\\2", gdsdir) + page_size <- ifelse(is.null(page_size), "", glue("&pageSize={page_size}")) + query_url <- glue("{base_url}/files?volume.name={volname}&path=/{path2}*{page_size}") + if (include_url) { + query_url <- glue("{query_url}&include=PresignedUrl") + } + if (!is.null(page_token)) { + query_url <- glue("{query_url}&pageToken={page_token}") + } + if (!is.null(recursive)) { + # without specifying recursive, it's true by default + recursive <- ifelse(recursive, "true", "false") + query_url <- glue("{query_url}&recursive={recursive}") + } + query_res <- httr::GET( + query_url, + httr::add_headers(Authorization = glue("Bearer {token}")), + httr::accept_json() + ) + j <- jsonlite::fromJSON(httr::content(x = query_res, type = "text", encoding = "UTF-8"), simplifyVector = FALSE) + if (j[["itemCount"]] == 0) { + if (gds_likely_file(gdsdir_original)) { + cli::cli_abort("{date_log()} ERROR: Is the input directory a file perhaps?\n{.file {gdsdir_original}}") + } + # if there is a nextPageToken then abort, else continue + if (!is.null(j[["nextPageToken"]])) { + msg <- paste0( + "{date_log()} ERROR: ", + "No GDS files listed in the input directory. Please confirm you can ", + "access the following GDS input directory with your token: ", + "{.file {gdsdir_original}}" + ) + cli::cli_abort(msg) + } + } # endif + d <- j[["items"]] |> + purrr::map(\(x) c( + file_id = x[["id"]], + path = x[["path"]], + size = x[["sizeInBytes"]], + lastmodified = x[["timeModified"]], + presigned_url = x[["presignedUrl"]] + )) |> + dplyr::bind_rows() + if (nrow(d) == 0) { + # We've iterated through all available items, and the next page has 0 items. + # So dplyr::bind_rows(d, NULL) will return d. + return(NULL) + } + res <- d |> + dplyr::mutate( + size = fs::as_fs_bytes(.data$size), + bname = basename(.data$path), + path = glue("gds://{volname}{.data$path}") + ) |> + dplyr::select(dplyr::any_of(c("bname", "size", "lastmodified", "file_id", "path", "presigned_url"))) + + if (!is.null(j[["nextPageToken"]]) && !no_recurse) { + res2 <- gds_list_files_dir( + gdsdir = gdsdir, token = token, page_size = NULL, + include_url = include_url, no_recurse = FALSE, page_token = j[["nextPageToken"]], + recursive = NULL + ) + res <- dplyr::bind_rows(res, res2) + } + res +} + +#' List Relevant Files In ICAv1 GDS Directory +#' +#' Lists relevant files in a GDS directory. 
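+#' Files are first listed with `gds_list_files_dir()`, then each basename is matched
+#' against the `regex` column of `regexes`; matches are tagged in a `type` column with
+#' the corresponding `fun` parser name, and non-matching files are dropped.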
+#'
+#' @inheritParams gds_list_files_dir
+#' @param pattern Pattern to further filter the returned file type tibble.
+#' @param regexes Tibble with `regex` and `fun`ction name (see example).
+#' @return A tibble with file type, basename, size, last modified timestamp, file_id, full path,
+#' and presigned URL if requested.
+#' @examples
+#' \dontrun{
+#' regexes <- tibble::tibble(regex = "multiqc_data\\.json$", fun = "MultiqcJsonFile")
+#' gdsdir <- "gds://production/analysis_data/SBJ01155/umccrise/202408300c218043/L2101566__L2101565"
+#' gds_list_files_filter_relevant(gdsdir = gdsdir, regexes = regexes)
+#' }
+#' @export
+gds_list_files_filter_relevant <- function(gdsdir, pattern = NULL, regexes = DR_FILE_REGEX,
+                                           token = Sys.getenv("ICA_ACCESS_TOKEN"),
+                                           page_size = 100, include_url = FALSE,
+                                           no_recurse = TRUE, page_token = NULL,
+                                           recursive = NULL) {
+  pattern <- pattern %||% ".*" # keep all recognisable files by default
+  assertthat::assert_that(all(colnames(regexes) == c("regex", "fun")))
+  cols_sel <- c("type", "bname", "size", "lastmodified", "file_id", "path", "presigned_url")
+  d <- dracarys::gds_list_files_dir(
+    gdsdir = gdsdir, token = token, page_size = page_size, include_url = include_url,
+    no_recurse = no_recurse, page_token = page_token, recursive = recursive
+  ) |>
+    dplyr::rowwise() |>
+    dplyr::mutate(type = purrr::map_chr(.data$bname, \(x) match_regex(x, regexes))) |>
+    dplyr::ungroup() |>
+    dplyr::filter(!is.na(.data$type), grepl(pattern, .data$type)) |>
+    dplyr::select(dplyr::any_of(cols_sel))
+  d
+}
+
+#' dracarys GDS Download
+#'
+#' Download only GDS files that can be processed by dracarys.
+#'
+#' @inheritParams gds_list_files_dir
+#' @inheritParams gds_list_files_filter_relevant
+#' @param outdir Local output directory.
+#' @param dryrun If TRUE, just list the files that will be downloaded (don't
+#' download them).
+#' @param list_filter_fun Function to filter relevant GDS files.
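+#' @return Tibble of relevant files with their type, size, GDS path and local path
+#' (when `dryrun = TRUE`, the files are only listed, not downloaded).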
+#' @examples +#' \dontrun{ +#' gdsdir <- "gds://production/analysis_data/SBJ01155/umccrise/202408300c218043/L2101566__L2101565" +#' outdir <- sub("gds:/", "~/icav1/g", gdsdir) +#' regexes <- tibble::tribble( +#' ~regex, ~fun, +#' "multiqc_data\\.json$", "MultiqcJsonFile", +#' "-somatic\\.pcgr\\.json\\.gz$", "pcgrjson" +#' ) +#' dr_gds_download(gdsdir = gdsdir, outdir = outdir, regexes = regexes, dryrun = T) +#' } +#' +#' @export +dr_gds_download <- function(gdsdir, outdir, token = Sys.getenv("ICA_ACCESS_TOKEN"), + pattern = NULL, page_size = 100, dryrun = FALSE, + regexes = DR_FILE_REGEX, recursive = NULL, + list_filter_fun = gds_list_files_filter_relevant) { + e <- emojifont::emoji + fs::dir_create(outdir) + d <- list_filter_fun( + gdsdir = gdsdir, pattern = pattern, regexes = regexes, + token = token, page_size = page_size, include_url = FALSE, + no_recurse = FALSE, page_token = NULL, + recursive = recursive + ) + d <- d |> + dplyr::mutate( + gdspath_minus_gdsdir = sub(glue("{gdsdir}/"), "", .data$path), + gdspath_minus_gdsdir_outdir = fs::dir_create( + file.path(outdir, dirname(.data$gdspath_minus_gdsdir)) + ), + localpath = file.path(.data$gdspath_minus_gdsdir_outdir, .data$bname), + gdspath = .data$path + ) |> + dplyr::select("type", "bname", "size", "lastmodified", "file_id", "localpath", "gdspath") + # download recognisable dracarys files to outdir//{bname} + if (!dryrun) { + cli::cli_alert_info("{date_log()} {e('arrow_heading_down')} Downloading files from {.file {gdsdir}}") + res <- d |> + dplyr::rowwise() |> + dplyr::mutate( + dl = gds_file_download_api( + gds_fileid = .data$file_id, out_file = .data$localpath, token = token + ), + localpath = normalizePath(.data$localpath) + ) |> + dplyr::select("type", "bname", "size", "lastmodified", "localpath", "gdspath", "file_id") + return(res) + } else { + cli::cli_alert_info("{date_log()} {e('camera')} Just list relevant files from {.file {gdsdir}}") + d |> + dplyr::select("type", "bname", "size", "lastmodified", "gdspath", "file_id", localpath2be = "localpath") |> + as.data.frame() |> + print() + } +} + +#' GDS File Presigned URL +#' +#' Returns presigned URL of given GDS file. +#' +#' @param gds_fileid GDS file ID. +#' @param token ICA access token (def: $ICA_ACCESS_TOKEN env var). +#' @return Presigned URL if valid. +#' @export +gds_file_presignedurl <- function(gds_fileid, token) { + token <- ica_token_validate(token) + base_url <- "https://aps2.platform.illumina.com/v1" + url <- glue("{base_url}/files/{gds_fileid}") + res <- httr::GET( + url, + httr::add_headers(Authorization = glue("Bearer {token}")), + httr::accept_json() + ) + presigned_url <- jsonlite::fromJSON(httr::content(x = res, as = "text", encoding = "UTF-8"), simplifyVector = FALSE)[["presignedUrl"]] + assertthat::assert_that(grepl("^https://stratus-gds-aps2.s3.ap-southeast-2.amazonaws.com", presigned_url)) + presigned_url +} + +#' GDS File Download via API +#' +#' @param gds_fileid GDS file ID. +#' @param out_file Path to output file. +#' @param token ICA access token (def: $ICA_ACCESS_TOKEN env var). 
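+#' @return Normalized path to the downloaded output file.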
+#' +#' @examples +#' \dontrun{ +#' gds_fileid <- "fil.f9aa2ba7af0c4330095d08dadd2e16b0" +#' out <- tempfile() +#' token <- Sys.getenv("ICA_ACCESS_TOKEN") +#' } +#' @export +gds_file_download_api <- function(gds_fileid, out_file, token) { + presigned_url <- gds_file_presignedurl(gds_fileid, token) + # keep quiet instead of logging presigned urls + status_code <- utils::download.file(url = presigned_url, destfile = out_file, quiet = TRUE) + assertthat::assert_that(status_code == 0) + normalizePath(out_file) +} + +#' GDS File Download via CLI +#' +#' @param gds Full path to GDS file. +#' @param out Path to output file. +#' @param token ICA access token (def: $ICA_ACCESS_TOKEN env var). +#' @export +gds_file_download_cli <- function(gds, out, token = Sys.getenv("ICA_ACCESS_TOKEN")) { + token <- ica_token_validate(token) + system(glue("ica files download {gds} {out} --access-token {token}")) +} diff --git a/R/fs_local.R b/R/fs_local.R new file mode 100644 index 0000000..07397aa --- /dev/null +++ b/R/fs_local.R @@ -0,0 +1,54 @@ +#' List Files in Local Directory +#' +#' Lists files in a local directory. +#' +#' @param localdir Path to local directory. +#' @param max_files Max files returned. +#' +#' @return A tibble with file basename, size, last modification timestamp +#' and full path. +#' @examples +#' localdir <- system.file("R", package = "dracarys") +#' x <- local_list_files_dir(localdir) +#' @testexamples +#' expect_equal(names(x), c("bname", "size", "lastmodified", "path")) +#' @export +local_list_files_dir <- function(localdir, max_files = NULL) { + d <- fs::dir_info(path = localdir, recurse = TRUE, type = "file") |> + dplyr::mutate( + bname = basename(.data$path), + lastmodified = .data$modification_time + ) |> + dplyr::select("bname", "size", "lastmodified", "path") + if (!is.null(max_files)) { + d <- d |> + dplyr::slice_head(n = max_files) + } + d +} + +#' List Relevant Files In Local Directory +#' +#' Lists relevant files in a local directory. +#' +#' @inheritParams local_list_files_dir +#' @param regexes Tibble with `regex` and `fun`ction name (see example). +#' +#' @return A tibble with file type, basename, size, last modified timestamp, and +#' path. +#' +#' @examples +#' localdir <- system.file("extdata/tso", package = "dracarys") +#' regexes <- tibble::tibble(regex = "multiqc_data\\.json$", fun = "MultiqcFile") +#' x <- local_list_files_filter_relevant(localdir, regexes) +#' @testexamples +#' expect_equal(nrow(x), 1) +#' @export +local_list_files_filter_relevant <- function(localdir, regexes = DR_FILE_REGEX, max_files = NULL) { + local_list_files_dir(localdir = localdir, max_files = max_files) |> + dplyr::mutate( + type = purrr::map_chr(.data$bname, \(x) match_regex(x, regexes = regexes)) + ) |> + dplyr::filter(!is.na(.data$type)) |> + dplyr::select("type", "bname", "size", "lastmodified", localpath = "path") +} diff --git a/R/fs_s3.R b/R/fs_s3.R new file mode 100644 index 0000000..34c437e --- /dev/null +++ b/R/fs_s3.R @@ -0,0 +1,237 @@ +#' List Objects in AWS S3 Directory +#' +#' Returns some or all (up to 1,000) of the objects in an S3 directory. +#' +#' @param s3dir S3 directory. +#' @param max_objects Maximum objects returned. +#' +#' +#' @return A tibble with object basename, size, last modified timestamp, and +#' full S3 path. 
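+#' @note Uses `paws.storage::s3()$list_objects_v2()` under the hood, with `max_objects`
+#' passed as `MaxKeys`, so a single call returns at most 1000 objects.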
+#' @examples +#' \dontrun{ +#' p1 <- "s3://org.umccr.data.oncoanalyser/analysis_data/SBJ05373/sash" +#' p2 <- "20240707becde493/L2401018_L2401017/SBJ05373_MDX240220" +#' s3dir <- file.path(p1, p2, "cancer_report/cancer_report_tables") +#' s3_list_files_dir(s3dir, max_objects = 15) +#' } +#' @export +s3_list_files_dir <- function(s3dir, max_objects = 1000) { + assertthat::assert_that(grepl("^s3://", s3dir)) + bucket <- sub("s3://(.*?)/.*", "\\1", s3dir) + prefix <- sub("s3://(.*?)/(.*)", "\\2", s3dir) + s3 <- paws.storage::s3() + l <- s3$list_objects_v2(Bucket = bucket, Prefix = prefix, MaxKeys = max_objects) + assertthat::assert_that(all(c("Contents", "KeyCount") %in% names(l))) + cols_sel <- c("bname", "size", "lastmodified", "path") + # handle no results + if (l[["KeyCount"]] == 0) { + return(empty_tbl(cnames = cols_sel, ctypes = "cccc")) + } + d <- l[["Contents"]] |> + purrr::map(\(x) tibble::tibble( + Key = x[["Key"]], + Size = x[["Size"]], + lastmodified = x[["LastModified"]] + )) |> + dplyr::bind_rows() |> + dplyr::mutate( + path = glue("s3://{bucket}/{.data$Key}"), + bname = basename(.data$path), + size = fs::as_fs_bytes(.data$Size) + ) |> + dplyr::select(dplyr::all_of(cols_sel)) + return(d) +} + +#' List Relevant Files In AWS S3 Directory +#' +#' Lists relevant files in an AWS S3 directory. +#' +#' @inheritParams s3_list_files_dir +#' @param pattern Pattern to further filter the returned file type tibble. +#' @param regexes Tibble with `regex` and `fun`ction name. +#' @param presign Include presigned URLs (def: FALSE). +#' @param expiry_sec Number of seconds the presigned URL will be valid for (if generated). +#' +#' @return A tibble with file type, basename, size, last modified timestamp, +#' full path, and presigned URL if requested. +#' @examples +#' \dontrun{ +#' p1 <- "s3://org.umccr.data.oncoanalyser/analysis_data/SBJ05373/sash" +#' p2 <- "20240707becde493/L2401018_L2401017/SBJ05373_MDX240220" +#' s3dir <- file.path(p1, p2) +#' regexes <- tibble::tibble(regex = "multiqc_data\\.json$", fun = "MultiqcJsonFile") +#' s3_list_files_filter_relevant(s3dir = s3dir, regexes = regexes, max_objects = 300) +#' } +#' @export +s3_list_files_filter_relevant <- function(s3dir, pattern = NULL, + regexes = DR_FILE_REGEX, max_objects = 100, + presign = FALSE, expiry_sec = 3600) { + assertthat::assert_that(rlang::is_logical(presign), max_objects <= 1000) + d_all <- s3_list_files_dir(s3dir = s3dir, max_objects = max_objects) + if (nrow(d_all) == 0) { + return(d_all) + } + pattern <- pattern %||% ".*" # keep all recognisable files by default + cols_sel <- c("type", "bname", "size", "lastmodified", "path") + d <- d_all |> + dplyr::rowwise() |> + dplyr::mutate( + type = purrr::map_chr(.data$bname, \(x) match_regex(x, regexes)) + ) |> + dplyr::ungroup() |> + dplyr::filter(!is.na(.data$type), grepl(pattern, .data$type)) |> + dplyr::select(dplyr::all_of(cols_sel)) + + if (presign) { + if (nrow(d) == 0) { + return(d) + } + s3_client <- paws.storage::s3(paws.storage::config(signature_version = "s3v4")) + d <- d |> + dplyr::rowwise() |> + dplyr::mutate(presigned_url = s3_file_presignedurl( + client = s3_client, s3path = .data$path, expiry_seconds = expiry_sec + )) |> + dplyr::ungroup() |> + dplyr::select(dplyr::all_of(c(cols_sel, "presigned_url"))) + } + d +} + +#' dracarys S3 Download +#' +#' Download only S3 files that can be processed by dracarys. +#' +#' @inheritParams s3_list_files_dir +#' @inheritParams s3_list_files_filter_relevant +#' @param outdir Path to output directory. 
+#' @param dryrun If TRUE, just list the files that will be downloaded (don't
+#' download them).
+#' @param list_filter_fun Function to filter relevant S3 files.
+#' @examples
+#' \dontrun{
+#' p1 <- "s3://org.umccr.data.oncoanalyser/analysis_data/SBJ05373/sash"
+#' p2 <- "20240707becde493/L2401018_L2401017/SBJ05373_MDX240220"
+#' s3dir <- file.path(p1, p2)
+#' regexes <- tibble::tribble(
+#'   ~regex, ~fun,
+#'   "multiqc_data\\.json$", "MultiqcJsonFile",
+#'   "pcgr.*\\.json\\.gz$", "pcgrjson"
+#' )
+#' outdir <- sub("s3:/", "~/s3", s3dir)
+#' dr_s3_download(s3dir = s3dir, outdir = outdir, max_objects = 500, regexes = regexes, dryrun = FALSE)
+#' }
+#' @export
+dr_s3_download <- function(s3dir, outdir, max_objects = 100, pattern = NULL,
+                           regexes = DR_FILE_REGEX, dryrun = FALSE,
+                           list_filter_fun = s3_list_files_filter_relevant) {
+  s3 <- paws.storage::s3()
+  e <- emojifont::emoji
+  fs::dir_create(outdir)
+  d <- list_filter_fun(
+    s3dir = s3dir, pattern = pattern, regexes = regexes,
+    max_objects = max_objects, presign = FALSE
+  )
+  d <- d |>
+    dplyr::mutate(
+      s3path_minus_s3dir = sub(glue("{s3dir}/"), "", .data$path),
+      s3path_minus_s3dir_outdir = fs::dir_create(
+        file.path(outdir, dirname(.data$s3path_minus_s3dir))
+      ),
+      localpath = file.path(.data$s3path_minus_s3dir_outdir, .data$bname),
+      s3path = .data$path
+    ) |>
+    dplyr::select("type", "bname", "size", "lastmodified", "localpath", "s3path")
+  # download recognisable dracarys files to outdir/<subdirs>/{bname}
+  if (!dryrun) {
+    cli::cli_alert_info("{date_log()} {e('arrow_heading_down')} Downloading files from {.file {s3dir}}")
+    d |>
+      dplyr::rowwise() |>
+      dplyr::mutate(
+        s3bucket = sub("s3://(.*?)/.*", "\\1", .data$s3path),
+        s3key = sub("s3://(.*?)/(.*)", "\\2", .data$s3path),
+        dl = list(
+          s3$download_file(
+            Bucket = .data$s3bucket, Key = .data$s3key, Filename = .data$localpath
+          )
+        ),
+        localpath = normalizePath(.data$localpath)
+      ) |>
+      dplyr::select("type", "bname", "size", "lastmodified", "localpath", "s3path")
+  } else {
+    cli::cli_alert_info("{date_log()} {e('camera')} Just list relevant files from {.file {s3dir}}")
+    d |>
+      dplyr::select("type", "bname", "size", "lastmodified", "s3path", localpath2be = "localpath") |>
+      as.data.frame() |>
+      print()
+  }
+}
+
+#' S3 Generate Presigned URL
+#'
+#' @param client S3 client. Make sure you use `signature_version = "s3v4"` (see example).
+#' @param s3path Full path to S3 object.
+#' @param expiry_seconds Number of seconds the presigned URL is valid for (3600 = 1 hour).
+#'
+#' @return An S3 presigned URL.
+#' @examples
+#' \dontrun{
+#' client <- paws.storage::s3(paws.storage::config(signature_version = "s3v4"))
+#' s3path <- "s3://bucket1/path/to/file.tsv"
+#' s3_file_presignedurl(client, s3path)
+#' }
+#'
+#' @export
+s3_file_presignedurl <- function(client, s3path, expiry_seconds = 3600) {
+  bucket <- sub("s3://(.*?)/.*", "\\1", s3path)
+  prefix <- sub("s3://(.*?)/(.*)", "\\2", s3path)
+  client$generate_presigned_url(
+    client_method = "get_object",
+    params = list(Bucket = bucket, Key = prefix),
+    expires_in = expiry_seconds
+  )
+}
+
+#' Search AWS S3 Objects
+#'
+#' Searches for the given pattern in the UMCCR `umccr-primary-data-prod` AWS S3
+#' bucket.
+#'
+#' @param pat Pattern to search for (e.g. 'multiqc_data.json').
+#' @param rows Max number of rows to return.
+#'
+#' @return Tibble with S3 path, object size, date modified, id, unique hash.
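+#' @note Shells out to the `awscurl` command-line tool, so it requires `awscurl` to be
+#' installed and credentials that can query the UMCCR portal API.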
+#' +#' @examples +#' \dontrun{ +#' pat <- "qc_summary.tsv.gz" +#' s3_search(pat, 10) +#' } +#' @export +s3_search <- function(pat, rows) { + au_tz <- "Australia/Melbourne" + utc_tz <- "UTC" + base_url <- "https://api.portal.prod.umccr.org/iam/s3" + url1 <- utils::URLencode(glue("{base_url}?rowsPerPage={rows}&search={pat}")) + awscurl_cmd <- glue( + "awscurl '{url1}' ", + "--header 'Accept: application/json'" + ) + message(glue("Running {awscurl_cmd}")) + j <- system(awscurl_cmd, intern = TRUE) + date_fmt <- "%Y-%m-%dT%H:%M:%S" + d <- j |> + jsonlite::fromJSON() |> + purrr::pluck("results") |> + tibble::as_tibble() + d |> + dplyr::mutate( + date1 = as.POSIXct(.data$last_modified_date, tz = utc_tz, format = date_fmt), + date_aest = lubridate::with_tz(.data$date1, tz = au_tz), + path = glue("s3://{bucket}/{key}"), + size = fs::as_fs_bytes(.data$size) + ) |> + dplyr::select("path", "size", "date_aest", "id", "unique_hash") +} diff --git a/R/ica.R b/R/ica.R index 0838ef0..951095b 100644 --- a/R/ica.R +++ b/R/ica.R @@ -1,28 +1,3 @@ -#' List Relevant Files In GDS Directory -#' -#' Lists relevant files in a GDS directory. -#' -#' @param gdsdir GDS directory. -#' @param token ICA access token. -#' @param pattern Pattern to further filter the returned file type tibble. -#' @param include_url Include presigned URLs to all files within the GDS directory (def: FALSE). -#' @param page_size Page size (def: 100). -#' @param regexes Tibble with regex and function name. -#' -#' @return A tibble with type, bname, size, file_id, path, and presigned URL. -#' @export -gds_files_list_filter_relevant <- function(gdsdir, token, pattern = NULL, include_url = FALSE, page_size = 100, regexes = DR_FILE_REGEX) { - pattern <- pattern %||% ".*" # keep all recognisable files by default - cols_sel <- c("type", "bname", "size", "file_id", "path", "presigned_url") - d <- dracarys::gds_files_list(gdsdir, token, include_url = include_url, page_size = page_size) |> - dplyr::rowwise() |> - dplyr::mutate(type = purrr::map_chr(.data$bname, \(x) match_regex(x, regexes))) |> - dplyr::ungroup() |> - dplyr::filter(!is.na(.data$type), grepl(pattern, .data$type)) |> - dplyr::select(dplyr::any_of(cols_sel)) - d -} - #' List FASTQs In GDS Directory #' #' @param gdsdir GDS directory. @@ -48,12 +23,9 @@ gds_files_list_fastq <- function(gdsdir, token, include_url = FALSE, page_size = ~regex, ~fun, "fastq\\.gz$", "FASTQ" ) - g <- gds_files_list_filter_relevant( - gdsdir = gdsdir, token = token, pattern = NULL, include_url = include_url, - page_size = page_size, regexes = fq_regex - ) - assertthat::assert_that( - all(colnames(g) == c("type", "bname", "size", "file_id", "path")) + g <- gds_list_files_filter_relevant( + gdsdir = gdsdir, pattern = NULL, regexes = fq_regex, + token = token, page_size = page_size, include_url = include_url ) g |> dplyr::mutate( @@ -61,176 +33,10 @@ gds_files_list_fastq <- function(gdsdir, token, include_url = FALSE, page_size = size_num = as.numeric(.data$size) ) |> dplyr::select( - "type", "bname", "size", "size_chr", "size_num", "file_id", "path" + "type", "bname", "size", "lastmodified", "size_chr", "size_num", "file_id", "path" ) } -#' GDS File Presigned URL -#' -#' Returns presigned URL of given GDS file. -#' -#' @param gds_fileid GDS file ID. -#' @param token ICA access token (def: $ICA_ACCESS_TOKEN env var). -#' @return Presigned URL if valid. 
-#' @export -gds_file_presignedurl <- function(gds_fileid, token) { - token <- ica_token_validate(token) - base_url <- "https://aps2.platform.illumina.com/v1" - url <- glue("{base_url}/files/{gds_fileid}") - res <- httr::GET( - url, - httr::add_headers(Authorization = glue("Bearer {token}")), - httr::accept_json() - ) - presigned_url <- jsonlite::fromJSON(httr::content(x = res, as = "text", encoding = "UTF-8"), simplifyVector = FALSE)[["presignedUrl"]] - assertthat::assert_that(grepl("^https://stratus-gds-aps2.s3.ap-southeast-2.amazonaws.com", presigned_url)) - presigned_url -} - -#' GDS File Download via API -#' -#' @param gds_fileid GDS file ID. -#' @param out_file Path to output file. -#' @param token ICA access token (def: $ICA_ACCESS_TOKEN env var). -#' -#' @examples -#' \dontrun{ -#' gds_fileid <- "fil.f9aa2ba7af0c4330095d08dadd2e16b0" -#' out <- tempfile() -#' token <- Sys.getenv("ICA_ACCESS_TOKEN") -#' } -#' @export -gds_file_download_api <- function(gds_fileid, out_file, token) { - presigned_url <- gds_file_presignedurl(gds_fileid, token) - # keep quiet instead of logging presigned urls - status_code <- utils::download.file(url = presigned_url, destfile = out_file, quiet = TRUE) - assertthat::assert_that(status_code == 0) - out_file -} - -#' GDS File Download via CLI -#' -#' @param gds Full path to GDS file. -#' @param out Path to output file. -#' @param token ICA access token (def: $ICA_ACCESS_TOKEN env var). -#' @export -gds_file_download <- function(gds, out, token = Sys.getenv("ICA_ACCESS_TOKEN")) { - token <- ica_token_validate(token) - system(glue("ica files download {gds} {out} --access-token {token}")) -} - -#' GDS Files List -#' -#' List files on ICA GDS filesystem. -#' -#' @param gdsdir Full path to GDS directory. -#' @param token ICA access token (def: $ICA_ACCESS_TOKEN env var). -#' @param page_size Page size (def: 10). -#' @param include_url Include presigned URLs to all files within the GDS directory (def: FALSE). -#' @param page_token Page token (def: NULL). Used internally for recursion. -#' @param no_recurse Do not recurse through the file list i.e. just give the first items -#' without recursing further down the list using . -#' @param recursive Should files be returned recursively _in and under_ the specified -#' GDS directory, or _only directly in_ the specified GDS directory (def: TRUE). -#' -#' @return Tibble with file basename, file size, file full data path, file dir name. 
-#' @examples -#' \dontrun{ -#' gdsdir <- file.path( -#' "gds://production/primary_data", -#' "240322_A00130_0290_BH5HLLDSXC/20240323f56ec5a5/WGS_TsqNano" -#' ) -#' gdsdir <- file.path( -#' "gds://bssh.acddbfda498038ed99fa94fe79523959/Runs", -#' "240322_A00130_0290_BH5HLLDSXC_r.3TbcOsEKZUyetygkqIOXcg/InterOp" -#' ) -#' gdsdir <- file.path( -#' "gds://production/analysis_data/SBJ00699/umccrise", -#' "202203277dcf8562/L2200352__L2100146/SBJ00699__MDX220105/coverage" -#' ) -#' token <- ica_token_validate() -#' page_size <- 11 -#' include_url <- TRUE -#' page_token <- NULL -#' no_recurse <- TRUE -#' recursive <- NULL -#' gds_files_list(gdsdir, token, page_size, include_url, no_recurse, page_token, recursive) -#' } -#' @export -gds_files_list <- function(gdsdir, token, page_size = NULL, include_url = FALSE, - no_recurse = TRUE, page_token = NULL, recursive = NULL) { - assertthat::assert_that(is.logical(no_recurse), is.logical(include_url)) - assertthat::assert_that(is.null(recursive) || is.logical(recursive)) - token <- ica_token_validate(token) - assertthat::assert_that(grepl("^gds://", gdsdir)) - gdsdir_original <- gdsdir - if (!grepl("/$", gdsdir)) { - gdsdir <- glue("{gdsdir}/") - } - base_url <- "https://aps2.platform.illumina.com/v1" - volname <- sub("gds://(.*?)/.*", "\\1", gdsdir) - path2 <- sub("gds://(.*?)/(.*)", "\\2", gdsdir) - page_size <- ifelse(is.null(page_size), "", glue("&pageSize={page_size}")) - query_url <- glue("{base_url}/files?volume.name={volname}&path=/{path2}*{page_size}") - if (include_url) { - query_url <- glue("{query_url}&include=PresignedUrl") - } - if (!is.null(page_token)) { - query_url <- glue("{query_url}&pageToken={page_token}") - } - if (!is.null(recursive)) { - # without specifying recursive, it's true by default - recursive <- ifelse(recursive, "true", "false") - query_url <- glue("{query_url}&recursive={recursive}") - } - query_res <- httr::GET( - query_url, - httr::add_headers(Authorization = glue("Bearer {token}")), - httr::accept_json() - ) - j <- jsonlite::fromJSON(httr::content(x = query_res, type = "text", encoding = "UTF-8"), simplifyVector = FALSE) - if (j[["itemCount"]] == 0) { - if (likely_file(gdsdir_original)) { - cli::cli_abort("{date_log()} ERROR: Is the input directory a file perhaps?\n{.file {gdsdir_original}}") - } - # if there is a nextPageToken then abort, else continue - if (!is.null(j[["nextPageToken"]])) { - msg <- paste0( - "{date_log()} ERROR: ", - "No GDS files listed in the input directory. Please confirm you can ", - "access the following GDS input directory with your token: ", - "{.file {gdsdir_original}}" - ) - cli::cli_abort(msg) - } - } # endif - d <- j[["items"]] |> - purrr::map(\(x) c(file_id = x[["id"]], path = x[["path"]], size = x[["sizeInBytes"]], presigned_url = x[["presignedUrl"]])) |> - dplyr::bind_rows() - if (nrow(d) == 0) { - # We've iterated through all available items, and the next page has 0 items. - # So dplyr::bind_rows(d, NULL) will return d. 
- return(NULL) - } - res <- d |> - dplyr::mutate( - size = fs::as_fs_bytes(.data$size), - bname = basename(.data$path), - path = glue("gds://{volname}{.data$path}"), - dname = basename(dirname(.data$path)) - ) |> - dplyr::select(dplyr::any_of(c("file_id", "bname", "size", "path", "dname", "presigned_url"))) - if (!is.null(j[["nextPageToken"]]) && !no_recurse) { - res2 <- gds_files_list( - gdsdir = gdsdir, token = token, page_size = NULL, - include_url = include_url, no_recurse = FALSE, page_token = j[["nextPageToken"]], - recursive = NULL - ) - res <- dplyr::bind_rows(res, res2) - } - res -} - #' List GDS Volumes #' #' Lists GDS volumes accessible by the provided ICA token. @@ -255,52 +61,6 @@ gds_volumes_list <- function(token, page_size = 10) { } -#' dracarys GDS Download -#' -#' Download only GDS files that can be processed by dracarys. -#' -#' @param gdsdir Full path to GDS directory. -#' @param outdir Path to output directory. -#' @param token ICA access token (def: $ICA_ACCESS_TOKEN env var). -#' @param page_size Page size (def: 100). -#' @param pattern Pattern to further filter the returned file type tibble. -#' @param dryrun If TRUE, just list the files that will be downloaded (don't -#' download them). -#' @param regexes Tibble with regex and function name. -#' @param recursive Should files be returned recursively _in and under_ the specified -#' GDS directory (TRUE), or _only directly in_ the specified GDS directory (FALSE) (def: TRUE). -#' -#' @export -dr_gds_download <- function(gdsdir, outdir, token, page_size = 100, pattern = NULL, - dryrun = FALSE, regexes = DR_FILE_REGEX, recursive = NULL) { - e <- emojifont::emoji - fs::dir_create(outdir) - d <- gds_files_list( - gdsdir = gdsdir, token = token, page_size = page_size, - no_recurse = FALSE, recursive = recursive - ) |> - dplyr::mutate(type = purrr::map_chr(.data$bname, \(x) match_regex(x, regexes))) |> - dplyr::select("file_id", "dname", "type", "size", "path", "bname") - - # download recognisable dracarys files to outdir/{bname} - pattern <- pattern %||% ".*" # keep all recognisable files - d_filt <- d |> - dplyr::filter(!is.na(.data$type), grepl(pattern, .data$type)) |> - dplyr::mutate(out = file.path(outdir, .data$bname)) - if (!dryrun) { - cli::cli_alert_info("{date_log()} {e('arrow_heading_down')} Downloading files from {.file {gdsdir}}") - d_filt |> - dplyr::rowwise() |> - dplyr::mutate(out_dl = gds_file_download_api(.data$file_id, .data$out, token)) - } else { - cli::cli_alert_info("{date_log()} {e('camera')} Just list relevant files from {.file {gdsdir}}") - d_filt |> - dplyr::select("path", "type", "size") |> - as.data.frame() |> - print() - } -} - #' Validate ICA access token #' #' Validates ICA access token by parsing it and checking its expiration date. 
@@ -339,7 +99,7 @@ ica_token_exp <- function(token = Sys.getenv("ICA_ACCESS_TOKEN")) { structure(l$payload$exp, class = c("POSIXct", "POSIXt")) } -likely_file <- function(x) { +gds_likely_file <- function(x) { e <- c( "txt", "tsv", "csv", "html", "json", "stdout", "stderr", "stdouterr", "log", "vcf", "gz", "bam", "bai" diff --git a/R/regex.R b/R/regex.R index 25a2f61..34f4079 100644 --- a/R/regex.R +++ b/R/regex.R @@ -61,11 +61,11 @@ DR_FILE_REGEX <- tibble::tribble( "multiqc_data\\.json", "MultiqcFile", "somatic\\.pcgr\\.json\\.gz$", "PcgrJsonFile", "somatic\\.pcgr\\.snvs_indels\\.tiers\\.tsv$", "PcgrTiersFile", - "chord\\.tsv\\.gz$", "UmChordTsvFile", - "hrdetect\\.tsv\\.gz$", "UmHrdetectTsvFile", - "snv_2015\\.tsv\\.gz$", "UmSigsSnvFile", - "snv_2020\\.tsv\\.gz$", "UmSigsSnvFile", - "-qc_summary\\.tsv\\.gz$", "UmQcSumFile", + # "chord\\.tsv\\.gz$", "UmChordTsvFile", + # "hrdetect\\.tsv\\.gz$", "UmHrdetectTsvFile", + # "snv_2015\\.tsv\\.gz$", "UmSigsSnvFile", + # "snv_2020\\.tsv\\.gz$", "UmSigsSnvFile", + # "-qc_summary\\.tsv\\.gz$", "UmQcSumFile", "bcftools_stats\\.txt$", "BcftoolsStatsFile" ) @@ -83,6 +83,8 @@ FILES_DOWNLOAD_BUT_IGNORE <- c( #' @param f Name of function to evaluate. #' @param v Character vector of strings evaluating to functions. By default, #' this points to the functions in the DR_FILE_REGEX dracarys tibble. +#' @param envir the environment in which to evaluate the function e.g. use `self` +#' when using inside R6 classes. #' #' @return Evaluated function. #' @examples @@ -94,13 +96,13 @@ FILES_DOWNLOAD_BUT_IGNORE <- c( #' expect_equal(mean_1_to_10, base::mean(1:10)) #' expect_null(dr_func_eval("foo")) #' @export -dr_func_eval <- function(f, v = NULL) { +dr_func_eval <- function(f, v = NULL, envir = parent.frame()) { v <- v %||% DR_FILE_REGEX[["fun"]] if (!f %in% v) { return(NULL) } # evaluate string - eval(parse(text = f)) + eval(parse(text = f), envir = envir) } #' Get dracarys `DR_FILE_REGEX` diff --git a/R/s3.R b/R/s3.R deleted file mode 100644 index 6b6daaa..0000000 --- a/R/s3.R +++ /dev/null @@ -1,156 +0,0 @@ -#' List Relevant Files In AWS S3 Directory -#' -#' Lists relevant files in an AWS S3 directory. -#' -#' @param s3dir S3 directory. -#' @param pattern Pattern to further filter the returned file type tibble. -#' @param page_size The size of each page to get in the AWS service call (def: 1000). -#' @param max_items The total number of items to return in the command’s output (def: 1000). -#' @param presign Include presigned URLs (def: FALSE). -#' @param expiry_sec Number of seconds the presigned URL will be valid for (if generated) (def: 43200 (12hrs)). -#' -#' @return A tibble with path, date, file size, file type, and presigned URL if requested. 
-#' @examples -#' \dontrun{ -#' s3dir <- "s3://umccr-primary-data-prod/cancer_report_tables" -#' s3_files_list_filter_relevant(s3dir = s3dir, presign = TRUE) -#' } -#' @export -s3_files_list_filter_relevant <- function(s3dir, pattern = NULL, page_size = 1000, max_items = 1000, presign = FALSE, expiry_sec = 43200) { - assertthat::assert_that(grepl("^s3://", s3dir), rlang::is_logical(presign)) - pattern <- pattern %||% ".*" # keep all recognisable files by default - b <- sub("s3://(.*?)/.*", "\\1", s3dir) - p <- sub("s3://(.*?)/(.*)", "\\2", s3dir) - cmd <- glue( - "aws --output json s3api list-objects-v2 --bucket {b} --prefix {p} ", - "--max-items {max_items} --page-size {page_size}" - ) - l <- system(cmd, intern = TRUE) - j <- jsonlite::fromJSON(l) - assertthat::assert_that("Contents" %in% names(j)) - d <- j[["Contents"]] |> - tibble::as_tibble() |> - dplyr::mutate( - path = glue("s3://{b}/{.data$Key}"), - date_utc = .data$LastModified, - size = fs::as_fs_bytes(.data$Size) - ) |> - dplyr::rowwise() |> - dplyr::mutate( - bname = basename(.data$path), - type = purrr::map_chr(.data$bname, match_regex) - ) |> - dplyr::ungroup() |> - dplyr::filter(!is.na(.data$type), grepl(pattern, .data$type)) |> - dplyr::select("path", "bname", "date_utc", "size", "type") - - if (presign) { - d <- d |> - dplyr::rowwise() |> - dplyr::mutate(presigned_url = s3_file_presignedurl(.data$path, expiry_seconds = expiry_sec)) |> - dplyr::ungroup() - } - d -} - -s3_file_presignedurl <- function(s3path, expiry_seconds = 3600) { - p <- system(glue("aws s3 presign {s3path} --expires-in {expiry_seconds}"), intern = TRUE) - p -} - -#' Search AWS S3 Objects -#' -#' Searches for the given pattern in the UMCCR `umccr-primary-data-prod` AWS S3 -#' bucket. -#' -#' @param pat Pattern to search for (e.g. 'multiqc_data.json'). -#' @param rows Max number of rows to return. -#' -#' @return Tibble with S3 path, object size, date modified, id, unique hash. -#' -#' @examples -#' \dontrun{ -#' pat <- "qc_summary.tsv.gz" -#' s3_search(pat, 10) -#' } -#' @export -s3_search <- function(pat, rows) { - au_tz <- "Australia/Melbourne" - utc_tz <- "UTC" - base_url <- "https://api.portal.prod.umccr.org/iam/s3" - url1 <- utils::URLencode(glue("{base_url}?rowsPerPage={rows}&search={pat}")) - awscurl_cmd <- glue( - "awscurl '{url1}' ", - "--header 'Accept: application/json'" - ) - message(glue("Running {awscurl_cmd}")) - j <- system(awscurl_cmd, intern = TRUE) - date_fmt <- "%Y-%m-%dT%H:%M:%S" - d <- j |> - jsonlite::fromJSON() |> - purrr::pluck("results") |> - tibble::as_tibble() - d |> - dplyr::mutate( - date1 = as.POSIXct(.data$last_modified_date, tz = utc_tz, format = date_fmt), - date_aest = lubridate::with_tz(.data$date1, tz = au_tz), - path = glue("s3://{bucket}/{key}"), - size = fs::as_fs_bytes(.data$size) - ) |> - dplyr::select("path", "size", "date_aest", "id", "unique_hash") -} - -#' dracarys S3 Download -#' -#' Download only S3 files that can be processed by dracarys. -#' -#' @param s3dir Full path to S3 directory. -#' @param outdir Path to output directory. -#' @param page_size Page size (def: 100). -#' @param pattern Pattern to further filter the returned file type tibble. -#' @param regexes Tibble with regex and function name. -#' @param dryrun If TRUE, just list the files that will be downloaded (don't -#' download them). 
-#' @examples -#' s3dir <- file.path( -#' "s3://umccr-primary-data-prod/UMCCR-Validation/SBJ00596", -#' "ctTSO/2021-03-17/PTC_SSqCMM05pc_L2100067" -#' ) -#' outdir <- sub("s3:/", "~/s3", s3dir) -#' -#' @export -dr_s3_download <- function(s3dir, outdir, page_size = 100, pattern = NULL, regexes = DR_FILE_REGEX, dryrun = FALSE) { - s3 <- paws.storage::s3() - e <- emojifont::emoji - fs::dir_create(outdir) - d <- s3_files_list_filter_relevant(s3dir, pattern = NULL, page_size = page_size, max_items = 1000, presign = FALSE, expiry_sec = 43200) - d <- d |> - dplyr::mutate(type = purrr::map_chr(.data$bname, \(x) match_regex(x, regexes))) |> - dplyr::select("type", "size", "path", "bname") - - # download recognisable dracarys files to outdir/{bname} - pattern <- pattern %||% ".*" # keep all recognisable files - d_filt <- d |> - dplyr::filter(!is.na(.data$type), grepl(pattern, .data$type)) |> - dplyr::mutate(out = file.path(outdir, .data$bname)) - if (!dryrun) { - cli::cli_alert_info("{date_log()} {e('arrow_heading_down')} Downloading files from {.file {s3dir}}") - d_filt |> - dplyr::rowwise() |> - dplyr::mutate( - s3bucket = sub("s3://(.*?)/.*", "\\1", .data$path), - s3key = sub("s3://(.*?)/(.*)", "\\2", .data$path), - dl = list( - s3$download_file( - Bucket = .data$s3bucket, Key = .data$s3key, Filename = .data$out - ) - ) - ) - } else { - cli::cli_alert_info("{date_log()} {e('camera')} Just list relevant files from {.file {s3dir}}") - d_filt |> - dplyr::select("path", "type", "size") |> - as.data.frame() |> - print() - } -} diff --git a/R/tidy.R b/R/tidy.R index a1b9313..06fac82 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -1,3 +1,30 @@ +#' Tidy Files +#' +#' @param x Tibble with `localpath` to file and the function `type` to parse it. +#' @param envir the environment in which to evaluate the function e.g. use `self` +#' when using inside R6 classes. +#' +#' @return Tibble with parsed data in a `data` list-column. +#' @examples +#' \dontrun{ +#' p1 <- "~/icav1/g/production/analysis_data/SBJ01155/umccrise/202408300c218043" +#' p2 <- "L2101566__L2101565/SBJ01155__PRJ211091-qc_summary.tsv.gz" +#' p <- file.path(p1, p2) +#' x <- tibble::tibble(type = "readr::read_tsv", localpath = p) +#' tidy_files(x) +#' } +#' +#' @export +tidy_files <- function(x, envir = parent.frame()) { + assertthat::assert_that(is.data.frame(x)) + assertthat::assert_that(all(c("type", "localpath") %in% colnames(x))) + x |> + dplyr::rowwise() |> + dplyr::mutate( + data = list(dr_func_eval(f = .data$type, v = .data$type, envir = envir)(.data$localpath)) + ) +} + #' Tidy UMCCR Results #' #' Tidies UMCCR workflow results into a list of tibbles and writes individual diff --git a/R/umccrise.R b/R/umccrise.R index e358d2d..ffbe5c5 100644 --- a/R/umccrise.R +++ b/R/umccrise.R @@ -1,80 +1,156 @@ -#' UmccriseCanRepTables R6 Class +#' Wf_umccrise R6 Class #' #' @description -#' Reads and writes tidy versions of files within the `cancer_report_tables` directory -#' output from the `umccrise` workflow. 
+#' Reads and writes tidy versions of files from the `umccrise` workflow #' #' @examples #' \dontrun{ -#' p1 <- "~/icav1/g/production/analysis_data/SBJ01155/umccrise/202408300c218043" -#' p2 <- "L2101566__L2101565/SBJ01155__PRJ211091/cancer_report_tables" -#' p <- file.path(p1, p2) -#' obj <- UmccriseCanRepTables$new(p) -#' obj$path -#' obj$contents -#' d <- obj$read() -#' obj$write(d, out_dir = tempdir(), prefix = "sampleA", out_format = "tsv") +#' +#' #---- LOCAL ----# +#' SubjectID <- "SBJ03043" +#' SampleID_tumor <- "PRJ230004" +#' prefix <- glue("{SubjectID}__{SampleID_tumor}") +#' p1_local <- "~/icav1/g/production/analysis_data" +#' p <- file.path(p1_local, "SBJ03043/umccrise/20240830ec648f40/L2300064__L2300063") +#' um1 <- Wf_umccrise$new(path = p, SubjectID = SubjectID, SampleID_tumor = SampleID_tumor) +#' um1$list_files(max_files = 10) +#' um1$list_files_filter_relevant() +#' d <- um1$download_files(max_files = 1000, dryrun = F) +#' d_tidy <- um1$tidy_files(d) +#' d_write <- um1$write( +#' d_tidy, +#' outdir = file.path(p, "dracarys_tidy"), +#' prefix = glue("{SubjectID}__{SampleID_tumor}"), +#' format = "tsv" +#' ) +#' +#' #---- GDS ----# +#' SubjectID <- "SBJ03043" +#' SampleID_tumor <- "PRJ230004" +#' prefix <- glue("{SubjectID}__{SampleID_tumor}") +#' p1_gds <- "gds://production/analysis_data" +#' p <- file.path(p1_gds, "SBJ03043/umccrise/20240830ec648f40/L2300064__L2300063") +#' outdir <- file.path(sub("gds:/", "~/icav1/g", p)) +#' token <- Sys.getenv("ICA_ACCESS_TOKEN") +#' um2 <- Wf_umccrise$new(path = p, SubjectID = SubjectID, SampleID_tumor = SampleID_tumor) +#' um2$list_files(max_files = 8) +#' um2$list_files_filter_relevant(ica_token = token, max_files = 500) +#' d <- um2$download_files( +#' outdir = outdir, ica_token = token, +#' max_files = 1000, dryrun = F +#' ) +#' d_tidy <- um2$tidy_files(d) +#' d_write <- um2$write( +#' d_tidy, +#' outdir = file.path(outdir, "dracarys_tidy"), +#' prefix = glue("{SubjectID}__{SampleID_tumor}"), +#' format = "tsv" +#' ) #' } #' #' @export -UmccriseCanRepTables <- R6::R6Class( - "UmccriseCanRepTables", +Wf_umccrise <- R6::R6Class( + "Wf_umccrise", + inherit = Wf, public = list( - #' @field path Path to the `cancer_report_tables` directory. - #' @field contents Tibble with file path, basename, and size. - path = NULL, - contents = NULL, - #' @description Create a new UmccriseCanRepTables object. - #' @param path Path to the `cancer_report_tables` directory. - initialize = function(path = NULL) { - stopifnot(is.character(path), length(path) == 1) - self$path <- normalizePath(path) - self$contents <- fs::dir_info(path, type = "file", recurse = TRUE) |> - dplyr::mutate( - bname = basename(.data$path), - size = as.character(trimws(.data$size)) - ) |> - dplyr::select("path", "bname", "size") + #' @field SubjectID The SubjectID of the sample (needed for path lookup). + #' @field SampleID_tumor The SampleID of the tumor sample (needed for path lookup). + SubjectID = NULL, + SampleID_tumor = NULL, + #' @description Create a new Wf_umccrise object. + #' @param path Path to directory with raw workflow results (from GDS, S3, or + #' local filesystem). + #' @param SubjectID The SubjectID of the sample (needed for path lookup). + #' @param SampleID_tumor The SampleID of the tumor sample (needed for path lookup). 
+ initialize = function(path = NULL, SubjectID = NULL, SampleID_tumor = NULL) { + wname <- "umccrise" + regexes <- tibble::tribble( + ~regex, ~fun, + "-chord\\.tsv\\.gz$", "chordtsv", + "-hrdetect\\.tsv\\.gz$", "hrdetecttsv", + "-snv_2015\\.tsv\\.gz$", "sigssnv2015tsv", + "-snv_2020\\.tsv\\.gz$", "sigssnv2020tsv", + "-dbs\\.tsv\\.gz$", "sigsdbstsv", + "-indel\\.tsv\\.gz$", "sigsindeltsv", + "-qc_summary\\.tsv\\.gz$", "qcsummarytsv", + "multiqc_conpair\\.txt$", "conpairmultiqc", + "-somatic\\.pcgr\\.json\\.gz$", "pcgrjson" + ) |> + dplyr::mutate(fun = paste0("read_", .data$fun)) + + super$initialize(path = path, wname = wname, regexes = regexes) + self$SubjectID <- SubjectID + self$SampleID_tumor <- SampleID_tumor }, - #' @description Print details about the cancer_report_tables directory. + #' @description Print details about the Workflow. #' @param ... (ignored). print = function(...) { - bnames <- self$contents |> - dplyr::mutate( - low = tolower(.data$bname), - ) |> - dplyr::arrange(.data$low) |> - dplyr::mutate( - n = dplyr::row_number(), - bn = glue("{.data$n}. {.data$bname} ({.data$size})") - ) |> - dplyr::pull("bn") - cat("#--- UmccriseCanRepTables ---#\n") - cat(glue("Path: {self$path}"), "\n") - cat("Contents:\n") - cat(bnames, sep = "\n") + res <- tibble::tribble( + ~var, ~value, + "path", self$path, + "wname", self$wname, + "filesystem", self$filesystem, + "SubjectID", self$SubjectID, + "SampleID_tumor", self$SampleID_tumor + ) + print(res) invisible(self) }, - #' @description Returns file with given pattern from the cancer_report_tables directory. - #' @param pat File pattern to look for. - grep_file = function(pat) { - x <- self$contents |> - dplyr::filter(grepl(pat, .data$path)) |> - dplyr::pull(.data$path) - if (length(x) > 1) { - fnames <- paste(x, collapse = ", ") - cli::cli_abort("More than 1 match found for {pat} ({fnames}). Aborting.") - } - if (length(x) == 0) { - return("") # file.exists("") returns FALSE - } - return(x) + #' @description List dracarys files under given path + #' @param max_files Max number of files to list (for gds/s3 only). + #' @param ica_token ICA access token (def: $ICA_ACCESS_TOKEN env var). + #' @param ... Passed on to the `gds_list_files_filter_relevant` or + #' the `s3_list_files_filter_relevant` function. + list_files_filter_relevant = function(max_files = 1000, + ica_token = Sys.getenv("ICA_ACCESS_TOKEN"), ...) { + path <- self$path + dir_final <- file.path(path, glue("{self$SubjectID}__{self$SampleID_tumor}")) + dir_work <- file.path(path, "work", glue("{self$SubjectID}__{self$SampleID_tumor}")) + dir_work_pcgr <- file.path(dir_work, "pcgr") # for pcgr json + f1 <- super$list_files_filter_relevant(path = dir_final, max_files = 300, ica_token = ica_token) + f2 <- super$list_files_filter_relevant(path = dir_work_pcgr, max_files = 50, ica_token = ica_token) + f_all <- dplyr::bind_rows(f1, f2) + return(f_all) }, - - #' @description Read `chord.tsv.gz` file output from umccrise. - #' - #' @param x (`character(1)`)\cr - #' Path to `chord.tsv.gz` file. + #' @description Download files from GDS/S3 to local filesystem. + #' @param outdir Path to output directory. + #' @param ica_token ICA access token (def: $ICA_ACCESS_TOKEN env var). + #' @param max_files Max number of files to list. + #' @param dryrun If TRUE, just list the files that will be downloaded (don't + #' download them). 
+ #' @param recursive Should files be returned recursively _in and under_ the specified + #' GDS directory, or _only directly in_ the specified GDS directory (def: TRUE via ICA API). + #' @param list_filter_fun Function to filter relevant files. + download_files = function(outdir, ica_token = Sys.getenv("ICA_ACCESS_TOKEN"), + max_files = 1000, dryrun = FALSE, recursive = NULL) { + super$download_files( + outdir = outdir, ica_token = ica_token, max_files = max_files, + dryrun = dryrun, recursive = recursive, + list_filter_fun = self$list_files_filter_relevant + ) + }, + #' @description Read `pcgr.json.gz` file. + #' @param x Path to file. + read_pcgrjson = function(x) { + j <- read_jsongz_jsonlite(x) + tmb <- + j[["content"]][["tmb"]][["variant_statistic"]] %||% + j[["content"]][["tmb"]][["v_stat"]] %||% + list(tmb_estimate = NA, n_tmb = NA) + tmb <- purrr::flatten(tmb) |> + tibble::as_tibble_row() |> + dplyr::select("tmb_estimate", "n_tmb") + msi <- j[["content"]][["msi"]][["prediction"]][["msi_stats"]] + # handle nulls + msi <- msi %||% list(fracIndels = NA, predicted_class = NA) + msi <- purrr::flatten(msi) |> + tibble::as_tibble_row() |> + dplyr::select("fracIndels", "predicted_class") + metrics <- dplyr::bind_cols(msi, tmb) + return(metrics) + }, + #' @description Read `chord.tsv.gz` cancer report file. + #' @param x Path to file. read_chordtsv = function(x) { ct <- readr::cols_only( p_hrd = "d", @@ -85,10 +161,8 @@ UmccriseCanRepTables <- R6::R6Class( ) read_tsvgz(x, col_types = ct) }, - #' @description Read `hrdetect.tsv.gz` file output from umccrise. - #' - #' @param x (`character(1)`)\cr - #' Path to `hrdetect.tsv.gz` file. + #' @description Read `hrdetect.tsv.gz` cancer report file. + #' @param x Path to file. read_hrdetecttsv = function(x) { ct <- readr::cols( .default = "d", @@ -97,24 +171,37 @@ UmccriseCanRepTables <- R6::R6Class( read_tsvgz(x, col_types = ct) |> dplyr::select(-c("sample")) }, - - - #' @description Read `snv_20XX.tsv.gz` file output from umccrise. - #' - #' @param x (`character(1)`)\cr - #' Path to `snv_20XX.tsv.gz` file. - read_sigs = function(x) { + #' @description Read signature cancer report file. + #' @param x Path to file. + read_sigstsv = function(x) { ct <- readr::cols( .default = "d", Signature = "c" ) read_tsvgz(x, col_types = ct) }, - - #' @description Read `qc_summary.tsv.gz` file output from umccrise. - #' - #' @param x (`character(1)`)\cr - #' Path to `qc_summary.tsv.gz` file. + #' @description Read `snv_2015.tsv.gz` sigs cancer report file. + #' @param x Path to file. + read_sigssnv2015tsv = function(x) { + self$read_sigstsv(x) + }, + #' @description Read `snv_2020.tsv.gz` sigs cancer report file. + #' @param x Path to file. + read_sigssnv2020tsv = function(x) { + self$read_sigstsv(x) + }, + #' @description Read `dbs.tsv.gz` sigs cancer report file. + #' @param x Path to file. + read_sigsdbstsv = function(x) { + self$read_sigstsv(x) + }, + #' @description Read `indel.tsv.gz` sigs cancer report file. + #' @param x Path to file. + read_sigsindeltsv = function(x) { + self$read_sigstsv(x) + }, + #' @description Read `qc_summary.tsv.gz` cancer report file. + #' @param x Path to file. read_qcsummarytsv = function(x) { d <- read_tsvgz(x, col_types = readr::cols(.default = "c")) d |> @@ -147,48 +234,92 @@ UmccriseCanRepTables <- R6::R6Class( "hypermutated", "bpi_enabled" ) }, - #' @description - #' Reads contents of `cancer_report_tables` directory output by umccrise. - #' - #' @return A list of tibbles. 
- #' @export - read = function() { - # now return all as list elements - list( - chord = self$grep_file("-chord\\.tsv\\.gz$") |> self$read_chordtsv(), - hrdetect = self$grep_file("-hrdetect\\.tsv\\.gz$") |> self$read_hrdetecttsv(), - sigs2015 = self$grep_file("-snv_2015\\.tsv\\.gz$") |> self$read_sigs(), - sigs2020 = self$grep_file("-snv_2020\\.tsv\\.gz$") |> self$read_sigs(), - sigsdbs = self$grep_file("-dbs\\.tsv\\.gz$") |> self$read_sigs(), - sigsindel = self$grep_file("-indel\\.tsv\\.gz$") |> self$read_sigs(), - qcsum = self$grep_file("-qc_summary\\.tsv\\.gz$") |> self$read_qcsummarytsv() + #' @description Read multiqc_conpair.txt file. + #' @param x Path to file. + read_conpairmultiqc = function(x) { + um_ref_samples <- c("Alice", "Bob", "Chen", "Elon", "Dakota") + um_ref_samples <- paste0(um_ref_samples, rep(c("_T", "_B", ""), each = length(um_ref_samples))) + cnames <- list( + old = c( + "Sample", "concordance_concordance", "concordance_used_markers", + "concordance_total_markers", "concordance_marker_threshold", + "concordance_min_mapping_quality", "concordance_min_base_quality", + "contamination" + ), + new = c( + "sampleid", "contamination", "concordance", "markers_used", + "markers_total", "marker_threshold", + "mapq_min", "baseq_min" + ) ) - }, - - #' @description - #' Writes tidied contents of `cancer_report_tables` directory output by umccrise. - #' - #' @param d Parsed object from `self$read()`. - #' @param prefix Prefix of output file(s). - #' @param out_dir Output directory. - #' @param out_format Format of output file(s). - #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`). - write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) { - if (!is.null(out_dir)) { - prefix <- file.path(out_dir, prefix) + ctypes <- list( + old = c("cddddddd"), + new = c("cddddddd") + ) + if (!file.exists(x)) { + return(empty_tbl(cnames$new, ctypes$new)) } - d_write <- d |> - tibble::enframe(name = "section") |> - dplyr::rowwise() |> - dplyr::mutate( - section_low = tolower(.data$section), - p = glue("{prefix}_{.data$section_low}"), - out = list(write_dracarys(obj = .data$value, prefix = .data$p, out_format = out_format, drid = drid)) - ) |> - dplyr::ungroup() |> - dplyr::select("section", "value") |> - tibble::deframe() - invisible(d_write) + d1 <- readr::read_tsv(x, col_types = readr::cols(.default = "d", Sample = "c")) + assertthat::assert_that(all(colnames(d1) == cnames$old)) + d1 |> + dplyr::filter(!.data$Sample %in% um_ref_samples) |> + dplyr::relocate("contamination", .after = "Sample") |> + rlang::set_names(cnames$new) } - ) + ) # end public ) + +#' umccrise Download Tidy and Write +#' +#' Downloads files from the `umccrise` workflow and writes them in a tidy format. +#' +#' @param path Path to directory with raw workflow results (from GDS, S3, or +#' local filesystem). +#' @param SubjectID The SubjectID of the sample (needed for path lookup). +#' @param SampleID_tumor The SampleID of the tumor sample (needed for path lookup). +#' @param outdir Path to output directory. +#' @param max_files Max number of files to list. +#' @param ica_token ICA access token (def: $ICA_ACCESS_TOKEN env var). +#' @param dryrun If TRUE, just list the files that will be downloaded (don't +#' download them). +#' @param format Format of output files. +#' @return List where each element is a tidy tibble of a umccrise file. 
+#' +#' @examples +#' \dontrun{ +#' SubjectID <- "SBJ03043" +#' SampleID_tumor <- "PRJ230004" +#' p1_gds <- glue("gds://production/analysis_data/{SubjectID}/umccrise") +#' p <- file.path(p1_gds, "20240830ec648f40/L2300064__L2300063") +#' outdir <- file.path(sub("gds:/", "~/icav1/g", p)) +#' token <- Sys.getenv("ICA_ACCESS_TOKEN") +#' d <- Wf_umccrise_download_tidy_write( +#' path = p, SubjectID = SubjectID, SampleID_tumor = SampleID_tumor, +#' outdir = outdir, +#' dryrun = F +#' ) +#' } +#' @export +Wf_umccrise_download_tidy_write <- function(path, SubjectID, SampleID_tumor, + outdir, format = "rds", max_files = 1000, + ica_token = Sys.getenv("ICA_ACCESS_TOKEN"), + dryrun = FALSE) { + um <- Wf_umccrise$new( + path = path, SubjectID = SubjectID, SampleID_tumor = SampleID_tumor + ) + d_dl <- um$download_files( + outdir = outdir, ica_token = ica_token, + max_files = max_files, dryrun = dryrun + ) + if (!dryrun) { + d_tidy <- um$tidy_files(d_dl) + d_write <- um$write( + d_tidy, + outdir = file.path(outdir, "dracarys_tidy"), + prefix = glue("{SubjectID}__{SampleID_tumor}"), + format = format + ) + return(d_write) + } + return(d_dl) +} diff --git a/R/utils.R b/R/utils.R index f61fec9..378588b 100644 --- a/R/utils.R +++ b/R/utils.R @@ -117,6 +117,34 @@ write_dracarys <- function(obj, prefix, out_format, drid = NULL) { return(invisible(obj)) } +#' Write List of Tidy Tibbles +#' +#' @param list_of_tbls List of tidy tibbles. +#' @param out_dir Output directory. +#' @param prefix Prefix of output file(s). +#' @param out_format Format of output file(s). +#' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`). +#' +#' @return Tibble with nested objects that have been written to the output directory. +#' @export +write_dracarys_list_of_tbls <- function(list_of_tbls, out_dir = NULL, prefix = NULL, out_format = "tsv", drid = NULL) { + assertthat::assert_that(!is.null(prefix)) + if (!is.null(out_dir)) { + prefix <- file.path(out_dir, prefix) + } + d_write <- list_of_tbls |> + tibble::enframe(name = "section") |> + dplyr::rowwise() |> + dplyr::mutate( + section_low = tolower(.data$section), + p = glue("{prefix}_{.data$section_low}"), + out = list(write_dracarys(obj = .data$value, prefix = .data$p, out_format = out_format, drid = drid)) + ) |> + dplyr::ungroup() |> + dplyr::select("section", "value") |> + tibble::deframe() + invisible(d_write) +} #' Create Empty Tibble Given Column Names #' @@ -165,7 +193,24 @@ read_jsongz_rjsonio <- function(x, ...) { RJSONIO::fromJSON(x, ...) } - +#' Grep File Pattern +#' +#' @param path Path to look for file. +#' @param regexp A regular expression (e.g. [.]csv$) passed on to `grep()` to filter paths. +#' +#' @return The path to the file or an empty string if no match is found. +#' @export +grep_file <- function(path = ".", regexp) { + x <- fs::dir_ls(path, recurse = TRUE, type = "file", regexp = regexp) + if (length(x) > 1) { + fnames <- paste(x, collapse = ", ") + cli::cli_abort("More than 1 match found for {regexp} ({fnames}). 
Aborting.") + } + if (length(x) == 0) { + return("") # file.exists("") returns FALSE + } + return(x) +} #' @noRd dummy1 <- function() { diff --git a/inst/rmd/umccr_workflows/umccrise/.gitignore b/inst/rmd/umccr_workflows/umccrise/.gitignore new file mode 100644 index 0000000..0182e8f --- /dev/null +++ b/inst/rmd/umccr_workflows/umccrise/.gitignore @@ -0,0 +1,3 @@ +nogit + +/.quarto/ diff --git a/inst/rmd/umccr_workflows/umccrise/dl_and_tidy.R b/inst/rmd/umccr_workflows/umccrise/dl_and_tidy.R new file mode 100755 index 0000000..3b5ede3 --- /dev/null +++ b/inst/rmd/umccr_workflows/umccrise/dl_and_tidy.R @@ -0,0 +1,79 @@ +#!/usr/bin/env Rscript + +{ + require(dplyr) + require(assertthat, include.only = "assert_that") + require(dracarys, include.only = "Wf_umccrise_download_tidy_write") + require(glue, include.only = "glue") + require(here, include.only = "here") + require(rportal, include.only = c("portaldb_query_workflow")) + require(tidyr, include.only = "separate_wider_delim") +} + +query_workflow_umccrise <- function(start_date, end_date) { + q1 <- glue( + "WHERE \"type_name\" = 'umccrise'", + "AND \"start\" >= date(\'{start_date}\')", + "AND \"end\" <= date(\'{end_date}\')", + "ORDER BY \"start\" DESC;" + ) + rportal::portaldb_query_workflow(q1) +} + +query_limsrow_libids <- function(libids) { + assertthat::assert_that(!is.null(libids), all(grepl("^L", libids))) + libids <- unique(libids) |> + paste(collapse = "|") + q1 <- glue("WHERE REGEXP_LIKE(\"library_id\", '{libids}');") + rportal::portaldb_query_limsrow(q1) +} + +# first read in the workflows table, extract metadata, then join with lims +start_date <- "2024-08-29" +end_date <- "2024-09-01" +meta_raw <- query_workflow_umccrise(start_date, end_date) +meta <- meta_raw |> + rportal::meta_umccrise() +lims_raw <- query_limsrow_libids(meta$LibraryID_tumor) +lims <- lims_raw |> + tidyr::separate_wider_delim( + library_id, + delim = "_", names = c("library_id", "topup_or_rerun"), too_few = "align_start" + ) |> + select( + subject_id, library_id, sample_id, sample_name, + external_subject_id, external_sample_id, + project_name, project_owner, + source, quality, workflow + ) |> + distinct() +table(lims$library_id %in% meta$LibraryID_tumor) # double-check + +meta_lims <- meta |> + left_join(lims, by = c("LibraryID_tumor" = "library_id")) |> + mutate(rownum = row_number()) |> + select( + rownum, wfr_id, version, end_status, start, end, portal_run_id, SubjectID, LibraryID_tumor, LibraryID_normal, + SampleID_tumor, SampleID_normal, gds_outdir_umccrise, gds_indir_dragen_somatic, external_subject_id, external_sample_id, + project_owner, project_name, source, quality, workflow + ) +meta_lims |> + saveRDS(here(glue("inst/rmd/umccr_workflows/umccrise/nogit/meta/{start_date}_{end_date}.rds"))) + +d <- meta_lims |> + rowwise() |> + mutate( + indir = .data$gds_outdir_umccrise, + outdir = file.path(sub("gds://", "", .data$indir)), + outdir = file.path(normalizePath("~/icav1/g"), .data$outdir), + res = list( + dracarys::Wf_umccrise_download_tidy_write( + path = .data$indir, SubjectID = .data$SubjectID, SampleID_tumor = .data$SampleID_tumor, + outdir = .data$outdir, max_files = 1000, dryrun = FALSE + ) + ) + ) |> + ungroup() + +d |> + saveRDS(here(glue("inst/rmd/umccr_workflows/umccrise/nogit/results_{start_date}_{end_date}.rds"))) diff --git a/inst/rmd/umccr_workflows/umccrise/render.sh b/inst/rmd/umccr_workflows/umccrise/render.sh new file mode 100644 index 0000000..bfff2c7 --- /dev/null +++ b/inst/rmd/umccr_workflows/umccrise/render.sh @@ -0,0 +1,9 @@ 
+date_start="2024-08-29" +date_end="2024-09-01" +out="umccrise_${date_start}_${date_end}.html" + +quarto render summary_umccrise.qmd \ + -P date_start:${date_start} \ + -P date_end:${date_end} \ + -o ${out} \ + --output-dir nogit/html diff --git a/inst/rmd/umccr_workflows/umccrise/summary_umccrise.qmd b/inst/rmd/umccr_workflows/umccrise/summary_umccrise.qmd new file mode 100644 index 0000000..0f9390e --- /dev/null +++ b/inst/rmd/umccr_workflows/umccrise/summary_umccrise.qmd @@ -0,0 +1,369 @@ +--- +title: "{{< meta params.title >}}" +subtitle: "Period: `r paste(params$date_start, ' to ', params$date_end)`" +author: "UMCCR - Genomics Platform Group" +date: now +date-format: "YYYY-MM-DD HH:mm Z" +execute: + echo: false +format: + html: + toc: true + toc-expand: 1 + toc-title: Contents + toc-location: body + highlight-style: github + number-sections: false + link-external-icon: true + link-external-newwindow: true + embed-resources: true + code-copy: true + code-link: true + code-fold: true + code-block-border-left: true + smooth-scroll: true + grid: + body-width: 1300px +params: + title: "UMCCR umccrise Workflow Summary" + date_start: "XXXX-XX-XX" + date_end: "XXXX-XX-XX" +--- + +```{r} +#| label: pkg_load +#| message: false +{ + require(dplyr) # import all dplyr funcs + require(readr, include.only = c("read_rds")) + require(purrr, include.only = c("map")) + require(tidyr, include.only = c("unnest_wider")) + require(dracarys, include.only = c("session_info_kable")) + require(glue, include.only = "glue") + require(here, include.only = "here") + require(knitr, include.only = "kable") + require(reactable, include.only = "reactable") + require(ggplot2, include.only = c("ggplot", "aes")) + require(lubridate, include.only = c("as_datetime")) + require(plotly, include.only = c("ggplotly")) +} +set.seed(42) +``` + +```{r} +#| label: load_data +date_start <- "2024-08-29" +date_end <- "2024-09-01" +d_raw <- readr::read_rds(here(glue("inst/rmd/umccr_workflows/umccrise/nogit/results_{date_start}_{date_end}.rds"))) +``` + +```{r} +#| label: funcs +dt_view <- function(x, id, ...) { + htmltools::browsable( + htmltools::tagList( + htmltools::tags$button( + htmltools::tagList(fontawesome::fa("download"), "CSV"), + onclick = glue("Reactable.downloadDataCSV('{id}', '{id}.csv')") + ), + x |> + reactable::reactable( + bordered = TRUE, + filterable = TRUE, + fullWidth = TRUE, + height = 800, + highlight = TRUE, + pagination = FALSE, + resizable = TRUE, + searchable = TRUE, + sortable = TRUE, + striped = TRUE, + wrap = FALSE, + elementId = id, + ... 
+ ) + ) + ) +} +``` + +## Metadata + +```{r} +#| label: metadata +meta <- d_raw |> + select( + rownum, portal_run_id, SubjectID, LibraryID_tumor, SampleID_tumor, external_subject_id, external_sample_id, + project_owner, project_name, source, quality, workflow + ) +dt_view(meta, id = "metadata") +``` + +## Results + +```{r} +#| label: process +# one row per file type - not all samples have sigsdbstsv +d <- d_raw |> + tidyr::unnest_longer(res, indices_to = "filetype") +# main_cols <- c("rownum", "portal_run_id", "SubjectID", "LibraryID_tumor") +main_cols <- c("portal_run_id") +``` + +```{r} +#| label: qcsum +qcsum <- d |> + filter(filetype == "qcsummarytsv") |> + select(all_of(main_cols), res) |> + unnest_wider(res) +``` + +```{r} +#| label: pcgr +pcgr <- d |> + filter(filetype == "pcgrjson") |> + select(all_of(main_cols), res) |> + unnest_wider(res) |> + rename( + msi_fraction_indels_pcgr = "fracIndels", + msi_pcgr = "predicted_class", + tmb_pcgr = "tmb_estimate", + n_tmb_pcgr = "n_tmb" + ) |> + mutate(msi_pcgr = sub(" \\(.*\\)", "", msi_pcgr)) +``` + +```{r} +#| label: conpair +sampleids <- d_raw |> + select(SampleID_tumor, SampleID_normal) |> + tidyr::pivot_longer(everything(), values_to = "sampleid") |> + mutate(phenotype = sub("SampleID_", "", .data$name)) |> + select(sampleid, phenotype) |> + distinct() +conpair_raw <- d |> + filter(filetype == "conpairmultiqc") |> + select(all_of(main_cols), res) |> + tidyr::unnest(res) |> + left_join(sampleids, by = "sampleid") |> + select(-sampleid) +conpair_tumor <- conpair_raw |> + filter(phenotype == "tumor") +conpair_normal <- conpair_raw |> + filter(phenotype == "normal") |> + select(portal_run_id, contamination) +conpair <- conpair_tumor |> + left_join(conpair_normal, by = "portal_run_id", suffix = c("_tumor", "_normal")) |> + select(portal_run_id, + contamination_tumor_conpair = "contamination_tumor", + contamination_normal_conpair = "contamination_normal", + concordance_conpair = "concordance" + ) +``` + +```{r} +#| label: hrd +hrd_chord <- d |> + filter(filetype == "chordtsv") |> + unnest_wider(res) |> + select(all_of(main_cols), + chord_p_hrd = "p_hrd", + chord_hr_status = "hr_status", + chord_hrd_type = "hrd_type", + chord_p_BRCA1 = "p_BRCA1", + chord_p_BRCA2 = "p_BRCA2" + ) +hrd_hrdetect <- d |> + filter(filetype == "hrdetecttsv") |> + unnest_wider(res) |> + select(all_of(main_cols), hrdetect_prob = "Probability") +``` + +```{r} +#| label: sigs +sigs_snv2015 <- d |> + filter(filetype == "sigssnv2015tsv") |> + select(all_of(main_cols), res) |> + tidyr::unnest_wider(res) |> + tidyr::unnest_longer(col = c(Rank, Signature, Contribution, RelFreq)) +sigs_snv2020 <- d |> + filter(filetype == "sigssnv2020tsv") |> + select(all_of(main_cols), res) |> + tidyr::unnest_wider(res) |> + tidyr::unnest_longer(col = c(Rank, Signature, Contribution, RelFreq)) +sigs_dbs <- d |> + filter(filetype == "sigsdbstsv") |> + select(all_of(main_cols), res) |> + tidyr::unnest_wider(res) |> + tidyr::unnest_longer(col = c(Rank, Signature, Contribution, RelFreq)) +sigs_indel <- d |> + filter(filetype == "sigsindeltsv") |> + select(all_of(main_cols), res) |> + tidyr::unnest_wider(res) |> + tidyr::unnest_longer(col = c(Rank, Signature, Contribution, RelFreq)) +dsig <- bind_rows( + list( + snv2015 = sigs_snv2015, snv2020 = sigs_snv2020, dbs = sigs_dbs, indel = sigs_indel + ), + .id = "Sig_group" +) + +# keep top two ranked sigs +dsig_filt <- dsig |> + group_by(Sig_group, portal_run_id) |> + mutate(tot_sig_vars = sum(Contribution)) |> + arrange(Rank) |> + slice_head(n = 2) 
|> + # some sigs have same Rank so use explicit sig_rank + mutate(sig_rank = row_number()) |> + ungroup() |> + mutate( + sig_summary = glue("{Signature} ({RelFreq} = {Contribution} / {tot_sig_vars})") + ) |> + select(Sig_group, portal_run_id, sig_rank, sig_summary) |> + tidyr::pivot_wider(names_from = sig_rank, values_from = sig_summary, names_prefix = "rank") |> + mutate(sig_top2 = paste(rank1, rank2, sep = ", ")) |> + select(Sig_group, portal_run_id, sig_top2) |> + tidyr::pivot_wider(names_from = Sig_group, values_from = sig_top2) |> + select(portal_run_id, snv2015, snv2020, dbs, indel) +``` + +```{r} +#| label: qc_all +dall <- d_raw |> + select( + rownum, + date_analysed = "start", portal_run_id, + SubjectID, LibraryID_tumor, SampleID_tumor, + external_subject_id, external_sample_id, + project_owner, project_name, source, quality, workflow + ) |> + left_join(qcsum, by = "portal_run_id") |> + left_join(hrd_chord, by = "portal_run_id") |> + left_join(hrd_hrdetect, by = "portal_run_id") |> + left_join(pcgr, by = "portal_run_id") |> + left_join(conpair, by = "portal_run_id") +``` + +### Summary Metrics + +```{r} +#| label: summary_metrics +dt_view(dall, "summary_metrics") +``` + +### HRD Plot + +```{r} +#| label: hrd_plot +#| fig-width: 15 +#| fig-height: 15 +p1 <- dall |> + mutate(sbj = glue("{SubjectID}_{LibraryID_tumor}")) |> + select(date_analysed, sbj, chord = hrd_chord, hrdetect = hrd_hrdetect) |> + tidyr::pivot_longer(chord:hrdetect, names_to = "method", values_to = "probability") |> + ggplot2::ggplot(aes(x = date_analysed, y = probability, label = sbj)) + + ggplot2::geom_point(aes(colour = method)) + + ggplot2::geom_line(aes(group = sbj), linewidth = 0.05) + + ggplot2::theme_bw() + + ggplot2::ggtitle("CHORD vs. HRDetect per SubjectID") + +plotly::ggplotly(p1) +``` + +### Signatures + +#### All (SNV, Indel, DBS) + +```{r} +#| label: sig_results_all +dsig |> + left_join(meta |> select(rownum, portal_run_id, SubjectID, LibraryID_tumor), + by = "portal_run_id" + ) |> + select(rownum, portal_run_id, SubjectID, LibraryID_tumor, everything()) |> + dt_view("sig_results_all") +``` + +#### Top 2 + +```{r} +#| label: sig_results_top2 +dsig_filt |> + left_join(meta |> select(rownum, portal_run_id, SubjectID, LibraryID_tumor), + by = "portal_run_id" + ) |> + select(rownum, portal_run_id, SubjectID, LibraryID_tumor, everything()) |> + arrange(rownum) |> + dt_view("sig_results_top2") +``` + +#### Top 3 SNV2015 + +```{r} +#| label: sig_results +#| fig-width: 15 +#| fig-height: 65 + +sig_order2015 <- paste0("Sig", 1:30) +# sig_order2020 <- paste0( +# "SBS", +# c( +# 1:6, +# paste0(7, c("a", "b", "c", "d")), +# 8:9, +# paste0(10, c("a", "b", "c", "d")), +# 11:16, +# paste0(17, c("a", "b")), +# 18:60, +# 84:94 +# ) +# ) + +p2_prep <- dsig |> + filter( + Sig_group == "snv2015", + Rank %in% c(1:3) + ) |> + left_join(dall |> select(portal_run_id, date_analysed, SubjectID, LibraryID_tumor), by = "portal_run_id") |> + mutate(sbj = as.character(glue("{SubjectID}_{LibraryID_tumor}"))) |> + select(date_analysed, sbj, Sig_group, Rank, Signature, Contribution, RelFreq) |> + mutate(Signature = factor(Signature, levels = sig_order2015)) +p2 <- p2_prep |> + ggplot2::ggplot(aes(x = Contribution, y = sbj, fill = Signature, text = sbj)) + + ggplot2::geom_bar(position = "fill", stat = "identity") + + ggplot2::theme_bw(base_size = 7) + +plotly::ggplotly(p2, tooltip = c("x", "text", "fill")) +``` + +## Metadata Summary + +::: {.panel-tabset .nav-pills} + +### Project Name/Owner + +```{r} +#| label: project_owner_name 
+dall |> + count(project_name, project_owner) |> + knitr::kable() +``` + +### Source / Quality + +```{r} +#| label: source_quality +count(dall, source, quality) |> knitr::kable() +``` + +### Workflow + +```{r} +#| label: workflow_summary +count(dall, workflow) |> knitr::kable() +``` + +::: + + diff --git a/inst/scripts/umccrise_run.R b/inst/scripts/umccrise_run.R index 6602d87..ea4d2f4 100644 --- a/inst/scripts/umccrise_run.R +++ b/inst/scripts/umccrise_run.R @@ -1,74 +1,62 @@ require(dracarys) -require(here) -require(glue) +require(rportal, include.only = "portaldb_query_workflow") +require(here, include.only = "here") +require(glue, include.only = "glue") +require(readr, include.only = "read_rds") require(dplyr) -require(readr) - -#---- GDS ----# -# read last 1000 umccrise runs from portal -# 475 from 2022-01-24 until 2023-09-03, of which 449 Succeeded -date1 <- "2023-09-04" -pmeta_raw_rds <- here(glue("nogit/umccrise/rds/portal_meta/{date1}_pmeta_raw.rds")) -# pmeta_raw <- dracarys::portal_meta_read(rows = 1000, params = "&type_name=umccrise") -# saveRDS(pmeta_raw, file = pmeta_raw_rds) -pmeta <- readr::read_rds(pmeta_raw_rds) |> - dracarys::meta_umccrise(status = "Succeeded") -lims_raw_rds <- here(glue("nogit/umccrise/rds/lims/{date1}_lims_raw.rds")) -# lims_raw <- dracarys::glims_read() +require(tidyr, include.only = "separate_wider_delim") + +start_date <- "2024-08-29" +query_workflow_umccrise <- function(start_date) { + q1 <- glue( + "WHERE \"type_name\" = 'umccrise' AND \"start\" > date(\'{start_date}\') ", + "ORDER BY \"start\" DESC;" + ) + rportal::portaldb_query_workflow(q1) +} + +query_limsrow_libids <- function(libids) { + assertthat::assert_that(!is.null(libids), all(grepl("^L", libids))) + libids <- unique(libids) |> + paste(collapse = "|") + q1 <- glue("WHERE REGEXP_LIKE(\"library_id\", '{libids}');") + rportal::portaldb_query_limsrow(q1) +} + +# p_raw <- query_workflow_umccrise(start_date) +p_raw_rds <- here(glue("nogit/data_portal/workflows/{start_date}.rds")) +# saveRDS(p_raw, file = p_raw_rds) +p_raw <- readr::read_rds(p_raw_rds) + +p <- p_raw |> + rportal::meta_umccrise(status = "Succeeded") +# lims_raw <- query_limsrow_libids(p$LibraryID_tumor) +lims_raw_rds <- here(glue("nogit/data_portal/lims/{start_date}.rds")) # saveRDS(lims_raw, file = lims_raw_rds) +# L2100192 is L2100192_rerun in the lims, 15 libs are rerun/topup/topup2 lims_raw <- readr::read_rds(lims_raw_rds) lims <- lims_raw |> - filter(Type == "WGS") |> - filter(LibraryID %in% c(pmeta$LibraryID_normal, pmeta$LibraryID_tumor)) -table(pmeta$LibraryID_tumor %in% lims$LibraryID) -table(pmeta$LibraryID_normal %in% lims$LibraryID) - -# The final results sit under gds_outdir_umccrise/__/ -# We need to get the SampleID_tumor for runs before 2023-04-07. We can do that -# by using the LibraryID_tumor to match up with the glims. 
-missing_tumor_sampleid <- pmeta |> - filter(end < "2023-04-07") |> - pull(LibraryID_tumor) - -table(missing_tumor_sampleid %in% lims$LibraryID) -libid2sampid <- lims |> - filter(LibraryID %in% missing_tumor_sampleid) |> - select(LibraryID_tumor = LibraryID, SampleID_tumor = SampleID) - -d <- pmeta |> - left_join(libid2sampid, by = "LibraryID_tumor") |> - mutate(SampleID_tumor = if_else(is.na(SampleID_tumor.x), SampleID_tumor.y, SampleID_tumor.x)) |> - select(-c(SampleID_tumor.x, SampleID_tumor.y)) |> - relocate(SampleID_tumor, .before = SampleID_normal) |> - mutate(gds_outdir_umccrise = glue("{.data$gds_outdir_umccrise}/{.data$SubjectID}__{.data$SampleID_tumor}")) -d - -# final portal meta for umccrise runs -# columns: -# "id", "wfr_name", "wfr_id", "version", "end_status", "start", "end", "portal_run_id", -# "SubjectID", "LibraryID_tumor", "LibraryID_normal", "SampleID_tumor", "SampleID_normal", -# "gds_outdir_umccrise", "gds_indir_dragen_somatic", "gds_indir_dragen_germline", "gds_infile_genomes_tar" -saveRDS(d, file = here(glue("nogit/umccrise/rds/portal_meta/{date1}_pmeta_final.rds"))) - -#---- S3 ----# -pat <- "qc_summary.tsv.gz" -rows <- 1000 -d_s3_raw <- dracarys::s3_search(pat = pat, rows = rows) - -d_s3 <- d_s3_raw |> - arrange(desc(date_aest)) |> - mutate( - bname = basename(path), - dir1 = dirname(path), # path/to/dirA/cancer_report_tables - dir2 = basename(dirname(dir1)), # dirA - sbj_samp_lib = sub(".*__(.*)", "\\1", dir2), - SubjectID = sub("(SBJ[0-9]{5})_.*", "\\1", sbj_samp_lib), - SampleID_tumor = sub("SBJ.*?_(.*?)_.*", "\\1", sbj_samp_lib), - LibraryID_tumor = sub("SBJ.*?_.*?_(.*)", "\\1", sbj_samp_lib), - rerun = grepl("rerun", .data$LibraryID_tumor) + tidyr::separate_wider_delim( + library_id, + delim = "_", names = c("library_id", "topup_or_rerun"), too_few = "align_start" ) |> - select(dir1, SubjectID, LibraryID_tumor, SampleID_tumor, date = date_aest, rerun) + select( + subject_id, library_id, sample_id, sample_name, + external_subject_id, external_sample_id, + project_name, project_owner, + source, quality + ) |> + distinct() +table(lims$library_id %in% p$LibraryID_tumor) # double-check + +d <- p |> + left_join(lims, by = c("LibraryID_tumor" = "library_id")) |> + mutate(gds_outdir_umccrise = glue("{.data$gds_outdir_umccrise}/{.data$SubjectID}__{.data$SampleID_tumor}")) |> + select( + wfr_id, version, end_status, start, end, portal_run_id, SubjectID, LibraryID_tumor, LibraryID_normal, + SampleID_tumor, SampleID_normal, gds_outdir_umccrise, gds_indir_dragen_somatic, external_subject_id, external_sample_id, + project_owner, project_name, source, quality + ) +d -date2 <- "2023-09-12" -saveRDS(d_s3, file = here(glue("nogit/umccrise/rds/portal_meta/{date2}_pmeta_s3.rds"))) -# now we have S3 paths and metadata, so all we need is to generate presigned URLs and read the data +saveRDS(d, file = here(glue("nogit/data_portal/workflows/umccrise_tidy_{start_date}.rds"))) diff --git a/man/BclconvertReports375.Rd b/man/BclconvertReports375.Rd index 01aba8b..e644a84 100644 --- a/man/BclconvertReports375.Rd +++ b/man/BclconvertReports375.Rd @@ -10,8 +10,8 @@ https://support-docs.illumina.com/SW/dragen_v42/Content/SW/DRAGEN/OutputFiles.ht } \examples{ \dontrun{ -p1 <- "240816_A01052_0220_AHM7VHDSXC/202408195d4f2fc4/Reports" -b <- here::here("nogit/bcl_convert", p1) |> +p1 <- "nogit/bcl_convert/WGS_TsqNano/Reports" +b <- here::here(p1) |> BclconvertReports375$new() b$path b$contents diff --git a/man/UmccriseCanRepTables.Rd b/man/UmccriseCanRepTables.Rd deleted file mode 100644 index 
67e20f3..0000000 --- a/man/UmccriseCanRepTables.Rd +++ /dev/null @@ -1,231 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/umccrise.R -\name{UmccriseCanRepTables} -\alias{UmccriseCanRepTables} -\title{UmccriseCanRepTables R6 Class} -\description{ -Reads and writes tidy versions of files within the \code{cancer_report_tables} directory -output from the \code{umccrise} workflow. -} -\examples{ -\dontrun{ -p1 <- "~/icav1/g/production/analysis_data/SBJ01155/umccrise/202408300c218043" -p2 <- "L2101566__L2101565/SBJ01155__PRJ211091/cancer_report_tables" -p <- file.path(p1, p2) -obj <- UmccriseCanRepTables$new(p) -obj$path -obj$contents -d <- obj$read() -obj$write(d, out_dir = tempdir(), prefix = "sampleA", out_format = "tsv") -} - -} -\section{Public fields}{ -\if{html}{\out{
}} -\describe{ -\item{\code{path}}{Path to the \code{cancer_report_tables} directory.} - -\item{\code{contents}}{Tibble with file path, basename, and size.} -} -\if{html}{\out{
}} -} -\section{Methods}{ -\subsection{Public methods}{ -\itemize{ -\item \href{#method-UmccriseCanRepTables-new}{\code{UmccriseCanRepTables$new()}} -\item \href{#method-UmccriseCanRepTables-print}{\code{UmccriseCanRepTables$print()}} -\item \href{#method-UmccriseCanRepTables-grep_file}{\code{UmccriseCanRepTables$grep_file()}} -\item \href{#method-UmccriseCanRepTables-read_chordtsv}{\code{UmccriseCanRepTables$read_chordtsv()}} -\item \href{#method-UmccriseCanRepTables-read_hrdetecttsv}{\code{UmccriseCanRepTables$read_hrdetecttsv()}} -\item \href{#method-UmccriseCanRepTables-read_sigs}{\code{UmccriseCanRepTables$read_sigs()}} -\item \href{#method-UmccriseCanRepTables-read_qcsummarytsv}{\code{UmccriseCanRepTables$read_qcsummarytsv()}} -\item \href{#method-UmccriseCanRepTables-read}{\code{UmccriseCanRepTables$read()}} -\item \href{#method-UmccriseCanRepTables-write}{\code{UmccriseCanRepTables$write()}} -\item \href{#method-UmccriseCanRepTables-clone}{\code{UmccriseCanRepTables$clone()}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-UmccriseCanRepTables-new}{}}} -\subsection{Method \code{new()}}{ -Create a new UmccriseCanRepTables object. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{UmccriseCanRepTables$new(path = NULL)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{path}}{Path to the \code{cancer_report_tables} directory.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-UmccriseCanRepTables-print}{}}} -\subsection{Method \code{print()}}{ -Print details about the cancer_report_tables directory. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{UmccriseCanRepTables$print(...)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{...}}{(ignored).} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-UmccriseCanRepTables-grep_file}{}}} -\subsection{Method \code{grep_file()}}{ -Returns file with given pattern from the cancer_report_tables directory. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{UmccriseCanRepTables$grep_file(pat)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{pat}}{File pattern to look for.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-UmccriseCanRepTables-read_chordtsv}{}}} -\subsection{Method \code{read_chordtsv()}}{ -Read \code{chord.tsv.gz} file output from umccrise. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{UmccriseCanRepTables$read_chordtsv(x)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{x}}{(\code{character(1)})\cr -Path to \code{chord.tsv.gz} file.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-UmccriseCanRepTables-read_hrdetecttsv}{}}} -\subsection{Method \code{read_hrdetecttsv()}}{ -Read \code{hrdetect.tsv.gz} file output from umccrise. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{UmccriseCanRepTables$read_hrdetecttsv(x)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{x}}{(\code{character(1)})\cr -Path to \code{hrdetect.tsv.gz} file.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-UmccriseCanRepTables-read_sigs}{}}} -\subsection{Method \code{read_sigs()}}{ -Read \code{snv_20XX.tsv.gz} file output from umccrise. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{UmccriseCanRepTables$read_sigs(x)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{x}}{(\code{character(1)})\cr -Path to \code{snv_20XX.tsv.gz} file.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-UmccriseCanRepTables-read_qcsummarytsv}{}}} -\subsection{Method \code{read_qcsummarytsv()}}{ -Read \code{qc_summary.tsv.gz} file output from umccrise. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{UmccriseCanRepTables$read_qcsummarytsv(x)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{x}}{(\code{character(1)})\cr -Path to \code{qc_summary.tsv.gz} file.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-UmccriseCanRepTables-read}{}}} -\subsection{Method \code{read()}}{ -Reads contents of \code{cancer_report_tables} directory output by umccrise. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{UmccriseCanRepTables$read()}\if{html}{\out{
}} -} - -\subsection{Returns}{ -A list of tibbles. -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-UmccriseCanRepTables-write}{}}} -\subsection{Method \code{write()}}{ -Writes tidied contents of \code{cancer_report_tables} directory output by umccrise. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{UmccriseCanRepTables$write( - d, - out_dir = NULL, - prefix, - out_format = "tsv", - drid = NULL -)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{d}}{Parsed object from \code{self$read()}.} - -\item{\code{out_dir}}{Output directory.} - -\item{\code{prefix}}{Prefix of output file(s).} - -\item{\code{out_format}}{Format of output file(s).} - -\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-UmccriseCanRepTables-clone}{}}} -\subsection{Method \code{clone()}}{ -The objects of this class are cloneable with this method. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{UmccriseCanRepTables$clone(deep = FALSE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{deep}}{Whether to make a deep clone.} -} -\if{html}{\out{
}} -} -} -} diff --git a/man/Wf.Rd b/man/Wf.Rd index d5c99ae..f785399 100644 --- a/man/Wf.Rd +++ b/man/Wf.Rd @@ -2,38 +2,83 @@ % Please edit documentation in R/Wf.R \name{Wf} \alias{Wf} -\title{Workflow R6 Class} +\title{Workflow} \description{ Workflow is a base R6 class representing a bioinformatic workflow run from a UMCCR workflow manager. + +A workflow has: +\itemize{ +\item a directory path with all the raw output files (either on GDS, S3 or +local filesystem) +\item a subset of files that are of interest for ingestion +\itemize{ +\item tibble with full path and basename columns +} +\item a set of parsers that can parse and tidy those files +\itemize{ +\item each parser takes a path and returns a tidy tibble +} +\item a list of tidy tibbles (or a tibble with nested tibbles) +} } \examples{ -p1 <- system.file("extdata/portaldb_workflow_top4.rds", package = "rportal") |> - readRDS() |> - dplyr::filter(type_name == "umccrise") |> - dplyr::slice(1) -w <- Wf$new( - prid = p1$portal_run_id, type = p1$type_name, start = p1$start, end = p1$end, - status = p1$end_status, input = p1$input, output = p1$output +\dontrun{ +regexes <- tibble::tribble( + ~regex, ~fun, + "-chord\\\\.tsv\\\\.gz$", "UmChordTsvFile", + "-hrdetect\\\\.tsv\\\\.gz$", "UmHrdetectTsvFile", + "-snv_2015\\\\.tsv\\\\.gz$", "UmSigsSnvFile", + "-snv_2020\\\\.tsv\\\\.gz$", "UmSigsSnvFile", + "-dbs\\\\.tsv\\\\.gz$", "UmSigsDbsFile", + "-indel\\\\.tsv\\\\.gz$", "UmSigsIndelFile", + "-qc_summary\\\\.tsv\\\\.gz$", "UmQcSumFile", +) + +#---- LOCAL ----# +p1_local <- "~/icav1/g/production/analysis_data" +p <- file.path(p1_local, "SBJ01155/umccrise/202408300c218043/L2101566__L2101565") +um1 <- Wf$new(path = p, wname = "umccrise", regexes = regexes) +um1$list_files(max_files = 10) +um1$list_files_filter_relevant(max_files = 10) + +#---- GDS ----# +p1_gds <- "gds://production/analysis_data" +p <- file.path(p1_gds, "SBJ03043/umccrise/20240830ec648f40/L2300064__L2300063") +outdir <- file.path(sub("gds:/", "~/icav1/g", p)) +token <- Sys.getenv("ICA_ACCESS_TOKEN") +um2 <- Wf$new(path = p, wname = "umccrise", regexes = regexes) +um2$list_files(max_files = 10) +um2$list_files_filter_relevant(ica_token = token, max_files = 500) +d <- um2$download_files( + outdir = outdir, ica_token = token, + max_files = 1000, dryrun = T ) -w +d_tidy <- um2$tidy_files(d) + +#---- S3 ----# +p1_s3 <- "s3://org.umccr.data.oncoanalyser/analysis_data/SBJ05570/sash/202408275fce06c3" +p2_s3 <- "L2401304_L2401303/SBJ05570_MDX240299/cancer_report/cancer_report_tables" +p <- file.path(p1_s3, p2_s3) +outdir <- sub("s3:/", "~/s3", p) +um3 <- Wf$new(path = p, wname = "sash", regexes = regexes) +um3$list_files(max_files = 10) +um3$list_files_filter_relevant(max_files = 50) +d <- um3$download_files(outdir = outdir, regexes = regexes, max_files = 50, dryrun = F) +} + } \section{Public fields}{ \if{html}{\out{
}} \describe{ -\item{\code{prid}}{Portal run ID.} - -\item{\code{type}}{Workflow type.} - -\item{\code{start}}{Workflow start datetime.} +\item{\code{path}}{Path to directory with raw workflow results (from GDS, S3, or +local filesystem).} -\item{\code{end}}{Workflow end datetime.} +\item{\code{wname}}{Name of workflow (e.g. umccrise, sash).} -\item{\code{status}}{Workflow end status.} +\item{\code{filesystem}}{Filesystem of \code{path} (gds/s3/local).} -\item{\code{input}}{Workflow input JSON string.} - -\item{\code{output}}{Workflow output JSON string.} +\item{\code{regexes}}{Tibble with file \code{regex} and \code{fun}ction to parse it.} } \if{html}{\out{
}} } @@ -42,6 +87,11 @@ w \itemize{ \item \href{#method-Wf-new}{\code{Wf$new()}} \item \href{#method-Wf-print}{\code{Wf$print()}} +\item \href{#method-Wf-list_files}{\code{Wf$list_files()}} +\item \href{#method-Wf-list_files_filter_relevant}{\code{Wf$list_files_filter_relevant()}} +\item \href{#method-Wf-download_files}{\code{Wf$download_files()}} +\item \href{#method-Wf-tidy_files}{\code{Wf$tidy_files()}} +\item \href{#method-Wf-write}{\code{Wf$write()}} \item \href{#method-Wf-clone}{\code{Wf$clone()}} } } @@ -51,33 +101,17 @@ w \subsection{Method \code{new()}}{ Create a new Workflow object. \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Wf$new( - prid = NULL, - type = NULL, - start = NULL, - end = NULL, - status = NULL, - input = NULL, - output = NULL -)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{Wf$new(path = NULL, wname = NULL, regexes = NULL)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{prid}}{Portal run ID.} - -\item{\code{type}}{Workflow type.} +\item{\code{path}}{Path to directory with raw workflow results.} -\item{\code{start}}{Workflow start datetime.} +\item{\code{wname}}{Name of workflow.} -\item{\code{end}}{Workflow end datetime.} - -\item{\code{status}}{Workflow end status.} - -\item{\code{input}}{Workflow input JSON string.} - -\item{\code{output}}{Workflow output JSON string.} +\item{\code{regexes}}{Tibble with file \code{regex} and \code{fun}ction to parse it.} } \if{html}{\out{
}} } @@ -100,6 +134,144 @@ Print details about the Workflow. } } \if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf-list_files}{}}} +\subsection{Method \code{list_files()}}{ +List all files under given path. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf$list_files( + path = self$path, + max_files = 1000, + ica_token = Sys.getenv("ICA_ACCESS_TOKEN"), + ... +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{path}}{Path with raw results.} + +\item{\code{max_files}}{Max number of files to list (for gds/s3 only).} + +\item{\code{ica_token}}{ICA access token (def: $ICA_ACCESS_TOKEN env var).} + +\item{\code{...}}{Passed on to \code{gds_list_files_dir} function.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf-list_files_filter_relevant}{}}} +\subsection{Method \code{list_files_filter_relevant()}}{ +List dracarys files under given path +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf$list_files_filter_relevant( + path = self$path, + max_files = 1000, + ica_token = Sys.getenv("ICA_ACCESS_TOKEN"), + ... +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{path}}{Path with raw results.} + +\item{\code{max_files}}{Max number of files to list (for gds/s3 only).} + +\item{\code{ica_token}}{ICA access token (def: $ICA_ACCESS_TOKEN env var).} + +\item{\code{...}}{Passed on to the \code{gds_list_files_filter_relevant} or +the \code{s3_list_files_filter_relevant} function.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf-download_files}{}}} +\subsection{Method \code{download_files()}}{ +Download files from GDS/S3 to local filesystem. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf$download_files( + path = self$path, + outdir, + ica_token = Sys.getenv("ICA_ACCESS_TOKEN"), + max_files = 1000, + dryrun = FALSE, + recursive = NULL, + list_filter_fun = NULL +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{path}}{Path with raw results.} + +\item{\code{outdir}}{Path to output directory.} + +\item{\code{ica_token}}{ICA access token (def: $ICA_ACCESS_TOKEN env var).} + +\item{\code{max_files}}{Max number of files to list.} + +\item{\code{dryrun}}{If TRUE, just list the files that will be downloaded (don't +download them).} + +\item{\code{recursive}}{Should files be returned recursively \emph{in and under} the specified +GDS directory, or \emph{only directly in} the specified GDS directory (def: TRUE via ICA API).} + +\item{\code{list_filter_fun}}{Function to filter relevant files.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf-tidy_files}{}}} +\subsection{Method \code{tidy_files()}}{ +Tidy given files. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf$tidy_files(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Tibble with \code{localpath} to file and the function \code{type} to parse it.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf-write}{}}} +\subsection{Method \code{write()}}{ +Write tidy data. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf$write(x, outdir = NULL, prefix = NULL, format = "tsv", drid = NULL)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Tibble with tidy \code{data} and file \code{type}.} + +\item{\code{outdir}}{Directory path to output tidy files.} + +\item{\code{prefix}}{Prefix of output files.} + +\item{\code{format}}{Format of output files.} + +\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Wf-clone}{}}} \subsection{Method \code{clone()}}{ diff --git a/man/Wf_umccrise.Rd b/man/Wf_umccrise.Rd new file mode 100644 index 0000000..5fd5ade --- /dev/null +++ b/man/Wf_umccrise.Rd @@ -0,0 +1,383 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/umccrise.R +\name{Wf_umccrise} +\alias{Wf_umccrise} +\title{Wf_umccrise R6 Class} +\description{ +Reads and writes tidy versions of files from the \code{umccrise} workflow +} +\examples{ +\dontrun{ + +#---- LOCAL ----# +SubjectID <- "SBJ03043" +SampleID_tumor <- "PRJ230004" +prefix <- glue("{SubjectID}__{SampleID_tumor}") +p1_local <- "~/icav1/g/production/analysis_data" +p <- file.path(p1_local, "SBJ03043/umccrise/20240830ec648f40/L2300064__L2300063") +um1 <- Wf_umccrise$new(path = p, SubjectID = SubjectID, SampleID_tumor = SampleID_tumor) +um1$list_files(max_files = 10) +um1$list_files_filter_relevant() +d <- um1$download_files(max_files = 1000, dryrun = F) +d_tidy <- um1$tidy_files(d) +d_write <- um1$write( + d_tidy, + outdir = file.path(p, "dracarys_tidy"), + prefix = glue("{SubjectID}__{SampleID_tumor}"), + format = "tsv" +) + +#---- GDS ----# +SubjectID <- "SBJ03043" +SampleID_tumor <- "PRJ230004" +prefix <- glue("{SubjectID}__{SampleID_tumor}") +p1_gds <- "gds://production/analysis_data" +p <- file.path(p1_gds, "SBJ03043/umccrise/20240830ec648f40/L2300064__L2300063") +outdir <- file.path(sub("gds:/", "~/icav1/g", p)) +token <- Sys.getenv("ICA_ACCESS_TOKEN") +um2 <- Wf_umccrise$new(path = p, SubjectID = SubjectID, SampleID_tumor = SampleID_tumor) +um2$list_files(max_files = 8) +um2$list_files_filter_relevant(ica_token = token, max_files = 500) +d <- um2$download_files( + outdir = outdir, ica_token = token, + max_files = 1000, dryrun = F +) +d_tidy <- um2$tidy_files(d) +d_write <- um2$write( + d_tidy, + outdir = file.path(outdir, "dracarys_tidy"), + prefix = glue("{SubjectID}__{SampleID_tumor}"), + format = "tsv" +) +} + +} +\section{Super class}{ +\code{\link[dracarys:Wf]{dracarys::Wf}} -> \code{Wf_umccrise} +} +\section{Public fields}{ +\if{html}{\out{
}} +\describe{ +\item{\code{SubjectID}}{The SubjectID of the sample (needed for path lookup).} + +\item{\code{SampleID_tumor}}{The SampleID of the tumor sample (needed for path lookup).} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-Wf_umccrise-new}{\code{Wf_umccrise$new()}} +\item \href{#method-Wf_umccrise-print}{\code{Wf_umccrise$print()}} +\item \href{#method-Wf_umccrise-list_files_filter_relevant}{\code{Wf_umccrise$list_files_filter_relevant()}} +\item \href{#method-Wf_umccrise-download_files}{\code{Wf_umccrise$download_files()}} +\item \href{#method-Wf_umccrise-read_pcgrjson}{\code{Wf_umccrise$read_pcgrjson()}} +\item \href{#method-Wf_umccrise-read_chordtsv}{\code{Wf_umccrise$read_chordtsv()}} +\item \href{#method-Wf_umccrise-read_hrdetecttsv}{\code{Wf_umccrise$read_hrdetecttsv()}} +\item \href{#method-Wf_umccrise-read_sigstsv}{\code{Wf_umccrise$read_sigstsv()}} +\item \href{#method-Wf_umccrise-read_sigssnv2015tsv}{\code{Wf_umccrise$read_sigssnv2015tsv()}} +\item \href{#method-Wf_umccrise-read_sigssnv2020tsv}{\code{Wf_umccrise$read_sigssnv2020tsv()}} +\item \href{#method-Wf_umccrise-read_sigsdbstsv}{\code{Wf_umccrise$read_sigsdbstsv()}} +\item \href{#method-Wf_umccrise-read_sigsindeltsv}{\code{Wf_umccrise$read_sigsindeltsv()}} +\item \href{#method-Wf_umccrise-read_qcsummarytsv}{\code{Wf_umccrise$read_qcsummarytsv()}} +\item \href{#method-Wf_umccrise-read_conpairmultiqc}{\code{Wf_umccrise$read_conpairmultiqc()}} +\item \href{#method-Wf_umccrise-clone}{\code{Wf_umccrise$clone()}} +} +} +\if{html}{\out{ +
Inherited methods + +
+}} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_umccrise-new}{}}} +\subsection{Method \code{new()}}{ +Create a new Wf_umccrise object. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_umccrise$new(path = NULL, SubjectID = NULL, SampleID_tumor = NULL)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{path}}{Path to directory with raw workflow results (from GDS, S3, or +local filesystem).} + +\item{\code{SubjectID}}{The SubjectID of the sample (needed for path lookup).} + +\item{\code{SampleID_tumor}}{The SampleID of the tumor sample (needed for path lookup).} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_umccrise-print}{}}} +\subsection{Method \code{print()}}{ +Print details about the Workflow. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_umccrise$print(...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{...}}{(ignored).} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_umccrise-list_files_filter_relevant}{}}} +\subsection{Method \code{list_files_filter_relevant()}}{ +List dracarys files under given path +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_umccrise$list_files_filter_relevant( + max_files = 1000, + ica_token = Sys.getenv("ICA_ACCESS_TOKEN"), + ... +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{max_files}}{Max number of files to list (for gds/s3 only).} + +\item{\code{ica_token}}{ICA access token (def: $ICA_ACCESS_TOKEN env var).} + +\item{\code{...}}{Passed on to the \code{gds_list_files_filter_relevant} or +the \code{s3_list_files_filter_relevant} function.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_umccrise-download_files}{}}} +\subsection{Method \code{download_files()}}{ +Download files from GDS/S3 to local filesystem. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_umccrise$download_files( + outdir, + ica_token = Sys.getenv("ICA_ACCESS_TOKEN"), + max_files = 1000, + dryrun = FALSE, + recursive = NULL +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{outdir}}{Path to output directory.} + +\item{\code{ica_token}}{ICA access token (def: $ICA_ACCESS_TOKEN env var).} + +\item{\code{max_files}}{Max number of files to list.} + +\item{\code{dryrun}}{If TRUE, just list the files that will be downloaded (don't +download them).} + +\item{\code{recursive}}{Should files be returned recursively \emph{in and under} the specified +GDS directory, or \emph{only directly in} the specified GDS directory (def: TRUE via ICA API).} + +\item{\code{list_filter_fun}}{Function to filter relevant files.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_pcgrjson}{}}} +\subsection{Method \code{read_pcgrjson()}}{ +Read \code{pcgr.json.gz} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_umccrise$read_pcgrjson(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_chordtsv}{}}} +\subsection{Method \code{read_chordtsv()}}{ +Read \code{chord.tsv.gz} cancer report file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_umccrise$read_chordtsv(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_hrdetecttsv}{}}} +\subsection{Method \code{read_hrdetecttsv()}}{ +Read \code{hrdetect.tsv.gz} cancer report file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_umccrise$read_hrdetecttsv(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_sigstsv}{}}} +\subsection{Method \code{read_sigstsv()}}{ +Read signature cancer report file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_umccrise$read_sigstsv(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_sigssnv2015tsv}{}}} +\subsection{Method \code{read_sigssnv2015tsv()}}{ +Read \code{snv_2015.tsv.gz} sigs cancer report file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_umccrise$read_sigssnv2015tsv(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_sigssnv2020tsv}{}}} +\subsection{Method \code{read_sigssnv2020tsv()}}{ +Read \code{snv_2020.tsv.gz} sigs cancer report file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_umccrise$read_sigssnv2020tsv(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_sigsdbstsv}{}}} +\subsection{Method \code{read_sigsdbstsv()}}{ +Read \code{dbs.tsv.gz} sigs cancer report file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_umccrise$read_sigsdbstsv(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_sigsindeltsv}{}}} +\subsection{Method \code{read_sigsindeltsv()}}{ +Read \code{indel.tsv.gz} sigs cancer report file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_umccrise$read_sigsindeltsv(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_qcsummarytsv}{}}} +\subsection{Method \code{read_qcsummarytsv()}}{ +Read \code{qc_summary.tsv.gz} cancer report file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_umccrise$read_qcsummarytsv(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_conpairmultiqc}{}}} +\subsection{Method \code{read_conpairmultiqc()}}{ +Read multiqc_conpair.txt file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_umccrise$read_conpairmultiqc(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_umccrise-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_umccrise$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
diff --git a/man/Wf_umccrise_download_tidy_write.Rd b/man/Wf_umccrise_download_tidy_write.Rd
new file mode 100644
index 0000000..05ca0bf
--- /dev/null
+++ b/man/Wf_umccrise_download_tidy_write.Rd
@@ -0,0 +1,57 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/umccrise.R
+\name{Wf_umccrise_download_tidy_write}
+\alias{Wf_umccrise_download_tidy_write}
+\title{umccrise Download Tidy and Write}
+\usage{
+Wf_umccrise_download_tidy_write(
+  path,
+  SubjectID,
+  SampleID_tumor,
+  outdir,
+  format = "rds",
+  max_files = 1000,
+  ica_token = Sys.getenv("ICA_ACCESS_TOKEN"),
+  dryrun = FALSE
+)
+}
+\arguments{
+\item{path}{Path to directory with raw workflow results (from GDS, S3, or
+local filesystem).}
+
+\item{SubjectID}{The SubjectID of the sample (needed for path lookup).}
+
+\item{SampleID_tumor}{The SampleID of the tumor sample (needed for path lookup).}
+
+\item{outdir}{Path to output directory.}
+
+\item{format}{Format of output files.}
+
+\item{max_files}{Max number of files to list.}
+
+\item{ica_token}{ICA access token (def: $ICA_ACCESS_TOKEN env var).}
+
+\item{dryrun}{If TRUE, just list the files that will be downloaded (don't
+download them).}
+}
+\value{
+List where each element is a tidy tibble of a umccrise file.
+}
+\description{
+Downloads files from the \code{umccrise} workflow and writes them in a tidy format.
+}
+\examples{
+\dontrun{
+SubjectID <- "SBJ03043"
+SampleID_tumor <- "PRJ230004"
+p1_gds <- glue("gds://production/analysis_data/{SubjectID}/umccrise")
+p <- file.path(p1_gds, "20240830ec648f40/L2300064__L2300063")
+outdir <- file.path(sub("gds:/", "~/icav1/g", p))
+token <- Sys.getenv("ICA_ACCESS_TOKEN")
+d <- Wf_umccrise_download_tidy_write(
+  path = p, SubjectID = SubjectID, SampleID_tumor = SampleID_tumor,
+  outdir = outdir,
+  dryrun = F
+)
+}
+}
diff --git a/man/dr_func_eval.Rd b/man/dr_func_eval.Rd
index 2dfedf1..0697c7f 100644
--- a/man/dr_func_eval.Rd
+++ b/man/dr_func_eval.Rd
@@ -4,13 +4,16 @@
 \alias{dr_func_eval}
 \title{Evaluate dracarys Function}
 \usage{
-dr_func_eval(f, v = NULL)
+dr_func_eval(f, v = NULL, envir = parent.frame())
 }
 \arguments{
 \item{f}{Name of function to evaluate.}

 \item{v}{Character vector of strings evaluating to functions. By default,
 this points to the functions in the DR_FILE_REGEX dracarys tibble.}
+
+\item{envir}{The environment in which to evaluate the function, e.g. use \code{self}
+when calling from inside R6 classes.}
 }
 \value{
 Evaluated function.
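The new `envir` argument to `dr_func_eval()` matters when the parser functions live on an R6 object rather than in the package namespace. A short sketch: the first call mirrors the roxytest example at the end of this changeset, while the `self` usage is an assumption based only on the argument description above.

# Resolve a function by name from an allow-list, then call it.
mean_1_to_10 <- dr_func_eval("mean", v = c("mean", "sd"))(1:10)
# Inside an R6 method, a parser defined on the class could be resolved against
# the object itself (assumed usage, per the envir description):
# parsed <- dr_func_eval("read_qcsummarytsv", v = "read_qcsummarytsv", envir = self)(x)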
diff --git a/man/dr_gds_download.Rd b/man/dr_gds_download.Rd
index 087a7ed..61aa7f8 100644
--- a/man/dr_gds_download.Rd
+++ b/man/dr_gds_download.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/ica.R
+% Please edit documentation in R/fs_icav1.R
 \name{dr_gds_download}
 \alias{dr_gds_download}
 \title{dracarys GDS Download}
@@ -7,33 +7,49 @@
 dr_gds_download(
   gdsdir,
   outdir,
-  token,
-  page_size = 100,
+  token = Sys.getenv("ICA_ACCESS_TOKEN"),
   pattern = NULL,
+  page_size = 100,
   dryrun = FALSE,
   regexes = DR_FILE_REGEX,
-  recursive = NULL
+  recursive = NULL,
+  list_filter_fun = gds_list_files_filter_relevant
 )
 }
 \arguments{
 \item{gdsdir}{Full path to GDS directory.}

-\item{outdir}{Path to output directory.}
+\item{outdir}{Local output directory.}

 \item{token}{ICA access token (def: $ICA_ACCESS_TOKEN env var).}

-\item{page_size}{Page size (def: 100).}
-
 \item{pattern}{Pattern to further filter the returned file type tibble.}

+\item{page_size}{Page size (def: 10 via ICA API).}
+
 \item{dryrun}{If TRUE, just list the files that will be downloaded (don't
 download them).}

-\item{regexes}{Tibble with regex and function name.}
+\item{regexes}{Tibble with \code{regex} and \code{fun}ction name (see example).}

 \item{recursive}{Should files be returned recursively \emph{in and under} the specified
-GDS directory (TRUE), or \emph{only directly in} the specified GDS directory (FALSE) (def: TRUE).}
+GDS directory, or \emph{only directly in} the specified GDS directory (def: TRUE via ICA API).}
+
+\item{list_filter_fun}{Function to filter relevant GDS files.}
 }
 \description{
 Download only GDS files that can be processed by dracarys.
 }
+\examples{
+\dontrun{
+gdsdir <- "gds://production/analysis_data/SBJ01155/umccrise/202408300c218043/L2101566__L2101565"
+outdir <- sub("gds:/", "~/icav1/g", gdsdir)
+regexes <- tibble::tribble(
+  ~regex, ~fun,
+  "multiqc_data\\\\.json$", "MultiqcJsonFile",
+  "-somatic\\\\.pcgr\\\\.json\\\\.gz$", "pcgrjson"
+)
+dr_gds_download(gdsdir = gdsdir, outdir = outdir, regexes = regexes, dryrun = T)
+}
+
+}
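The new `list_filter_fun` argument makes the listing/filtering step of `dr_gds_download()` pluggable. A minimal sketch passing the default filter explicitly, under the signature documented above (the GDS/output paths reuse the example run and are otherwise assumptions):

regexes <- tibble::tribble(
  ~regex, ~fun,
  "multiqc_data\\.json$", "MultiqcJsonFile"
)
dr_gds_download(
  gdsdir = "gds://production/analysis_data/SBJ01155/umccrise/202408300c218043/L2101566__L2101565",
  outdir = "~/icav1/g/production/analysis_data/SBJ01155/umccrise/202408300c218043/L2101566__L2101565",
  regexes = regexes,
  list_filter_fun = gds_list_files_filter_relevant,
  dryrun = TRUE # list only, don't download
)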
diff --git a/man/dr_s3_download.Rd b/man/dr_s3_download.Rd
index 76f0339..674ed93 100644
--- a/man/dr_s3_download.Rd
+++ b/man/dr_s3_download.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/s3.R
+% Please edit documentation in R/fs_s3.R
 \name{dr_s3_download}
 \alias{dr_s3_download}
 \title{dracarys S3 Download}
@@ -7,34 +7,43 @@
 dr_s3_download(
   s3dir,
   outdir,
-  page_size = 100,
+  max_objects = 100,
   pattern = NULL,
   regexes = DR_FILE_REGEX,
-  dryrun = FALSE
+  dryrun = FALSE,
+  list_filter_fun = s3_list_files_filter_relevant
 )
 }
 \arguments{
-\item{s3dir}{Full path to S3 directory.}
+\item{s3dir}{S3 directory.}

 \item{outdir}{Path to output directory.}

-\item{page_size}{Page size (def: 100).}
+\item{max_objects}{Maximum objects returned.}

 \item{pattern}{Pattern to further filter the returned file type tibble.}

-\item{regexes}{Tibble with regex and function name.}
+\item{regexes}{Tibble with \code{regex} and \code{fun}ction name.}

 \item{dryrun}{If TRUE, just list the files that will be downloaded (don't
 download them).}
+
+\item{list_filter_fun}{Function to filter relevant S3 files.}
 }
 \description{
 Download only S3 files that can be processed by dracarys.
 }
 \examples{
-s3dir <- file.path(
-  "s3://umccr-primary-data-prod/UMCCR-Validation/SBJ00596",
-  "ctTSO/2021-03-17/PTC_SSqCMM05pc_L2100067"
+\dontrun{
+p1 <- "s3://org.umccr.data.oncoanalyser/analysis_data/SBJ05373/sash"
+p2 <- "20240707becde493/L2401018_L2401017/SBJ05373_MDX240220"
+s3dir <- file.path(p1, p2)
+regexes <- tibble::tribble(
+  ~regex, ~fun,
+  "multiqc_data\\\\.json$", "MultiqcJsonFile",
+  "pcgr.*\\\\.json\\\\.gz$", "pcgrjson"
 )
 outdir <- sub("s3:/", "~/s3", s3dir)
-
+dr_s3_download(s3dir = s3dir, outdir = outdir, max_objects = 500, regexes = regexes, dryrun = F)
+}
 }
diff --git a/man/gds_file_download_api.Rd b/man/gds_file_download_api.Rd
index 1397860..b10a438 100644
--- a/man/gds_file_download_api.Rd
+++ b/man/gds_file_download_api.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/ica.R
+% Please edit documentation in R/fs_icav1.R
 \name{gds_file_download_api}
 \alias{gds_file_download_api}
 \title{GDS File Download via API}
diff --git a/man/gds_file_download.Rd b/man/gds_file_download_cli.Rd
similarity index 61%
rename from man/gds_file_download.Rd
rename to man/gds_file_download_cli.Rd
index 10ab7b5..f5946fb 100644
--- a/man/gds_file_download.Rd
+++ b/man/gds_file_download_cli.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/ica.R
-\name{gds_file_download}
-\alias{gds_file_download}
+% Please edit documentation in R/fs_icav1.R
+\name{gds_file_download_cli}
+\alias{gds_file_download_cli}
 \title{GDS File Download via CLI}
 \usage{
-gds_file_download(gds, out, token = Sys.getenv("ICA_ACCESS_TOKEN"))
+gds_file_download_cli(gds, out, token = Sys.getenv("ICA_ACCESS_TOKEN"))
 }
 \arguments{
 \item{gds}{Full path to GDS file.}
diff --git a/man/gds_file_presignedurl.Rd b/man/gds_file_presignedurl.Rd
index 064adb5..841db4a 100644
--- a/man/gds_file_presignedurl.Rd
+++ b/man/gds_file_presignedurl.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/ica.R
+% Please edit documentation in R/fs_icav1.R
 \name{gds_file_presignedurl}
 \alias{gds_file_presignedurl}
 \title{GDS File Presigned URL}
diff --git a/man/gds_files_list_filter_relevant.Rd b/man/gds_files_list_filter_relevant.Rd
deleted file mode 100644
index 784a393..0000000
--- a/man/gds_files_list_filter_relevant.Rd
+++ /dev/null
@@ -1,34 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/ica.R
-\name{gds_files_list_filter_relevant}
-\alias{gds_files_list_filter_relevant}
-\title{List Relevant Files In GDS Directory}
-\usage{
-gds_files_list_filter_relevant(
-  gdsdir,
-  token,
-  pattern = NULL,
-  include_url = FALSE,
-  page_size = 100,
-  regexes = DR_FILE_REGEX
-)
-}
-\arguments{
-\item{gdsdir}{GDS directory.}
-
-\item{token}{ICA access token.}
-
-\item{pattern}{Pattern to further filter the returned file type tibble.}
-
-\item{include_url}{Include presigned URLs to all files within the GDS directory (def: FALSE).}
-
-\item{page_size}{Page size (def: 100).}
-
-\item{regexes}{Tibble with regex and function name.}
-}
-\value{
-A tibble with type, bname, size, file_id, path, and presigned URL.
-}
-\description{
-Lists relevant files in a GDS directory.
-}
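The renamed `gds_file_download_cli()` (see the hunk above) keeps the same three-argument signature. A minimal single-file sketch; the GDS object and output path are hypothetical:

# Download one GDS file via the ICA CLI wrapper.
gds_file_download_cli(
  gds = "gds://production/analysis_data/SBJ01155/umccrise/202408300c218043/L2101566__L2101565/multiqc_data.json",
  out = "~/icav1/multiqc_data.json",
  token = Sys.getenv("ICA_ACCESS_TOKEN")
)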
diff --git a/man/gds_files_list.Rd b/man/gds_list_files_dir.Rd
similarity index 59%
rename from man/gds_files_list.Rd
rename to man/gds_list_files_dir.Rd
index 30e3bd9..d60b546 100644
--- a/man/gds_files_list.Rd
+++ b/man/gds_list_files_dir.Rd
@@ -1,12 +1,12 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/ica.R
-\name{gds_files_list}
-\alias{gds_files_list}
-\title{GDS Files List}
+% Please edit documentation in R/fs_icav1.R
+\name{gds_list_files_dir}
+\alias{gds_list_files_dir}
+\title{List Files in ICAv1 GDS Directory}
 \usage{
-gds_files_list(
+gds_list_files_dir(
   gdsdir,
-  token,
+  token = Sys.getenv("ICA_ACCESS_TOKEN"),
   page_size = NULL,
   include_url = FALSE,
   no_recurse = TRUE,
@@ -19,9 +19,9 @@

 \item{token}{ICA access token (def: $ICA_ACCESS_TOKEN env var).}

-\item{page_size}{Page size (def: 10).}
+\item{page_size}{Page size (def: 10 via ICA API).}

-\item{include_url}{Include presigned URLs to all files within the GDS directory (def: FALSE).}
+\item{include_url}{Include presigned URLs to all files within the GDS directory (def: FALSE via ICA API).}

 \item{no_recurse}{Do not recurse through the file list i.e. just give the first items
 without recursing further down the list using .}
@@ -29,34 +29,27 @@ without recursing further down the list using .}

 \item{page_token}{Page token (def: NULL). Used internally for recursion.}

 \item{recursive}{Should files be returned recursively \emph{in and under} the specified
-GDS directory, or \emph{only directly in} the specified GDS directory (def: TRUE).}
+GDS directory, or \emph{only directly in} the specified GDS directory (def: TRUE via ICA API).}
 }
 \value{
-Tibble with file basename, file size, file full data path, file dir name.
+A tibble with file ID, basename, size, last modified timestamp,
+full GDS path, and presigned URL if requested.
 }
 \description{
-List files on ICA GDS filesystem.
+Lists files in a GDS directory.
 }
 \examples{
 \dontrun{
-gdsdir <- file.path(
-  "gds://production/primary_data",
-  "240322_A00130_0290_BH5HLLDSXC/20240323f56ec5a5/WGS_TsqNano"
-)
-gdsdir <- file.path(
-  "gds://bssh.acddbfda498038ed99fa94fe79523959/Runs",
-  "240322_A00130_0290_BH5HLLDSXC_r.3TbcOsEKZUyetygkqIOXcg/InterOp"
-)
 gdsdir <- file.path(
   "gds://production/analysis_data/SBJ00699/umccrise",
   "202203277dcf8562/L2200352__L2100146/SBJ00699__MDX220105/coverage"
 )
 token <- ica_token_validate()
 page_size <- 11
-include_url <- TRUE
+include_url <- F
 page_token <- NULL
 no_recurse <- TRUE
 recursive <- NULL
-gds_files_list(gdsdir, token, page_size, include_url, no_recurse, page_token, recursive)
+gds_list_files_dir(gdsdir, token, page_size, include_url, no_recurse, page_token, recursive)
 }
 }
diff --git a/man/gds_list_files_filter_relevant.Rd b/man/gds_list_files_filter_relevant.Rd
new file mode 100644
index 0000000..1e280b4
--- /dev/null
+++ b/man/gds_list_files_filter_relevant.Rd
@@ -0,0 +1,53 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fs_icav1.R
+\name{gds_list_files_filter_relevant}
+\alias{gds_list_files_filter_relevant}
+\title{List Relevant Files In ICAv1 GDS Directory}
+\usage{
+gds_list_files_filter_relevant(
+  gdsdir,
+  pattern = NULL,
+  regexes = DR_FILE_REGEX,
+  token = Sys.getenv("ICA_ACCESS_TOKEN"),
+  page_size = 100,
+  include_url = FALSE,
+  no_recurse = TRUE,
+  page_token = NULL,
+  recursive = NULL
+)
+}
+\arguments{
+\item{gdsdir}{Full path to GDS directory.}
+
+\item{pattern}{Pattern to further filter the returned file type tibble.}
+
+\item{regexes}{Tibble with \code{regex} and \code{fun}ction name (see example).}
+
+\item{token}{ICA access token (def: $ICA_ACCESS_TOKEN env var).}
+
+\item{page_size}{Page size (def: 10 via ICA API).}
+
+\item{include_url}{Include presigned URLs to all files within the GDS directory (def: FALSE via ICA API).}
+
+\item{no_recurse}{Do not recurse through the file list i.e. just give the first items
+without recursing further down the list using .}
+
+\item{page_token}{Page token (def: NULL). Used internally for recursion.}
+
+\item{recursive}{Should files be returned recursively \emph{in and under} the specified
+GDS directory, or \emph{only directly in} the specified GDS directory (def: TRUE via ICA API).}
+}
+\value{
+A tibble with file type, basename, size, last modified timestamp, file_id, full path,
+and presigned URL if requested.
+}
+\description{
+Lists relevant files in a GDS directory.
+}
+\examples{
+\dontrun{
+regexes <- tibble::tibble(regex = "multiqc_data\\\\.json$", fun = "MultiqcJsonFile")
+gdsdir <- "gds://production/analysis_data/SBJ01155/umccrise/202408300c218043/L2101566__L2101565"
+gds_list_files_filter_relevant(gdsdir)
+}
+}
diff --git a/man/grep_file.Rd b/man/grep_file.Rd
new file mode 100644
index 0000000..d0c588e
--- /dev/null
+++ b/man/grep_file.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{grep_file}
+\alias{grep_file}
+\title{Grep File Pattern}
+\usage{
+grep_file(path = ".", regexp)
+}
+\arguments{
+\item{path}{Path to look for file.}
+
+\item{regexp}{A regular expression (e.g. \code{[.]csv$}) passed on to \code{grep()} to filter paths.}
+}
+\value{
+The path to the file or an empty string if no match is found.
+}
+\description{
+Grep File Pattern
+}
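`grep_file()` above ships without an example; a minimal sketch using only the documented `path`/`regexp` arguments (the pattern is hypothetical):

# Returns the matching path under the current directory, or "" if nothing matches.
qc_path <- grep_file(path = ".", regexp = "-qc_summary\\.tsv\\.gz$")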
diff --git a/man/local_list_files_dir.Rd b/man/local_list_files_dir.Rd
new file mode 100644
index 0000000..d77ca63
--- /dev/null
+++ b/man/local_list_files_dir.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fs_local.R
+\name{local_list_files_dir}
+\alias{local_list_files_dir}
+\title{List Files in Local Directory}
+\usage{
+local_list_files_dir(localdir, max_files = NULL)
+}
+\arguments{
+\item{localdir}{Path to local directory.}
+
+\item{max_files}{Max files returned.}
+}
+\value{
+A tibble with file basename, size, last modification timestamp
+and full path.
+}
+\description{
+Lists files in a local directory.
+}
+\examples{
+localdir <- system.file("R", package = "dracarys")
+x <- local_list_files_dir(localdir)
+}
diff --git a/man/local_list_files_filter_relevant.Rd b/man/local_list_files_filter_relevant.Rd
new file mode 100644
index 0000000..ac62065
--- /dev/null
+++ b/man/local_list_files_filter_relevant.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fs_local.R
+\name{local_list_files_filter_relevant}
+\alias{local_list_files_filter_relevant}
+\title{List Relevant Files In Local Directory}
+\usage{
+local_list_files_filter_relevant(
+  localdir,
+  regexes = DR_FILE_REGEX,
+  max_files = NULL
+)
+}
+\arguments{
+\item{localdir}{Path to local directory.}
+
+\item{regexes}{Tibble with \code{regex} and \code{fun}ction name (see example).}
+
+\item{max_files}{Max files returned.}
+}
+\value{
+A tibble with file type, basename, size, last modified timestamp, and
+path.
+}
+\description{
+Lists relevant files in a local directory.
+}
+\examples{
+localdir <- system.file("extdata/tso", package = "dracarys")
+regexes <- tibble::tibble(regex = "multiqc_data\\\\.json$", fun = "MultiqcFile")
+x <- local_list_files_filter_relevant(localdir, regexes)
+}
diff --git a/man/s3_file_presignedurl.Rd b/man/s3_file_presignedurl.Rd
new file mode 100644
index 0000000..79eb7d6
--- /dev/null
+++ b/man/s3_file_presignedurl.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fs_s3.R
+\name{s3_file_presignedurl}
+\alias{s3_file_presignedurl}
+\title{S3 Generate Presigned URL}
+\usage{
+s3_file_presignedurl(client, s3path, expiry_seconds = 3600)
+}
+\arguments{
+\item{client}{S3 client. Make sure you use \code{signature_version = "s3v4"} (see example).}
+
+\item{s3path}{Full path to S3 object.}
+
+\item{expiry_seconds}{Number of seconds the presigned URL is valid for (3600 = 1 hour).}
+}
+\value{
+An S3 presigned URL.
+}
+\description{
+S3 Generate Presigned URL
+}
+\examples{
+\dontrun{
+client <- paws.storage::s3(paws.storage::config(signature_version = "s3v4"))
+s3path <- "s3://bucket1/path/to/file.tsv"
+s3_file_presignedurl(client, s3path)
+}
+
+}
diff --git a/man/s3_files_list_filter_relevant.Rd b/man/s3_files_list_filter_relevant.Rd
deleted file mode 100644
index 7fc45f3..0000000
--- a/man/s3_files_list_filter_relevant.Rd
+++ /dev/null
@@ -1,40 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/s3.R
-\name{s3_files_list_filter_relevant}
-\alias{s3_files_list_filter_relevant}
-\title{List Relevant Files In AWS S3 Directory}
-\usage{
-s3_files_list_filter_relevant(
-  s3dir,
-  pattern = NULL,
-  page_size = 1000,
-  max_items = 1000,
-  presign = FALSE,
-  expiry_sec = 43200
-)
-}
-\arguments{
-\item{s3dir}{S3 directory.}
-
-\item{pattern}{Pattern to further filter the returned file type tibble.}
-
-\item{page_size}{The size of each page to get in the AWS service call (def: 1000).}
-
-\item{max_items}{The total number of items to return in the command’s output (def: 1000).}
-
-\item{presign}{Include presigned URLs (def: FALSE).}
-
-\item{expiry_sec}{Number of seconds the presigned URL will be valid for (if generated) (def: 43200 (12hrs)).}
-}
-\value{
-A tibble with path, date, file size, file type, and presigned URL if requested.
-}
-\description{
-Lists relevant files in an AWS S3 directory.
-}
-\examples{
-\dontrun{
-s3dir <- "s3://umccr-primary-data-prod/cancer_report_tables"
-s3_files_list_filter_relevant(s3dir = s3dir, presign = TRUE)
-}
-}
diff --git a/man/s3_list_files_dir.Rd b/man/s3_list_files_dir.Rd
new file mode 100644
index 0000000..820da1d
--- /dev/null
+++ b/man/s3_list_files_dir.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fs_s3.R
+\name{s3_list_files_dir}
+\alias{s3_list_files_dir}
+\title{List Objects in AWS S3 Directory}
+\usage{
+s3_list_files_dir(s3dir, max_objects = 1000)
+}
+\arguments{
+\item{s3dir}{S3 directory.}
+
+\item{max_objects}{Maximum objects returned.}
+}
+\value{
+A tibble with object basename, size, last modified timestamp, and
+full S3 path.
+}
+\description{
+Returns some or all (up to 1,000) of the objects in an S3 directory.
+}
+\examples{
+\dontrun{
+p1 <- "s3://org.umccr.data.oncoanalyser/analysis_data/SBJ05373/sash"
+p2 <- "20240707becde493/L2401018_L2401017/SBJ05373_MDX240220"
+s3dir <- file.path(p1, p2, "cancer_report/cancer_report_tables")
+s3_list_files_dir(s3dir, max_objects = 15)
+}
+}
diff --git a/man/s3_list_files_filter_relevant.Rd b/man/s3_list_files_filter_relevant.Rd
new file mode 100644
index 0000000..1716825
--- /dev/null
+++ b/man/s3_list_files_filter_relevant.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fs_s3.R
+\name{s3_list_files_filter_relevant}
+\alias{s3_list_files_filter_relevant}
+\title{List Relevant Files In AWS S3 Directory}
+\usage{
+s3_list_files_filter_relevant(
+  s3dir,
+  pattern = NULL,
+  regexes = DR_FILE_REGEX,
+  max_objects = 100,
+  presign = FALSE,
+  expiry_sec = 3600
+)
+}
+\arguments{
+\item{s3dir}{S3 directory.}
+
+\item{pattern}{Pattern to further filter the returned file type tibble.}
+
+\item{regexes}{Tibble with \code{regex} and \code{fun}ction name.}
+
+\item{max_objects}{Maximum objects returned.}
+
+\item{presign}{Include presigned URLs (def: FALSE).}
+
+\item{expiry_sec}{Number of seconds the presigned URL will be valid for (if generated).}
+}
+\value{
+A tibble with file type, basename, size, last modified timestamp,
+full path, and presigned URL if requested.
+}
+\description{
+Lists relevant files in an AWS S3 directory.
+}
+\examples{
+\dontrun{
+p1 <- "s3://org.umccr.data.oncoanalyser/analysis_data/SBJ05373/sash"
+p2 <- "20240707becde493/L2401018_L2401017/SBJ05373_MDX240220"
+s3dir <- file.path(p1, p2)
+regexes <- tibble::tibble(regex = "multiqc_data\\\\.json$", fun = "MultiqcJsonFile")
+s3_list_files_filter_relevant(s3dir = s3dir, regexes = regexes, max_objects = 300)
+}
+}
diff --git a/man/s3_search.Rd b/man/s3_search.Rd
index c0d9f64..c4a1db2 100644
--- a/man/s3_search.Rd
+++ b/man/s3_search.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/s3.R
+% Please edit documentation in R/fs_s3.R
 \name{s3_search}
 \alias{s3_search}
 \title{Search AWS S3 Objects}
diff --git a/man/tidy_files.Rd b/man/tidy_files.Rd
new file mode 100644
index 0000000..8d1849b
--- /dev/null
+++ b/man/tidy_files.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tidy.R
+\name{tidy_files}
+\alias{tidy_files}
+\title{Tidy Files}
+\usage{
+tidy_files(x, envir = parent.frame())
+}
+\arguments{
+\item{x}{Tibble with \code{localpath} to file and the function \code{type} to parse it.}
+
+\item{envir}{The environment in which to evaluate the function, e.g. use \code{self}
+when calling from inside R6 classes.}
+}
+\value{
+Tibble with parsed data in a \code{data} list-column.
+}
+\description{
+Tidy Files
+}
+\examples{
+\dontrun{
+p1 <- "~/icav1/g/production/analysis_data/SBJ01155/umccrise/202408300c218043"
+p2 <- "L2101566__L2101565/SBJ01155__PRJ211091-qc_summary.tsv.gz"
+p <- file.path(p1, p2)
+x <- tibble::tibble(type = "readr::read_tsv", localpath = p)
+tidy_files(x)
+}
+
+}
diff --git a/man/write_dracarys_list_of_tbls.Rd b/man/write_dracarys_list_of_tbls.Rd
new file mode 100644
index 0000000..e6aa8be
--- /dev/null
+++ b/man/write_dracarys_list_of_tbls.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{write_dracarys_list_of_tbls}
+\alias{write_dracarys_list_of_tbls}
+\title{Write List of Tidy Tibbles}
+\usage{
+write_dracarys_list_of_tbls(
+  list_of_tbls,
+  out_dir = NULL,
+  prefix = NULL,
+  out_format = "tsv",
+  drid = NULL
+)
+}
+\arguments{
+\item{list_of_tbls}{List of tidy tibbles.}
+
+\item{out_dir}{Output directory.}
+
+\item{prefix}{Prefix of output file(s).}
+
+\item{out_format}{Format of output file(s).}
+
+\item{drid}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).}
+}
+\value{
+Tibble with nested objects that have been written to the output directory.
+}
+\description{
+Write List of Tidy Tibbles
+}
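`write_dracarys_list_of_tbls()` above has no example. A minimal sketch under the documented signature; the tibbles, output directory and on-disk file naming are assumptions for illustration, not verified package behaviour:

# Each list element is written under out_dir using the given prefix and out_format.
tbls <- list(
  qcsum = tibble::tibble(SubjectID = "SBJ03043", purity = 0.78),
  hrdetect = tibble::tibble(SubjectID = "SBJ03043", Probability = 0.02)
)
write_dracarys_list_of_tbls(
  list_of_tbls = tbls,
  out_dir = "~/dracarys_tidy",
  prefix = "SBJ03043_PRJ230004",
  out_format = "tsv"
)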
diff --git a/tests/testthat/test-roxytest-testexamples-fs_local.R b/tests/testthat/test-roxytest-testexamples-fs_local.R
new file mode 100644
index 0000000..764c4aa
--- /dev/null
+++ b/tests/testthat/test-roxytest-testexamples-fs_local.R
@@ -0,0 +1,20 @@
+# Generated by roxytest: do not edit by hand!
+
+# File R/fs_local.R: @testexamples
+
+test_that("Function local_list_files_dir() @ L16", {
+
+  localdir <- system.file("R", package = "dracarys")
+  x <- local_list_files_dir(localdir)
+  expect_equal(names(x), c("bname", "size", "lastmodified", "path"))
+})
+
+
+test_that("Function local_list_files_filter_relevant() @ L47", {
+
+  localdir <- system.file("extdata/tso", package = "dracarys")
+  regexes <- tibble::tibble(regex = "multiqc_data\\.json$", fun = "MultiqcFile")
+  x <- local_list_files_filter_relevant(localdir, regexes)
+  expect_equal(nrow(x), 1)
+})
+
diff --git a/tests/testthat/test-roxytest-testexamples-regex.R b/tests/testthat/test-roxytest-testexamples-regex.R
index 91a7f26..05787ff 100644
--- a/tests/testthat/test-roxytest-testexamples-regex.R
+++ b/tests/testthat/test-roxytest-testexamples-regex.R
@@ -2,7 +2,7 @@

 # File R/regex.R: @testexamples

-test_that("Function dr_func_eval() @ L97", {
+test_that("Function dr_func_eval() @ L99", {

   mean_1_to_10 <- dr_func_eval("mean", v = c("mean", "sd"))(1:10)
   x <- system.file("extdata/tso/sample705.fragment_length_hist.json.gz", package = "dracarys")