New function groupData()

sofia-taf · Feb 20, 2022 · edb918b · edb918b
1 parent f269d8b
commit edb918b
Show file tree

Hide file tree

Showing 7 changed files with 191 additions and 5 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,10 +1,10 @@
 Package: SOFIA
-Version: 1.0.3
-Date: 2022-02-14
+Version: 1.1.0
+Date: 2022-02-20
 Title: Tools to Work with SOFIA Analyses
 Authors@R: c(person("Rishi", "Sharma", role="aut"),
              person("Arni", "Magnusson", role=c("aut","cre"), email="[email protected]"))
-Imports: stats, ggplot2, sraplus
+Imports: stats, utils, ggplot2, sraplus
 Description: Tools that support the Transparent SOFIA framework.
 License: GPL-3
 URL: https://github.com/sofia-tsaf/SOFIA

diff --git a/NAMESPACE b/NAMESPACE
@@ -4,6 +4,7 @@ export(addDriors)
 export(addEffort)
 export(calcCat)
 export(compCat)
+export(groupData)
 export(plotCat)
 export(plotProp)
 importFrom(ggplot2,aes_string)
@@ -14,3 +15,4 @@ importFrom(ggplot2,scale_fill_manual)
 importFrom(ggplot2,theme_minimal)
 importFrom(sraplus,format_driors)
 importFrom(stats,na.omit)
+importFrom(utils,read.csv)
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,10 @@
+# SOFIA 1.1.0 (2022-02-20)
+
+* Added function groupData() to group primary data into subdirectories.
+
+
+
+
 # SOFIA 1.0.3 (2022-02-14)
 
 * Improved addDriors() so effort data are passed to format_driors().

diff --git a/R/SOFIA-package.R b/R/SOFIA-package.R
@@ -13,7 +13,8 @@
 #' \emph{Prepare data:}
 #' \tabular{ll}{
 #'   \code{\link{addDriors}} \tab add driors column to stocks object\cr
-#'   \code{\link{addEffort}} \tab add effort column to catch data
+#'   \code{\link{addEffort}} \tab add effort column to catch data\cr
+#'   \code{\link{groupData}} \tab group primary data into directories
 #' }
 #' \emph{Calculate:}
 #' \tabular{ll}{

diff --git a/R/groupData.R b/R/groupData.R
@@ -0,0 +1,109 @@
+#' Group Data
+#'
+#' Group primary data files into subdirectories, depending on what columns each
+#' data file contains.
+#'
+#' @param dir is the directory containing the primary data files.
+#' @param quiet is whether to suppress screen output.
+#'
+#' @details
+#' If \code{quiet = FALSE} then warnings are raised if the output subdirectories
+#' exist already. Generally, the output subdirectories should not exist before
+#' this function is called.
+#'
+#' @return
+#' Files are copied into subdirectories. As a byproduct, a list is returned,
+#' describing which subdirectories contain which data files.
+#'
+#' @author Arni Magnusson.
+#'
+#' @note
+#' A primary data file can have a filename such as
+#' \file{Yellowtail_snapper_Mexico.csv} and columns such as
+#' \code{stockid|scientificname|commonname|year|catch|stocklong}.
+#'
+#' In addition, the data file may have columns called \code{best_effort} and/or
+#' \code{best_index}, containing the effort and/or index series to be used in
+#' the SOFIA analysis.
+#'
+#' This function creates four subdirectories:
+#' \enumerate{
+#' \item \code{both} - for data files containing both effort and index data
+#' \item \code{effort} - for data files containing effort data (and possibly
+#'       also index)
+#' \item \code{index} - for data files containing index data (and possibly also
+#'       effort)
+#' \item \code{neither} - for data files containing neither effort nor index
+#'       data
+#' }
+#'
+#' The object returned has an attribute \code{count}, showing the number of data
+#' files in each subdirectory and the number of original (\code{unique}) data
+#' files, which equals:
+#' \preformatted{
+#'   both + (effort-both) + (index-both) + neither
+#' }
+#'
+#' @seealso
+#' \code{\link{SOFIA-package}} gives an overview of the package.
+#'
+#' @examples
+#' \dontrun{
+#' groupData("Data_files_Area_31_3")
+#' groupData("Data_files_Area_31_3", quiet=TRUE)
+#' }
+#'
+#' @importFrom utils read.csv
+#'
+#' @export
+
+groupData <- function(dir, quiet=FALSE)
+{
+  if(!dir.exists(dir))
+    stop("'", dir, "' not found")
+
+  ## 1  Import CSV files (only their column names are used below)
+  files <- dir(dir, pattern="\\.csv$", full.names=TRUE)
+  csv <- lapply(files, read.csv)
+  names(csv) <- basename(files)
+
+  ## 2  Create directories (warns if they already exist, unless quiet)
+  groups <- c("both", "effort", "index", "neither")
+  for(g in groups)
+    dir.create(file.path(dir, g), showWarnings=!quiet)
+
+  ## 3  Copy files into directories
+  for(i in seq_along(files))
+  {
+    n <- tolower(names(csv[[i]]))  # lowercase, so matching ignores case
+    effort <- "best_effort" %in% n
+    index <- "best_index" %in% n
+    ## A file with both series is copied into 'both', 'effort', and 'index'
+    if(effort && index)
+      file.copy(files[i], file.path(dir, "both"))
+    if(effort)
+      file.copy(files[i], file.path(dir, "effort"))
+    if(index)
+      file.copy(files[i], file.path(dir, "index"))
+    if(!effort && !index)
+      file.copy(files[i], file.path(dir, "neither"))
+    ## Report when column names look suspicious
+    if(!effort && any(grepl("effort", n)))
+      warning(basename(files[i]), "\n  has effort data (",
+              paste(n[grepl("effort", n)], collapse=", "),
+              ") but no 'best_effort'")
+    if(!index && any(grepl("index", n)))
+      warning(basename(files[i]), "\n  has index data (",
+              paste(n[grepl("index", n)], collapse=", "),
+              ") but no 'best_index'")
+  }
+
+  ## 4  Return list of directory contents, with file counts as an attribute
+  out <- lapply(groups, function(g) dir(file.path(dir, g)))
+  names(out) <- groups
+  attr(out, "count") <- c(lengths(out), unique=length(files))
+  if(quiet)
+    invisible(out)
+  else
+    out
+}
diff --git a/man/SOFIA-package.Rd b/man/SOFIA-package.Rd
diff --git a/man/groupData.Rd b/man/groupData.Rd