Skip to content

Commit

Permalink
New function groupData()
Browse files Browse the repository at this point in the history
  • Loading branch information
arni-magnusson committed Feb 20, 2022
1 parent f269d8b commit edb918b
Show file tree
Hide file tree
Showing 7 changed files with 191 additions and 5 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
Package: SOFIA
Version: 1.0.3
Date: 2022-02-14
Version: 1.1.0
Date: 2022-02-20
Title: Tools to Work with SOFIA Analyses
Authors@R: c(person("Rishi", "Sharma", role="aut"),
person("Arni", "Magnusson", role=c("aut","cre"), email="[email protected]"))
Imports: stats, ggplot2, sraplus
Imports: stats, utils, ggplot2, sraplus
Description: Tools that support the Transparent SOFIA framework.
License: GPL-3
URL: https://github.com/sofia-tsaf/SOFIA
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ export(addDriors)
export(addEffort)
export(calcCat)
export(compCat)
export(groupData)
export(plotCat)
export(plotProp)
importFrom(ggplot2,aes_string)
Expand All @@ -14,3 +15,4 @@ importFrom(ggplot2,scale_fill_manual)
importFrom(ggplot2,theme_minimal)
importFrom(sraplus,format_driors)
importFrom(stats,na.omit)
importFrom(utils,read.csv)
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# SOFIA 1.1.0 (2022-02-20)

* Added function groupData() to group primary data into subdirectories.




# SOFIA 1.0.3 (2022-02-14)

* Improved addDriors() so effort data are passed to format_driors().
Expand Down
3 changes: 2 additions & 1 deletion R/SOFIA-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
#' \emph{Prepare data:}
#' \tabular{ll}{
#' \code{\link{addDriors}} \tab add driors column to stocks object\cr
#' \code{\link{addEffort}} \tab add effort column to catch data
#' \code{\link{addEffort}} \tab add effort column to catch data\cr
#' \code{\link{groupData}} \tab group primary data into directories
#' }
#' \emph{Calculate:}
#' \tabular{ll}{
Expand Down
109 changes: 109 additions & 0 deletions R/groupData.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#' Group Data
#'
#' Group primary data files into subdirectories, depending on what columns each
#' data file contains.
#'
#' @param dir is the directory containing the primary data files.
#' @param quiet is whether to suppress screen output.
#'
#' @details
#' If \code{quiet = FALSE} then warnings are raised if the output subdirectories
#' exist already. Generally, the output subdirectories should not exist before
#' this function is called.
#'
#' @return
#' Files are copied into subdirectories. As a byproduct, a list is returned,
#' describing which subdirectories contain which data files.
#'
#' @author Arni Magnusson.
#'
#' @note
#' A primary data file can have a filename such as
#' \file{Yellowtail_snapper_Mexico.csv} and columns such as
#' \code{stockid|scientificname|commonname|year|catch|stocklong}.
#'
#' In addition, the data file may have columns called \code{best_effort} and/or
#' \code{best_index}, containing the effort and/or index series to be used in
#' the SOFIA analysis.
#'
#' This function creates four subdirectories:
#' \enumerate{
#' \item \code{both} - for data files containing both effort and index data
#' \item \code{effort} - for data files containing effort data (and possibly
#' also index)
#' \item \code{index} - for data files containing index data (and possibly also
#' effort)
#' \item \code{neither} - for data files containing neither effort nor index
#' data
#' }
#'
#' The object returned has an attribute \code{count}, showing the number of data
#' files in each subdirectory. The number of original (unique) data files will
#' be:
#' \preformatted{
#' both + (effort-both) + (index-both) + neither
#' }
#'
#' @seealso
#' \code{\link{SOFIA-package}} gives an overview of the package.
#'
#' @examples
#' \dontrun{
#' groupData("Data_files_Area_31_3")
#' groupData("Data_files_Area_31_3", quiet=TRUE)
#' }
#'
#' @importFrom utils read.csv
#'
#' @export

groupData <- function(dir, quiet=FALSE)
{
  ## Copy each CSV file found in 'dir' into the subdirectories 'both',
  ## 'effort', 'index', and/or 'neither', depending on whether it has columns
  ## called 'best_effort' and 'best_index' (matched case-insensitively).
  ## Returns a list of the filenames found in each subdirectory, with an
  ## attribute 'count'. The list is returned invisibly when quiet=TRUE.

  if(!dir.exists(dir))
    stop("'", dir, "' not found")

  ## The argument 'dir' shadows the base function dir(), so the equivalent
  ## list.files() is called throughout to keep the two apart
  groups <- c("both", "effort", "index", "neither")

  ## 1 Import CSV files (only those directly inside 'dir')
  files <- list.files(dir, pattern="\\.csv$", full.names=TRUE)
  csv <- lapply(files, read.csv)
  names(csv) <- basename(files)

  ## 2 Create directories
  for(g in groups)
    dir.create(file.path(dir, g), showWarnings=!quiet)

  ## 3 Copy files into directories
  for(i in seq_along(files))
  {
    cols <- names(csv[[i]])  # column names as read, used in warnings
    n <- tolower(cols)       # lowercase copy, used for matching
    has.effort <- "best_effort" %in% n
    has.index <- "best_index" %in% n

    ## A file with both columns is copied to 'both', 'effort', and 'index'
    if(has.effort && has.index)
      file.copy(files[i], file.path(dir, "both"))
    if(has.effort)
      file.copy(files[i], file.path(dir, "effort"))
    if(has.index)
      file.copy(files[i], file.path(dir, "index"))
    if(!has.effort && !has.index)
      file.copy(files[i], file.path(dir, "neither"))

    ## Report when column names look suspicious, showing the names as they
    ## were read so they can be located in the data file
    effort.like <- grepl("effort", n)
    if(!has.effort && any(effort.like))
      warning(basename(files[i]), "\n has effort data (",
              paste(cols[effort.like], collapse=", "),
              ") but no 'best_effort'")
    index.like <- grepl("index", n)
    if(!has.index && any(index.like))
      warning(basename(files[i]), "\n has index data (",
              paste(cols[index.like], collapse=", "),
              ") but no 'best_index'")
  }

  ## 4 Return list
  ## The list reflects what is in each subdirectory after copying, so files
  ## that were already there before the call are included
  out <- lapply(file.path(dir, groups), list.files)
  names(out) <- groups
  attr(out, "count") <- c(lengths(out), unique=length(files))
  if(quiet)
    invisible(out)
  else
    out
}
3 changes: 2 additions & 1 deletion man/SOFIA-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

66 changes: 66 additions & 0 deletions man/groupData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit edb918b

Please sign in to comment.