diff --git a/DESCRIPTION b/DESCRIPTION index d2c0f98..b5f87f0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,10 @@ Package: SOFIA -Version: 1.0.3 -Date: 2022-02-14 +Version: 1.1.0 +Date: 2022-02-20 Title: Tools to Work with SOFIA Analyses Authors@R: c(person("Rishi", "Sharma", role="aut"), person("Arni", "Magnusson", role=c("aut","cre"), email="thisisarni@gmail.com")) -Imports: stats, ggplot2, sraplus +Imports: stats, utils, ggplot2, sraplus Description: Tools that support the Transparent SOFIA framework. License: GPL-3 URL: https://github.com/sofia-tsaf/SOFIA diff --git a/NAMESPACE b/NAMESPACE index 8423f95..0e95d3f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ export(addDriors) export(addEffort) export(calcCat) export(compCat) +export(groupData) export(plotCat) export(plotProp) importFrom(ggplot2,aes_string) @@ -14,3 +15,4 @@ importFrom(ggplot2,scale_fill_manual) importFrom(ggplot2,theme_minimal) importFrom(sraplus,format_driors) importFrom(stats,na.omit) +importFrom(utils,read.csv) diff --git a/NEWS.md b/NEWS.md index 3c356e1..c9dbcff 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# SOFIA 1.1.0 (2022-02-20) + +* Added function groupData() to group primary data into subdirectories. + + + + # SOFIA 1.0.3 (2022-02-14) * Improved addDriors() so effort data are passed to format_driors(). 
#' Group Data
#'
#' Group primary data files into subdirectories, depending on what columns each
#' data file contains.
#'
#' @param dir is the directory containing the primary data files.
#' @param quiet is whether to suppress screen output.
#'
#' @details
#' If \code{quiet = FALSE} then warnings are raised if the output subdirectories
#' exist already. Generally, the output subdirectories should not exist before
#' this function is called.
#'
#' Only files in \code{dir} whose names end in \file{.csv} are examined. Column
#' names are compared in lowercase, so \code{Best_Effort} is treated the same as
#' \code{best_effort}.
#'
#' A warning is raised for every data file that has a column name containing
#' \code{effort} (or \code{index}) but no \code{best_effort} (or
#' \code{best_index}) column. These warnings are raised regardless of
#' \code{quiet}.
#'
#' @return
#' Files are copied into subdirectories. As a byproduct, a list is returned,
#' describing which subdirectories contain which data files. The list is
#' returned invisibly if \code{quiet = TRUE}.
#'
#' @author Arni Magnusson.
#'
#' @note
#' A primary data file can have a filename such as
#' \file{Yellowtail_snapper_Mexico.csv} and columns such as
#' \code{stockid|scientificname|commonname|year|catch|stocklong}.
#'
#' In addition, the data file may have columns called \code{best_effort} and/or
#' \code{best_index}, containing the effort and/or index series to be used in
#' the SOFIA analysis.
#'
#' This function creates four subdirectories:
#' \enumerate{
#' \item \code{both} - for data files containing both effort and index data
#' \item \code{effort} - for data files containing effort data (and possibly
#'       also index)
#' \item \code{index} - for data files containing index data (and possibly also
#'       effort)
#' \item \code{neither} - for data files containing neither effort nor index
#'       data
#' }
#'
#' The object returned has an attribute \code{count}, showing the number of data
#' files in each subdirectory, followed by an element \code{unique} with the
#' number of original data files. The number of original (unique) data files
#' will be:
#' \preformatted{
#' both + (effort-both) + (index-both) + neither
#' }
#'
#' @seealso
#' \code{\link{SOFIA-package}} gives an overview of the package.
#'
#' @examples
#' \dontrun{
#' groupData("Data_files_Area_31_3")
#' groupData("Data_files_Area_31_3", quiet=TRUE)
#' }
#'
#' @importFrom utils read.csv
#'
#' @export

groupData <- function(dir, quiet=FALSE)
{
  if(!dir.exists(dir))
    stop("'", dir, "' not found")

  ## 1  Import CSV files
  ## Only the column names are needed, lowercased once for all later tests
  files <- list.files(dir, pattern="\\.csv$", full.names=TRUE)
  cols <- lapply(files, function(f) tolower(names(read.csv(f))))

  ## 2  Create directories
  groups <- c("both", "effort", "index", "neither")
  for(g in groups)
    dir.create(file.path(dir, g), showWarnings=!quiet)

  ## 3  Copy files into directories
  for(i in seq_along(files))
  {
    n <- cols[[i]]
    has_effort <- "best_effort" %in% n
    has_index <- "best_index" %in% n

    ## A file with both series is copied to 'both', 'effort', and 'index'
    if(has_effort && has_index)
      file.copy(files[i], file.path(dir, "both"))
    if(has_effort)
      file.copy(files[i], file.path(dir, "effort"))
    if(has_index)
      file.copy(files[i], file.path(dir, "index"))
    if(!has_effort && !has_index)
      file.copy(files[i], file.path(dir, "neither"))

    ## Report when column names look suspicious
    if(!has_effort && any(grepl("effort", n)))
      warning(basename(files[i]), "\n has effort data (",
              paste(n[grepl("effort", n)], collapse=", "),
              ") but no 'best_effort'")
    if(!has_index && any(grepl("index", n)))
      warning(basename(files[i]), "\n has index data (",
              paste(n[grepl("index", n)], collapse=", "),
              ") but no 'best_index'")
  }

  ## 4  Return list
  ## Listing reflects what is in each subdirectory after copying
  out <- lapply(file.path(dir, groups), list.files)
  names(out) <- groups
  attr(out, "count") <- c(vapply(out, length, integer(1)),
                          unique=length(files))
  if(quiet)
    invisible(out)
  else
    out
}
+} +\details{ +If \code{quiet = FALSE} then warnings are raised if the output subdirectories +exist already. Generally, the output subdirectories should not exist before +this function is called. +} +\note{ +A primary data file can have a filename such as +\file{Yellowtail_snapper_Mexico.csv} and columns such as +\code{stockid|scientificname|commonname|year|catch|stocklong}. + +In addition, the data file may have columns called \code{best_effort} and/or +\code{best_index}, containing the effort and/or index series to be used in +the SOFIA analysis. + +This function creates four subdirectories: +\enumerate{ +\item \code{both} - for data files containing both effort and index data +\item \code{effort} - for data files containing effort data (and possibly + also index) +\item \code{index} - for data files containing index data (and possibly also + effort) +\item \code{neither} - for data files containing neither effort nor index + data +} + +The object returned has an attribute \code{count}, showing the number of data +files in each subdirectory. The number of original (unique) data files will +be: +\preformatted{ + both + (effort-both) + (index-both) + neither +} +} +\examples{ +\dontrun{ +groupData("Data_files_Area_31_3") +groupData("Data_files_Area_31_3", quiet=TRUE) +} + +} +\seealso{ +\code{\link{SOFIA-package}} gives an overview of the package. +} +\author{ +Arni Magnusson. +}