Added widen_model_data to clean up process

lynker-spatial · Oct 11, 2024 · 8403c5a · 8403c5a
1 parent d752f88
commit 8403c5a
Show file tree

Hide file tree

Showing 7 changed files with 73 additions and 19 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -11,8 +11,10 @@ export(ModelScatter)
 export(PlotSeason)
 export(Plot_WWLvsSTR)
 export(SiteData)
+export(balance_data)
 export(model_clean)
 export(process_model_data)
+export(widen_model_data)
 importFrom(RColorBrewer,brewer.pal)
 importFrom(arrow,read_parquet)
 importFrom(aws.s3,get_bucket_df)

diff --git a/R/CABCMParquetRead.R b/R/CABCMParquetRead.R
@@ -19,12 +19,6 @@
 #' }
 #'
 #'
-#' @examples
-#' # Read and combine all Parquet files from the "cabcm" subfolder
-#' new_cabcm_data <- CABCMParquetRead()
-#'
-#' # Read and combine all Parquet files from a different subfolder
-#' other_data <- CABCMParquetRead(sub = "another_subfolder")
 #' @export
 
 

diff --git a/R/balance_data.R b/R/balance_data.R
@@ -17,7 +17,8 @@
 #'   \item **Filter and Adjust Data**: Filters the data to keep only the relevant variables and multiplies values for specific variables by -1 to facilitate subtraction from `ppt`.
 #'   \item **Save to Global Environment**: Depending on the presence of the `rch` variable, the function saves the processed data to the global environment with an appropriate name for future use.
 #' }
-#'
+#' 
+#' @export
 
 balance_data <- function(x){
 

diff --git a/R/process_model_data.R b/R/process_model_data.R
@@ -39,11 +39,6 @@ process_model_data <- function(data, NewDivides) {
       mutate(ERR = ppt - aet - run - str)
   }
 
-  if ("rch" %in% names(data_wide)) {
-    assign("cabcm_data_wide", data_wide, envir = WaterBalanceSummaryEnv)
-  } else {
-    assign("terra_data_wide", data_wide, envir = WaterBalanceSummaryEnv)
-  }
 
   # Step 2: Define get_season function
   get_season <- function(month) {

diff --git a/R/widen_model_data.R b/R/widen_model_data.R
@@ -0,0 +1,41 @@
+#' Widen Model Data and Calculate Error
+#'
+#' This function takes a cleaned, long-format model dataset, pivots it to a wide format, and calculates the error (`ERR`) by subtracting `aet`, `run`, `str`, and (when present) `rch` from `ppt`.
+#'
+#' @param data A long-format data frame of model output with a `var` column (variable names) and a `value` column (their values). The values of `var` must include `ppt`, `aet`, `run`, and `str`, and optionally `rch`. A `divide_id` column is also required when `rch` is present.
+#'
+#' @return A data frame in wide format with an additional column `ERR`, representing the calculated error.
+#'
+#' @details
+#' The function performs the following steps:
+#' \itemize{
+#'   \item **Pivot Wider**: Converts the data to wide format with one column per value of `var`.
+#'   \item **Error Calculation**: Calculates the error (`ERR`) as `ppt - aet - rch - run - str` when `rch` is available, or `ppt - aet - run - str` when it's not.
+#'   \item **Filter**: When `rch` is available, filters out rows where `divide_id` is equal to `"cat-351"`; no rows are filtered otherwise.
+#' }
+#'
+#' @note Ensure that the input data contains the `var` and `value` columns, and that `var` provides `ppt`, `aet`, `run`, `str`, and optionally `rch`.
+#'
+#' @export
+
+
+widen_model_data <- function(data) {
+
+  # Column names used via non-standard evaluation are bound to NULL here, presumably to avoid R CMD check "no visible binding" notes.
+  var <- value <- ppt <- aet <- rch <- run <- str <- divide_id <- ERR <- NULL
+
+  # Step 1: Pivot wider so each value of `var` becomes its own column
+  data_wide <- data %>%
+    pivot_wider(names_from = var, values_from = value)
+
+  if ("rch" %in% names(data_wide)) {
+    data_wide <- data_wide %>%
+      mutate(ERR = ppt - aet - rch - run - str) %>%
+      filter(divide_id != "cat-351")
+  } else {
+    data_wide <- data_wide %>%
+      mutate(ERR = ppt - aet - run - str)
+  }
+  # Return the wide data frame, now including the ERR column
+  return(data_wide)
+}
diff --git a/man/CABCMParquetRead.Rd b/man/CABCMParquetRead.Rd
diff --git a/man/widen_model_data.Rd b/man/widen_model_data.Rd