From 7d6596f7a5bf022a093e752b3ac4d676b54f57ac Mon Sep 17 00:00:00 2001 From: Sima Najafzadehkhoei Date: Tue, 10 Dec 2024 18:12:38 -0700 Subject: [PATCH] no warning no error --- NAMESPACE | 1 + R/dataprep.R | 79 +++++++++++++++++++++++------------------ R/run_simulations.R | 60 ++++++++----------------------- vignettes/calibrate.Rmd | 2 +- 4 files changed, 61 insertions(+), 81 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index ff1b2c6..2ee17d4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ export(run_simulations) export(simulate_calibrate_sir) export(split_data) export(train_model) +import(epiworldR) importFrom(data.table,as.data.table) importFrom(data.table,copy) importFrom(data.table,dcast) diff --git a/R/dataprep.R b/R/dataprep.R index 9028d7f..9ee7bae 100644 --- a/R/dataprep.R +++ b/R/dataprep.R @@ -4,9 +4,11 @@ #' @param max_days The maximum number of days to consider for data preparation. Defaults to 50. #' @return A reshaped array of processed data suitable for TensorFlow models. #' If an error occurs, it returns the error object. +#'@import epiworldR #' #' @export -prepare_data <- function(m, max_days = max_days) { + +prepare_data <- function(m, max_days =max_days) { err <- tryCatch({ ans <- list( @@ -15,25 +17,32 @@ prepare_data <- function(m, max_days = max_days) { gentime = epiworldR::plot_generation_time(m, plot = FALSE) ) - # Convert all elements to data.table + # Filling ans <- lapply(ans, data.table::as.data.table) - # Replace NA values in repnum$avg and gentime$avg with the last observed value - ans[["repnum"]][, avg := data.table::nafill(avg, type = "locf")] - ans[["gentime"]][, avg := data.table::nafill(avg, type = "locf")] + # Replacing NaN and NAs with the previous value + # in each element in the list + # Replace NA values with the last observed value + ans$repnum$avg <- data.table::nafill(ans$repnum$avg, type = "locf") + + # Replace NA values with the last observed value + ans$gentime$avg <- data.table::nafill(ans$gentime$avg, type = "locf") - # Filter up to max_days - ans[["repnum"]] <- ans[["repnum"]][date <= max_days, ] - ans[["gentime"]] <- ans[["gentime"]][date <= max_days, ] - # incidence is indexed by row number since date is not explicitly in that data - # We assume the first row represents day 0, hence (max_days + 1) rows total. - ans[["incidence"]] <- ans[["incidence"]][as.integer(.I) <= (max_days + 1), ] + # Filtering up to max_days + ans$repnum <- ans$repnum[ans$repnum$date <= max_days,] + ans$gentime <- ans$gentime[ans$gentime$date <= max_days,] + ans$incidence <- ans$incidence[as.integer(rownames(ans$incidence)) <= (max_days + 1),] - # Create a reference table for merging - ref_table <- data.table::data.table(date = 0:max_days) + # Reference table for merging + # ndays <- epiworldR::get_ndays(m) + + ref_table <- data.table::data.table( + date = 0:max_days + ) - # Merge repnum and gentime with the reference table to ensure consistent length + # Replace the $ with the [[ ]] to avoid the warning in the next + # two lines ans[["repnum"]] <- data.table::merge.data.table( ref_table, ans[["repnum"]], by = "date", all.x = TRUE ) @@ -41,42 +50,42 @@ prepare_data <- function(m, max_days = max_days) { ref_table, ans[["gentime"]], by = "date", all.x = TRUE ) - # Create a data.table with all required columns + + # Generating the data.table with necessary columns ans <- data.table::data.table( - infected = ans[["incidence"]][["Infected"]], - recovered = ans[["incidence"]][["Recovered"]], - repnum = ans[["repnum"]][["avg"]], - gentime = ans[["gentime"]][["avg"]], - repnum_sd = ans[["repnum"]][["sd"]], + infected = ans[["incidence"]][["Infected"]], + recovered = ans[["incidence"]][["Recovered"]], + repnum = ans[["repnum"]][["avg"]], + gentime = ans[["gentime"]][["avg"]], + repnum_sd = ans[["repnum"]][["sd"]], gentime_sd = ans[["gentime"]][["sd"]] ) - # Replace NA values in all relevant columns using locf + # Replace NA values with the last observed value for all columns nafill_cols <- c("infected", "recovered", "repnum", "gentime", "repnum_sd", "gentime_sd") + for (col in nafill_cols) { ans[[col]] <- data.table::nafill(ans[[col]], type = "locf") } - # Return ans as processed data - ans }, error = function(e) e) - # If there was an error, return it + # If there is an error, return NULL if (inherits(err, "error")) { return(err) } - # Remove the first observation (often zero) and take differences - # ans is now a data.table with rows representing days - # ans[-1, ] removes the first row - dprep <- t(diff(as.matrix(err[-1,]))) + # Returning without the first observation (which is mostly zero) + dprep <- t(diff(as.matrix(ans[-1,]))) - # Construct a 3D array with shape (1, n_features, n_timesteps) - # Here n_features = number of variables (rows of dprep after transpose) - # and n_timesteps = number of columns (days-1) - ans_array <- array(dim = c(1, dim(dprep)[1], dim(dprep)[2])) - ans_array[1,,] <- dprep + ans <- array(dim = c(1, dim(dprep))) + ans[1,,] <- dprep + abm_hist_feat <- ans + + tensorflow::array_reshape( + abm_hist_feat, + dim = c(1, dim(dprep)) + ) - # Reshape for TensorFlow input using keras3 (adjust if using another keras interface) - keras3::array_reshape(ans_array, dim = c(1, dim(dprep))) } + diff --git a/R/run_simulations.R b/R/run_simulations.R index 05c6bc2..ab436e3 100644 --- a/R/run_simulations.R +++ b/R/run_simulations.R @@ -15,53 +15,23 @@ #' @return A list containing the simulation results as matrices. #' @export run_simulations <- function(N, n, ndays, ncores, theta, seeds) { - os_type <- .Platform$OS.type + matrices <- parallel::mclapply(1:N, FUN = function(i) { + set.seed(seeds[i]) + m <- epiworldR:: ModelSIRCONN( + "mycon", + prevalence = theta$preval[i], + contact_rate = theta$crate[i], + transmission_rate = theta$ptran[i], + recovery_rate = theta$prec[i], + n = n + ) - if (os_type == "windows") { - cl <- parallel::makeCluster(ncores) - on.exit(parallel::stopCluster(cl)) + verbose_off(m) + run(m, ndays = ndays) + ans <- prepare_data(m,max_days=ndays) - parallel::clusterExport(cl, varlist = c("theta", "n", "ndays", "seeds", "prepare_data"), envir = environment()) - parallel::clusterEvalQ(cl, { - # Load needed packages on workers if required (if not already loaded) - # library(epiworldR) # Not allowed, we rely on namespaces now - }) - - matrices <- parallel::parLapply(cl, 1:N, function(i) { - set.seed(seeds[i]) - m <- epiworldR::ModelSIRCONN( - "mycon", - prevalence = theta$preval[i], - contact_rate = theta$crate[i], - transmission_rate = theta$ptran[i], - recovery_rate = theta$prec[i], - n = n - ) - - epiworldR::verbose_off(m) - epiworldR::run(m, ndays = ndays) - ans <- prepare_data(m, max_days = ndays) - return(ans) - }) - - } else { - matrices <- parallel::mclapply(1:N, function(i) { - set.seed(seeds[i]) - m <- epiworldR::ModelSIRCONN( - "mycon", - prevalence = theta$preval[i], - contact_rate = theta$crate[i], - transmission_rate = theta$ptran[i], - recovery_rate = theta$prec[i], - n = n - ) - - epiworldR::verbose_off(m) - epiworldR::run(m, ndays = ndays) - ans <- prepare_data(m, max_days = ndays) - return(ans) - }, mc.cores = ncores) - } + return(ans) + }, mc.cores = ncores) return(matrices) } diff --git a/vignettes/calibrate.Rmd b/vignettes/calibrate.Rmd index 7f81bdc..1e9ece1 100644 --- a/vignettes/calibrate.Rmd +++ b/vignettes/calibrate.Rmd @@ -62,7 +62,7 @@ The function's arguments are: The first step in the function is to generate the parameter sets (`theta`) and random seeds for the simulation: ```{r} -N=2e4 +N=10 n=5000 theta <- generate_theta(N, n) head(theta,5)