Skip to content

Commit

Permalink
fix namepaths with new ais data
Browse files Browse the repository at this point in the history
  • Loading branch information
david-beauchesne committed Jan 9, 2025
1 parent 3f0bbde commit c78ed9e
Showing 1 changed file with 12 additions and 7 deletions.
19 changes: 12 additions & 7 deletions workspace/scripts/prc_shipping_ais.R
Original file line number Diff line number Diff line change
Expand Up @@ -183,9 +183,12 @@ prc_shipping_ais_points <- function(input_files, output_path) {
# output_path <- "workspace/data/harvested/shipping_ais-1.0.0/processed/"
# # dir.create(output_path)
# input_path <- "workspace/data/harvested/shipping_ais-1.0.0/raw/"
# input_files <- file.path(input_path, "2023AIS.zip")
# input_files <- list(
# file.path(input_path, "DFO-TC-AIS_TERR_SAT_FUSED_2023.zip"),
# file.path(input_path, "static_data_2023.csv")
# )
input_files <- unlist(input_files)


# Data
tmp <- file.path(output_path, "tmp/")
Expand All @@ -203,25 +206,27 @@ prc_shipping_ais_points <- function(input_files, output_path) {
write.csv(ship_info, file = file.path(output_path, "vessel_static_information.csv"), row.names = FALSE)

# Prepare for parallel processing
future::plan(future::multisession, workers = parallel::detectCores() - 3)
future::plan(future::multisession, workers = parallel::detectCores() / 2)

# List of parquet files
files <- list.files(parquet_db, full.names = TRUE)

# Process each parquet file and write directly to output (sequential loop; the parallel furrr call is commented out)
furrr::future_map(files, function(file) {
# furrr::future_map(files, function(file) {
for (file in files) {
processed_points <- process_parquet(file) |>
day_night() |>
create_track_ids(ship_info) |>
post_process_points()

# Extract the year and month from the file name
split_name <- basename(file) |>
tools::file_path_sans_ext() |>
strsplit("_") |>
unlist() |>
tolower()
year <- split_name[3]
month <- split_name[2]
year <- split_name[4]
month <- split_name[5] |> tools::file_path_sans_ext()
month <- lubridate::ymd(paste("2023", month, "01")) |>
lubridate::month()

Expand All @@ -230,7 +235,7 @@ prc_shipping_ais_points <- function(input_files, output_path) {
processed_points,
file.path(output_path, glue::glue("shipping_ais_{year}_{month}.parquet"))
)
}, .options = furrr::furrr_options(seed = TRUE))
} # , .options = furrr::furrr_options(seed = TRUE))

# Clean up temporary files
fs::dir_delete(tmp)
Expand Down

0 comments on commit c78ed9e

Please sign in to comment.