From 4a575a1b9627b51aa7567041a84d07dc5ca429ff Mon Sep 17 00:00:00 2001 From: David Zimmermann-Kollenda Date: Mon, 15 Jan 2024 13:34:56 +0100 Subject: [PATCH] fix bug where gz functionality would write to user library or current directory --- CRAN-SUBMISSION | 6 ++-- NEWS.md | 4 +++ R/count_functions.R | 7 +++-- R/filter_itch.R | 5 ++-- R/gz_functions.R | 43 +++++++++++++++-------------- R/read_functions.R | 10 ++++--- cran-comments.md | 2 +- inst/tinytest/test_filter_itch.R | 35 +++++++++++------------ inst/tinytest/test_read_functions.R | 5 ++-- man/count_functions.Rd | 4 +++ man/read_functions.Rd | 4 +++ 11 files changed, 72 insertions(+), 53 deletions(-) diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION index 3ef7e2d..ff0f6f0 100644 --- a/CRAN-SUBMISSION +++ b/CRAN-SUBMISSION @@ -1,3 +1,3 @@ -Version: 0.1.24 -Date: 2024-01-11 14:52:01 UTC -SHA: 5b6d74f511f4d1706a428832df3753703ce29d2d +Version: 0.1.25 +Date: 2024-01-13 11:16:03 UTC +SHA: 7a40e5d69f5ab94bba85133e434b838a4c19536c diff --git a/NEWS.md b/NEWS.md index 2c9d25b..b816651 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# RITCH 0.1.26 + +* fix bug where gz functionality would write to user library or current directory + # RITCH 0.1.25 * fix Debian segfault when writing to user library diff --git a/R/count_functions.R b/R/count_functions.R index 4de06c0..af749b5 100644 --- a/R/count_functions.R +++ b/R/count_functions.R @@ -10,8 +10,9 @@ #' @param quiet if TRUE, the status messages are supressed, defaults to FALSE #' @param force_gunzip only applies if file is a gz-file and a file with the same (gunzipped) name already exists. #' if set to TRUE, the existing file is overwritten. Default value is FALSE +#' @param gz_dir a directory where the gz archive is extracted to. +#' Only applies if file is a gz archive. Default is [tempdir()]. #' @param force_cleanup only applies if file is a gz-file. If force_cleanup=TRUE, the gunzipped raw file will be deleted afterwards. -#' #' @return a data.table containing the message-type and their counts for `count_messages` #' or an integer value for the other functions. #' @export @@ -37,7 +38,7 @@ #' ### Specific class count functions are: count_messages <- function(file, add_meta_data = FALSE, buffer_size = -1, quiet = FALSE, force_gunzip = FALSE, - force_cleanup = TRUE) { + gz_dir = tempdir(), force_cleanup = TRUE) { t0 <- Sys.time() if (!file.exists(file)) stop(sprintf("File '%s' not found!", file)) @@ -48,7 +49,7 @@ count_messages <- function(file, add_meta_data = FALSE, buffer_size = -1, orig_file <- file # only needed for gz files; gz files are not deleted when the raw file already existed raw_file_existed <- file.exists(basename(gsub("\\.gz$", "", file))) - file <- check_and_gunzip(file, buffer_size, force_gunzip, quiet) + file <- check_and_gunzip(file, gz_dir, buffer_size, force_gunzip, quiet) df <- count_messages_impl(file, buffer_size, quiet) df <- data.table::setalloccol(df) diff --git a/R/filter_itch.R b/R/filter_itch.R index 9189a62..788d176 100644 --- a/R/filter_itch.R +++ b/R/filter_itch.R @@ -159,7 +159,7 @@ filter_itch <- function(infile, outfile, orig_infile <- infile # only needed for gz files; gz files are not deleted when the raw file already existed raw_file_existed <- file.exists(basename(gsub("\\.gz$", "", infile))) - infile <- check_and_gunzip(infile, buffer_size, force_gunzip, quiet) + infile <- check_and_gunzip(infile, dirname(outfile), buffer_size, force_gunzip, quiet) filter_itch_impl(infile, outfile, start, end, filter_msg_type, filter_stock_locate, @@ -169,7 +169,8 @@ filter_itch <- function(infile, outfile, if (gz) { if (!quiet) cat(sprintf("[gzip] outfile\n")) of <- outfile - outfile <- gzip_file(outfile) + outfile <- gzip_file(infile = outfile, + outfile = paste0(outfile, ".gz")) unlink(of) # delete the temporary file } diff --git a/R/gz_functions.R b/R/gz_functions.R index 068226e..8901063 100644 --- a/R/gz_functions.R +++ b/R/gz_functions.R @@ -63,7 +63,7 @@ gzip_file <- function(infile, infile, paste0(infile, ".gz")) # remove path - xx <- strsplit(outfile, "/")[[1]] + xx <- strsplit(outfile, "\\\\|/")[[1]] outfile <- xx[length(xx)] } if (file.exists(outfile)) unlink(outfile) @@ -77,34 +77,35 @@ gzip_file <- function(infile, return(invisible(outfile)) } -# Helper function -check_and_gunzip <- function(file, buffer_size, force_gunzip, quiet) { +# Helper function +# returns the (if needed gunzipped) file +# note that it only operates in the dir directory +check_and_gunzip <- function(file, dir = dirname(file), buffer_size, force_gunzip, quiet) { file <- path.expand(file) if (!grepl("\\.gz$", file)) return(file) - raw_file <- gsub("\\.gz$", "", file) + outfile <- file.path(dir, basename(gsub("\\.gz$", "", file))) # check if the raw-file at target directory already exists, if so use this (unless force_gunzip = TRUE) - if (file.exists(raw_file) && !quiet && !force_gunzip) { - cat(sprintf("[INFO] Unzipped file '%s' already found, using that (overwrite with force_gunzip=TRUE)\n", raw_file)) - return(raw_file) + if (file.exists(outfile) && !quiet && !force_gunzip) { + cat(sprintf("[INFO] Unzipped file '%s' already found, using that (overwrite with force_gunzip = TRUE)\n", + outfile)) + return(outfile) } - # look in current directory and extract to current directory if decompress needed - raw_file <- strsplit(raw_file, "/")[[1]] - raw_file <- raw_file[length(raw_file)] - # check if the raw-file at current directory already exists, if so use this (unless force_gunzip = TRUE) - if (file.exists(raw_file) && !quiet && !force_gunzip) { - cat(sprintf("[INFO] Unzipped file '%s' already found, using that (overwrite with force_gunzip=TRUE)\n", raw_file)) - return(raw_file) - } - # if the unzipped file doesnt exist or the force_gunzip flag is set, unzip file - if (!file.exists(raw_file) || force_gunzip) { - unlink(raw_file) - if (!quiet) cat(sprintf("[Decompressing] '%s' to '%s'\n", file, raw_file)) + if (file.exists(outfile) && !force_gunzip) { + if (!quiet) + cat(sprintf("[INFO] Unzipped file '%s' already found, using that (overwrite with force_gunzip = TRUE)\n", + outfile)) + return(outfile) + } else { + # if the unzipped file doesnt exist or the force_gunzip flag is set, unzip file + unlink(outfile) + if (!quiet) + cat(sprintf("[Decompressing] '%s' to '%s'\n", file, outfile)) - gunzip_file(file, raw_file, buffer_size) + gunzip_file(file, outfile, buffer_size) } - return(raw_file) + return(outfile) } diff --git a/R/read_functions.R b/R/read_functions.R index a156c09..f3544cc 100644 --- a/R/read_functions.R +++ b/R/read_functions.R @@ -75,6 +75,8 @@ #' @param add_meta if TRUE, the date and exchange information of the file are added, defaults to TRUE #' @param force_gunzip only applies if the input file is a gz-archive and a file with the same (gunzipped) name already exists. #' if set to TRUE, the existing file is overwritten. Default value is FALSE +#' @param gz_dir a directory where the gz archive is extracted to. +#' Only applies if file is a gz archive. Default is [tempdir()]. #' @param force_cleanup only applies if the input file is a gz-archive. #' If force_cleanup=TRUE, the gunzipped raw file will be deleted afterwards. #' Only applies when the gunzipped raw file did not exist before. @@ -144,7 +146,7 @@ read_itch <- function(file, filter_msg_class = NA, max_timestamp = bit64::as.integer64(NA), filter_stock = NA_character_, stock_directory = NA, buffer_size = -1, quiet = FALSE, add_meta = TRUE, - force_gunzip = FALSE, force_cleanup = TRUE) { + force_gunzip = FALSE, gz_dir = tempdir(), force_cleanup = TRUE) { t0 <- Sys.time() if (!file.exists(file)) stop(sprintf("File '%s' not found!", file)) @@ -226,8 +228,8 @@ read_itch <- function(file, filter_msg_class = NA, orig_file <- file # only needed for gz files; gz files are not deleted when the raw file already existed - raw_file_existed <- file.exists(basename(gsub("\\.gz$", "", file))) - file <- check_and_gunzip(file, buffer_size, force_gunzip, quiet) + raw_file_existed <- file.exists(gsub("\\.gz$", "", file)) + file <- check_and_gunzip(file, gz_dir, buffer_size, force_gunzip, quiet) res_raw <- read_itch_impl(filter_msg_class, file, start, end, filter_msg_type, filter_stock_locate, @@ -276,7 +278,7 @@ read_itch <- function(file, filter_msg_class = NA, # if the file was gzipped and the force_cleanup=TRUE, delete unzipped file if (grepl("\\.gz$", orig_file) && force_cleanup && !raw_file_existed) { if (!quiet) cat(sprintf("[Cleanup] Removing file '%s'\n", file)) - unlink(basename(gsub("\\.gz$", "", file))) + unlink(gsub("\\.gz$", "", file)) } return(res) } diff --git a/cran-comments.md b/cran-comments.md index 50e1dd8..c4d20c9 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1 +1 @@ -fix bug around writing to user library in tests. \ No newline at end of file +Fix bug where the gz functionality would write to the current directory or to the user library. \ No newline at end of file diff --git a/inst/tinytest/test_filter_itch.R b/inst/tinytest/test_filter_itch.R index f502589..ceb14a3 100644 --- a/inst/tinytest/test_filter_itch.R +++ b/inst/tinytest/test_filter_itch.R @@ -9,7 +9,7 @@ outfile <- file.path(tempdir(), "testfile_20101224.TEST_ITCH_50") ################################################################################ - # Test that filtering for all trades returns all data entries +# Test that filtering for all trades returns all data entries orig <- read_itch(infile, quiet = TRUE) trades <- read_trades(infile, quiet = TRUE) expect_equal(orig$trades, trades) @@ -371,31 +371,32 @@ unlink(outfile) ################################################################################ # filter_itch works on gz input files -infile <- system.file("extdata", "ex20101224.TEST_ITCH_50.gz", package = "RITCH") +gzinfile <- system.file("extdata", "ex20101224.TEST_ITCH_50.gz", package = "RITCH") +tmpoutfile <- file.path(tempdir(), "gz_testfile_20101224.TEST_ITCH_50") -outfile_plain <- filter_itch(infile, outfile, filter_msg_class = "orders", - quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE) -expect_equal(file.size(outfile_plain), 190012) +rawoutfile <- filter_itch(gzinfile, tmpoutfile, filter_msg_class = "orders", + quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE) +expect_equal(rawoutfile, tmpoutfile) +expect_equal(file.size(rawoutfile), 190012) -odf <- read_orders(outfile_plain, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE) -idf <- read_orders(infile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE) +odf <- read_orders(rawoutfile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE) +idf <- read_orders(gzinfile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE) expect_equal(odf, idf) -unlink(outfile_plain) +unlink(rawoutfile) ################################################################################ # works also on gz-output files -tmpoutfile <- file.path(tempdir(), "gz_testfile_20101224.TEST_ITCH_50") - -gzoutfile <- filter_itch(infile, tmpoutfile, filter_msg_class = "orders", gz = TRUE, - quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE) +rawoutfile <- filter_itch(gzinfile, tmpoutfile, filter_msg_class = "orders", gz = TRUE, + quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE) -expect_true(file.exists(gzoutfile)) -expect_equal(file.size(gzoutfile), 72619) +expect_equal(rawoutfile, paste0(tmpoutfile, ".gz")) +expect_true(file.exists(rawoutfile)) +expect_equal(file.size(rawoutfile), 72619) -odf <- read_orders(gzoutfile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE) -idf <- read_orders(infile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE) +odf <- read_orders(rawoutfile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE) +idf <- read_orders(gzinfile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE) expect_equal(odf, idf) -unlink(gzoutfile) +unlink(rawoutfile) unlink(tmpoutfile) diff --git a/inst/tinytest/test_read_functions.R b/inst/tinytest/test_read_functions.R index 846583d..e0686a7 100644 --- a/inst/tinytest/test_read_functions.R +++ b/inst/tinytest/test_read_functions.R @@ -38,9 +38,10 @@ expect_equal(ct, ct_exp) # force_cleanup = FALSE leaves the raw file behind ct2 <- count_messages(gzfile, quiet = TRUE, force_gunzip = TRUE, - force_cleanup = FALSE) + gz_dir = tempdir(), force_cleanup = FALSE) expect_equal(ct, ct2) -expect_true(file.exists(file_raw)) +expect_true(file.exists(file.path(tempdir(), + gsub("\\.gz$", "", basename(gzfile))))) unlink(file_raw) # check that force_cleanup works diff --git a/man/count_functions.Rd b/man/count_functions.Rd index 7160da6..6ff4872 100644 --- a/man/count_functions.Rd +++ b/man/count_functions.Rd @@ -24,6 +24,7 @@ count_messages( buffer_size = -1, quiet = FALSE, force_gunzip = FALSE, + gz_dir = tempdir(), force_cleanup = TRUE ) @@ -65,6 +66,9 @@ count_rpii(x) \item{force_gunzip}{only applies if file is a gz-file and a file with the same (gunzipped) name already exists. if set to TRUE, the existing file is overwritten. Default value is FALSE} +\item{gz_dir}{a directory where the gz archive is extracted to. +Only applies if file is a gz archive. Default is \code{\link[=tempdir]{tempdir()}}.} + \item{force_cleanup}{only applies if file is a gz-file. If force_cleanup=TRUE, the gunzipped raw file will be deleted afterwards.} \item{x}{a file or a data.table containing the message types and the counts, diff --git a/man/read_functions.Rd b/man/read_functions.Rd index 792b343..5516a43 100644 --- a/man/read_functions.Rd +++ b/man/read_functions.Rd @@ -36,6 +36,7 @@ read_itch( quiet = FALSE, add_meta = TRUE, force_gunzip = FALSE, + gz_dir = tempdir(), force_cleanup = TRUE ) @@ -126,6 +127,9 @@ if you have a large amount of RAM, 1e9 (1GB) might be faster} \item{force_gunzip}{only applies if the input file is a gz-archive and a file with the same (gunzipped) name already exists. if set to TRUE, the existing file is overwritten. Default value is FALSE} +\item{gz_dir}{a directory where the gz archive is extracted to. +Only applies if file is a gz archive. Default is \code{\link[=tempdir]{tempdir()}}.} + \item{force_cleanup}{only applies if the input file is a gz-archive. If force_cleanup=TRUE, the gunzipped raw file will be deleted afterwards. Only applies when the gunzipped raw file did not exist before.}