From 000860cfd227215afef71e6303f36ddc2886a5e3 Mon Sep 17 00:00:00 2001 From: Paul Lietar Date: Tue, 3 Sep 2024 18:32:07 +0100 Subject: [PATCH] Correctly close files when uploading them. When uploading files to outpack, orderly was consistently leaking file handles. While these handles would eventually be garbage collected and closed, the GC would print a lot of noise onto the console: ``` Warning messages: 1: closing unused connection 10 (/tmp/RtmpN8ck5L/filecc11f7514a5e7) 2: closing unused connection 9 (/tmp/RtmpN8ck5L/filecc11f1c3c766d/archive/data/20240903-173208-56e33176/data.rds) 3: closing unused connection 8 (/tmp/RtmpN8ck5L/filecc11f22b55a39/archive/c/20240903-173208-03afd21d/script.R) 4: closing unused connection 7 (/tmp/RtmpN8ck5L/filecc11f22b55a39/archive/c/20240903-173208-03afd21d/data.rds) 5: closing unused connection 6 (/tmp/RtmpN8ck5L/filecc11f22b55a39/archive/b/20240903-173207-e4d60aab/script.R) 6: closing unused connection 5 (/tmp/RtmpN8ck5L/filecc11f22b55a39/archive/b/20240903-173207-e4d60aab/data.rds) 7: closing unused connection 4 (/tmp/RtmpN8ck5L/filecc11f22b55a39/archive/a/20240903-173207-d2210a12/data.rds) ``` This seems to be like an httr/httr2 bug (the behaviour was present both before and after the upgrade), but I cannot find much reference to this issue online. There's a brief mention of it in the [httr2 source][req-body.R], saying it "leaks connection if [the] request doesn't complete", however I am seeing this behaviour even for succesful requests. The httr2 implementation does not make much sense to me actually: it only closes the connections on short reads, but as far as I can tell libcurl always provides an accurate `nbytes` argument to the callback, meaning short reads never actually occur. This works around that issue by not using httr2's implementation of file upload, and instead implements an alternative where the caller provides and already opened connection. It is the caller's responsibility to close it after the request is complete. The only place we use this, `orderly_location_http$put_file`, does so using a `withr::defer` call to ensure this happens across all flow paths, even errors. I don't think we can easily write tests for this, since the garbage collector closes the handles for us already. The GC warnings don't get detected by `evaluate_promise` or `expect_silent`. [req-body.R]: https://github.com/r-lib/httr2/blob/0b795b664ce652c5bfeaf81b7048df389ec05689/R/req-body.R#L210 --- DESCRIPTION | 2 +- R/location_http.R | 9 ++++++++- R/outpack_http_client.R | 16 ++++++++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 26fe812d..1ddd209d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: orderly2 Title: Orderly Next Generation -Version: 1.99.32 +Version: 1.99.33 Authors@R: c(person("Rich", "FitzJohn", role = c("aut", "cre"), email = "rich.fitzjohn@gmail.com"), person("Robert", "Ashton", role = "aut"), diff --git a/R/location_http.R b/R/location_http.R index 66205a03..47e84f3d 100644 --- a/R/location_http.R +++ b/R/location_http.R @@ -70,9 +70,16 @@ orderly_location_http <- R6::R6Class( }, push_file = function(src, hash) { + size <- file.info(src)$size + con <- file(src, "rb") + withr::defer(close(con)) + res <- private$client$request( sprintf("/file/%s", hash), - function(r) httr2::req_body_file(r, src, "application/octet-stream")) + function(r) { + http_body_connection(r, con, size, "application/octet-stream") + }) + invisible(NULL) }, diff --git a/R/outpack_http_client.R b/R/outpack_http_client.R index 94a33c55..6954f8b5 100644 --- a/R/outpack_http_client.R +++ b/R/outpack_http_client.R @@ -33,6 +33,22 @@ outpack_http_client <- R6::R6Class( } )) + +#' Attach a connection as the request's body. +#' +#' It is the caller's responsibility to keep the connection open for the +#' duration of the request and to close it afterwards. +http_body_connection <- function(request, con, size, type) { + request <- httr2::req_headers(request, "Content-Type" = type) + request <- httr2::req_options( + request, + post = TRUE, + readfunction = function(nbytes, ...) readBin(con, "raw", nbytes), + seekfunction = function(offset, ...) seed(con, offset), + postfieldsize_large = size + ) +} + http_client_request <- function(url, customize = identity, download = NULL, parse_json = TRUE) { req <- httr2::request(url)