From d932969ba53252bb0fabb5616191f36000b5d9ea Mon Sep 17 00:00:00 2001
From: Alexander Barth
Date: Thu, 8 Feb 2024 11:40:55 +0100
Subject: [PATCH] omit chunks on client HTTP error (like 404, 403,...) (#134)

* omit chunk on client HTTP error

* Allow non-404 errors only if user sets it

* Add a doc string

* add a test case

---------

Co-authored-by: Fabian Gans
---
Reviewer note: this patch was recovered from a copy whose line breaks had been
collapsed. Line boundaries were rebuilt from the +/- markers and checked
against the hunk line counts and the diffstat. Leading indentation and the
position of blank context lines are a best guess (TODO confirm against the
repository); if a hunk is rejected, retry with `git apply --ignore-whitespace`.

 src/Storage/http.jl | 36 +++++++++++++++++++++++++++++++-----
 test/storage.jl     | 12 ++++++++++++
 2 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/src/Storage/http.jl b/src/Storage/http.jl
index 2bf79af..1fa8962 100644
--- a/src/Storage/http.jl
+++ b/src/Storage/http.jl
@@ -12,15 +12,24 @@ python package. In case you experience performance options, one can try to use
 """
 struct HTTPStore <: AbstractStore
     url::String
+    allowed_codes::Set{Int}
 end
+HTTPStore(url) = HTTPStore(url,Set((404,)))
 
 function Base.getindex(s::HTTPStore, k::String)
     r = HTTP.request("GET",string(s.url,"/",k),status_exception = false,socket_type_tls=OpenSSL.SSLStream)
     if r.status >= 300
-        if r.status == 404
+        if r.status in s.allowed_codes
             nothing
         else
-            error("Error connecting to $(s.url) :", String(r.body))
+            err_msg =
+            """Received error code $(r.status) when connecting to $(s.url) with message $(String(r.body)).
+            This might be an actual error or an indication that the server returns a different error code
+            than 404 for missing chunks. In the later case you can run
+            `Zarr.missing_chunk_return_code!(a.storage,$(r.status))` where a is your Zarr array or group to
+            fix the issue.
+            """
+            throw(ErrorException(err_msg))
         end
     else
         r.body
@@ -32,11 +41,28 @@ push!(storageregexlist,r"^https://"=>HTTPStore)
 push!(storageregexlist,r"^http://"=>HTTPStore)
 storefromstring(::Type{<:HTTPStore}, s,_) = ConsolidatedStore(HTTPStore(s),""),""
 
+"""
+    missing_chunk_return_code!(s::HTTPStore, code::Union{Int,AbstractVector{Int}})
+
+Extends the list of HTTP return codes that signals that a certain key in a HTTPStore is not available. Most data providers
+return code 404 for missing elements, but some may use different return codes like 403. This function can be used
+to add return codes that signal missing chunks.
+
+### Example
+
+````julia
+a = zopen("https://path/to/remote/array")
+missing_chunk_return_code!(a.storage, 403)
+````
+"""
+missing_chunk_return_code!(s::ConsolidatedStore,code) = missing_chunk_return_code!(s.parent,code)
+missing_chunk_return_code!(s::HTTPStore, code::Integer) = push!(s.allowed_codes,code)
+missing_chunk_return_code!(s::HTTPStore, codes::AbstractVector{<:Integer}) = foreach(c->push!(s.allowed_codes,c),codes)
 store_read_strategy(::HTTPStore) = ConcurrentRead(concurrent_io_tasks[])
 
 
 ## This is a server implementation for Zarr datasets
-function zarr_req_handler(s::AbstractStore, p)
+function zarr_req_handler(s::AbstractStore, p, notfound = 404)
     if s[p,".zmetadata"] === nothing
         consolidate_metadata(s)
     end
@@ -47,12 +73,12 @@ function zarr_req_handler(s::AbstractStore, p)
         r = s[p,k]
         try
             if r === nothing
-                return HTTP.Response(404, "Error: Key $k not found")
+                return HTTP.Response(notfound, "Error: Key $k not found")
             else
                 return HTTP.Response(200, r)
             end
         catch e
-            return HTTP.Response(404, "Error: $e")
+            return HTTP.Response(notfound, "Error: $e")
         end
     end
 end
diff --git a/test/storage.jl b/test/storage.jl
index 1dde7b8..a478633 100644
--- a/test/storage.jl
+++ b/test/storage.jl
@@ -146,4 +146,16 @@ end
     @test g2["a1"].attrs == Dict("arratt"=>2.5)
     @test g2["a1"][:,:] == reshape(1:200,10,20)
     close(server)
+    #Test server that returns 403 instead of 404 for missing chunks
+    server = Sockets.listen(0)
+    ip,port = getsockname(server)
+    s = Zarr.DictStore()
+    g = zgroup(s, attrs = Dict("groupatt"=>5))
+    a = zcreate(Int,g,"a",10,20,chunks=(5,5),attrs=Dict("arratt"=>2.5),fill_value = -1)
+    @async HTTP.serve(Zarr.zarr_req_handler(s,g.path,403),ip,port,server=server)
+    g3 = zopen("http://$ip:$port")
+    @test_throws "Received error code 403" g3["a"][:,:]
+    Zarr.missing_chunk_return_code!(g3.storage,403)
+    @test all(==(-1),g3["a"][:,:])
+    close(server)
 end