From 91b49cac6ac2555cf7b2433e47d6e5c17029720d Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Tue, 25 Jun 2024 03:38:11 -0400 Subject: [PATCH 1/2] Attempt to implement dimension_separator --- src/Storage/Storage.jl | 13 +++++++------ src/Storage/s3store.jl | 1 + src/ZArray.jl | 17 ++++++++++++++++- src/metadata.jl | 16 +++++++++++----- test/runtests.jl | 4 +++- 5 files changed, 38 insertions(+), 13 deletions(-) diff --git a/src/Storage/Storage.jl b/src/Storage/Storage.jl index e6355b6..3fe1451 100644 --- a/src/Storage/Storage.jl +++ b/src/Storage/Storage.jl @@ -47,17 +47,18 @@ function subkeys end Deletes the given key from the store. """ -citostring(i::CartesianIndex) = join(reverse((i - oneunit(i)).I), '.') -citostring(::CartesianIndex{0}) = "0" +citostring(i::CartesianIndex, sep::Char='.') = join(reverse((i - oneunit(i)).I), sep) +citostring(::CartesianIndex{0}, _) = "0" +citostring(i::CartesianIndex, s::AbstractStore, p) = (@info("citostring",i,s,p);citostring(i, only(getmetadata(s, p, true).dimension_separator))) _concatpath(p,s) = isempty(p) ? s : rstrip(p,'/') * '/' * s -Base.getindex(s::AbstractStore, p, i::CartesianIndex) = s[p, citostring(i)] +Base.getindex(s::AbstractStore, p, i::CartesianIndex) = s[p, citostring(i, s, p)] Base.getindex(s::AbstractStore, p, i) = s[_concatpath(p,i)] -Base.delete!(s::AbstractStore, p, i::CartesianIndex) = delete!(s, p, citostring(i)) +Base.delete!(s::AbstractStore, p, i::CartesianIndex) = delete!(s, p, citostring(i, s, p)) Base.delete!(s::AbstractStore, p, i) = delete!(s, _concatpath(p,i)) Base.haskey(s::AbstractStore, k) = isinitialized(s,k) Base.setindex!(s::AbstractStore,v,p,i) = setindex!(s,v,_concatpath(p,i)) -Base.setindex!(s::AbstractStore,v,p,i::CartesianIndex) = s[p, citostring(i)]=v +Base.setindex!(s::AbstractStore,v,p,i::CartesianIndex) = s[p, citostring(i, s, p)]=v maybecopy(x) = copy(x) @@ -82,7 +83,7 @@ end is_zgroup(s::AbstractStore, p) = isinitialized(s,_concatpath(p,".zgroup")) is_zarray(s::AbstractStore, p) = isinitialized(s,_concatpath(p,".zarray")) -isinitialized(s::AbstractStore, p, i::CartesianIndex)=isinitialized(s,p,citostring(i)) +isinitialized(s::AbstractStore, p, i::CartesianIndex)=isinitialized(s, p, citostring(i, s, p)) isinitialized(s::AbstractStore, p, i) = isinitialized(s,_concatpath(p,i)) isinitialized(s::AbstractStore, i) = s[i] !== nothing diff --git a/src/Storage/s3store.jl b/src/Storage/s3store.jl index aaab004..10f8bdd 100644 --- a/src/Storage/s3store.jl +++ b/src/Storage/s3store.jl @@ -22,6 +22,7 @@ function Base.getindex(s::S3Store, i::String) return s3_get(s.aws,s.bucket,i,raw=true,retry=false) catch e if e isa AWSS3.AWS.AWSException && e.code == "NoSuchKey" + @info "getindex(::S3Store, $i)" s.aws s.bucket i return nothing else throw(e) diff --git a/src/ZArray.jl b/src/ZArray.jl index 1cd7255..4181757 100644 --- a/src/ZArray.jl +++ b/src/ZArray.jl @@ -329,10 +329,24 @@ function zcreate(::Type{T},storage::AbstractStore, fill_value=nothing, fill_as_missing=false, compressor=BloscCompressor(), - filters = filterfromtype(T), + filters = filterfromtype(T), + dimension_separator='.', attrs=Dict(), writeable=true, ) where T + + if compressor isa AbstractString + if haskey(compressortypes, String(compressor)) + compressor = compressortypes[compressor]() + else + throw(UnknownCompressorException(compressor)) + end + end + + if dimension_separator isa AbstractString + # Convert AbstractString to Char + dimension_separator = only(dimension_separator) + end length(dims) == length(chunks) || throw(DimensionMismatch("Dims must have the same length as chunks")) N = length(dims) @@ -347,6 +361,7 @@ function zcreate(::Type{T},storage::AbstractStore, fill_value, 'C', filters, + dimension_separator, ) isemptysub(storage,path) || error("$storage $path is not empty") diff --git a/src/metadata.jl b/src/metadata.jl index c7660e7..10df800 100644 --- a/src/metadata.jl +++ b/src/metadata.jl @@ -131,14 +131,15 @@ struct Metadata{T, N, C, F} fill_value::Union{T, Nothing} order::Char filters::F # not yet supported - function Metadata{T2, N, C, F}(zarr_format, shape, chunks, dtype, compressor,fill_value, order, filters) where {T2,N,C,F} + dimension_separator::Char + function Metadata{T2, N, C, F}(zarr_format, shape, chunks, dtype, compressor,fill_value, order, filters, dimension_separator) where {T2,N,C,F} #We currently only support version zarr_format == 2 || throw(ArgumentError("Zarr.jl currently only support v2 of the protocol")) #Do some sanity checks to make sure we have a sane array any(<(0), shape) && throw(ArgumentError("Size must be positive")) any(<(1), chunks) && throw(ArgumentError("Chunk size must be >= 1 along each dimension")) order === 'C' || throw(ArgumentError("Currently only 'C' storage order is supported")) - new{T2, N, C, F}(zarr_format, Base.RefValue{NTuple{N,Int}}(shape), chunks, dtype, compressor,fill_value, order, filters) + new{T2, N, C, F}(zarr_format, Base.RefValue{NTuple{N,Int}}(shape), chunks, dtype, compressor,fill_value, order, filters, dimension_separator) end end @@ -152,7 +153,8 @@ function ==(m1::Metadata, m2::Metadata) m1.compressor == m2.compressor && m1.fill_value == m2.fill_value && m1.order == m2.order && - m1.filters == m2.filters + m1.filters == m2.filters && + m1.dimension_separator == m2.dimension_separator end @@ -163,6 +165,7 @@ function Metadata(A::AbstractArray{T, N}, chunks::NTuple{N, Int}; fill_value::Union{T, Nothing}=nothing, order::Char='C', filters::Nothing=nothing, + dimension_separator::Char='.', fill_as_missing = false, ) where {T, N, C} T2 = (fill_value === nothing || !fill_as_missing) ? T : Union{T,Missing} @@ -174,7 +177,8 @@ function Metadata(A::AbstractArray{T, N}, chunks::NTuple{N, Int}; compressor, fill_value, order, - filters + filters, + dimension_separator, ) end @@ -207,6 +211,7 @@ function Metadata(d::AbstractDict, fill_as_missing) fv, first(d["order"]), filters, + only(get(d, "dimension_separator", '.')), ) end @@ -220,7 +225,8 @@ function JSON.lower(md::Metadata) "compressor" => md.compressor, "fill_value" => fill_value_encoding(md.fill_value), "order" => md.order, - "filters" => md.filters + "filters" => md.filters, + "dimension_separator" => md.dimension_separator, ) end diff --git a/test/runtests.jl b/test/runtests.jl index 78067b9..06aa5e7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -77,7 +77,9 @@ end "zarr_format" => 2, "chunks" => [3, 2], "fill_value" => nothing, - "compressor" => nothing) + "compressor" => nothing, + "dimension_separator" => ".", + ) # call gc to avoid unlink: operation not permitted (EPERM) on Windows # might be because files are left open # from https://github.com/JuliaLang/julia/blob/f6344d32d3ebb307e2b54a77e042559f42d2ebf6/stdlib/SharedArrays/test/runtests.jl#L146 From 174689300579f2d20b6631f72a2ba5cdf6b1b072 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Fri, 28 Jun 2024 03:58:19 -0400 Subject: [PATCH 2/2] Fix ambiguity with citostring --- src/Storage/Storage.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storage/Storage.jl b/src/Storage/Storage.jl index 3fe1451..3b4b2d4 100644 --- a/src/Storage/Storage.jl +++ b/src/Storage/Storage.jl @@ -48,8 +48,8 @@ Deletes the given key from the store. """ citostring(i::CartesianIndex, sep::Char='.') = join(reverse((i - oneunit(i)).I), sep) -citostring(::CartesianIndex{0}, _) = "0" -citostring(i::CartesianIndex, s::AbstractStore, p) = (@info("citostring",i,s,p);citostring(i, only(getmetadata(s, p, true).dimension_separator))) +citostring(::CartesianIndex{0}, _::Char) = "0" +citostring(i::CartesianIndex, s::AbstractStore, p) = citostring(i, only(getmetadata(s, p, true).dimension_separator)) _concatpath(p,s) = isempty(p) ? s : rstrip(p,'/') * '/' * s Base.getindex(s::AbstractStore, p, i::CartesianIndex) = s[p, citostring(i, s, p)]