Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for symbols as keys #130

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/src/s3examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ the overhead of repeatedly requesting many metadata files:
g = zopen(store.zstore[1], consolidated=true)
````

You can access the meta-information through `g.attrs` or for example read the first
You can access the meta-information through `attributes(g)` or for example read the first
time slice through

````julia
Expand Down
7 changes: 2 additions & 5 deletions docs/src/storage.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,10 @@ CurrentModule = Zarr

```@docs
storagesize
zname
Zarr.zname
Base.getindex(d::AbstractStore,i::String)
Base.setindex!(d::AbstractStore,v,i::String)
subdirs
Base.keys(d::AbstractStore)
newsub
getsub
Zarr.subdirs
```

You can get some inspiration on how to implement this by looking at the source code of existing storage backends.
37 changes: 21 additions & 16 deletions src/ZArray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,12 @@ function Base.show(io::IO,::MIME"text/plain",z::ZArray)
print(io, "ZArray{", eltype(z) ,"} of size ",join(string.(size(z)), " x "))
end

zname(z::ZArray) = zname(z.path)
"""
zname(z::ZArray)

Extract the name of a Zarr array from the object
"""
zname(z::ZArray) = zname(path(z))

function zname(s::String)
spl = split(rstrip(s,'/'),'/')
Expand All @@ -66,7 +71,7 @@ storagesize(z::ZArray)

Returns the size of the compressed data stored in the ZArray `z` in bytes
"""
storagesize(z::ZArray) = storagesize(z.storage,z.path)
storagesize(z::ZArray) = storagesize(storage(z),path(z))

"""
storageratio(z::ZArray)
Expand All @@ -84,18 +89,18 @@ nobytes(z::ZArray{<:String}) = "unknown"
zinfo(z::ZArray) = zinfo(stdout,z)
function zinfo(io::IO,z::ZArray)
ninit = sum(chunkindices(z)) do i
isinitialized(z.storage,z.path,i)
isinitialized(storage(z),path(z),i)
end
allinfos = [
"Type" => "ZArray",
"Data type" => eltype(z),
"Shape" => size(z),
"Chunk Shape" => z.metadata.chunks,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does `z.metadata.chunks` need to be updated now that we have `.` indexing?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so. It is only ZGroups that have the new . indexing where we need to be careful, ZArrays would still behave the same

"Order" => z.metadata.order,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does `z.metadata.order` need to be updated now that we have `.` indexing?

"Read-Only" => !z.writeable,
"Read-Only" => !iswriteable(z),
"Compressor" => z.metadata.compressor,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment for all occurrences of `z.metadata`.

"Filters" => z.metadata.filters,
"Store type" => z.storage,
"Store type" => storage(z),
"No. bytes" => nobytes(z),
"No. bytes stored" => storagesize(z),
"Storage ratio" => storageratio(z),
Expand Down Expand Up @@ -159,10 +164,10 @@ function readblock!(aout::AbstractArray{<:Any,N}, z::ZArray{<:Any, N}, r::Cartes
#bufferdict = IdDict((current_task()=>getchunkarray(z),))
a = getchunkarray(z)
# Now loop through the chunks
c = Channel{Pair{eltype(blockr),Union{Nothing,Vector{UInt8}}}}(channelsize(z.storage))
c = Channel{Pair{eltype(blockr),Union{Nothing,Vector{UInt8}}}}(channelsize(storage(z)))

task = @async begin
read_items!($z.storage,c, $z.path, $blockr)
read_items!($storage(z),c, $path(z), $blockr)
end
bind(c,task)

Expand All @@ -186,7 +191,7 @@ end

function writeblock!(ain::AbstractArray{<:Any,N}, z::ZArray{<:Any, N}, r::CartesianIndices{N}) where {N}

z.writeable || error("Can not write to read-only ZArray")
iswriteable(z) || error("Can not write to read-only ZArray")

input_base_offsets = map(i->first(i)-1,r.indices)
# Determines which chunks are affected
Expand All @@ -195,17 +200,17 @@ function writeblock!(ain::AbstractArray{<:Any,N}, z::ZArray{<:Any, N}, r::Cartes
#bufferdict = IdDict((current_task()=>getchunkarray(z),))
a = getchunkarray(z)
# Now loop through the chunks
readchannel = Channel{Pair{eltype(blockr),Union{Nothing,Vector{UInt8}}}}(channelsize(z.storage))
readchannel = Channel{Pair{eltype(blockr),Union{Nothing,Vector{UInt8}}}}(channelsize(storage(z)))

readtask = @async begin
read_items!(z.storage,readchannel, z.path, blockr)
read_items!(storage(z),readchannel, path(z), blockr)
end
bind(readchannel,readtask)

writechannel = Channel{Pair{eltype(blockr),Union{Nothing,Vector{UInt8}}}}(channelsize(z.storage))
writechannel = Channel{Pair{eltype(blockr),Union{Nothing,Vector{UInt8}}}}(channelsize(storage(z)))

writetask = @async begin
write_items!(z.storage,writechannel,z.path,blockr)
write_items!(storage(z),writechannel,path(z),blockr)
end
bind(writechannel,writetask)

Expand Down Expand Up @@ -394,9 +399,9 @@ function zzeros(T,dims...;kwargs...)
z = zcreate(T,dims...;kwargs...)
as = zeros(T, z.metadata.chunks...)
data_encoded = compress_raw(as,z)
p = z.path
p = path(z)
for i in chunkindices(z)
z.storage[p,i] = data_encoded
storage(z)[p,i] = data_encoded
end
z
end
Expand All @@ -414,9 +419,9 @@ function Base.resize!(z::ZArray{T,N}, newsize::NTuple{N}) where {T,N}
z.metadata.shape[] = newsize
#Check if array was shrunk
if any(map(<,newsize, oldsize))
prune_oob_chunks(z.storage,z.path,oldsize,newsize, z.metadata.chunks)
prune_oob_chunks(storage(z),path(z),oldsize,newsize, z.metadata.chunks)
end
writemetadata(z.storage, z.path, z.metadata)
writemetadata(storage(z), path(z), z.metadata)
nothing
end
Base.resize!(z::ZArray, newsize::Integer...) = resize!(z,newsize)
Expand Down
69 changes: 50 additions & 19 deletions src/ZGroup.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,20 @@ struct ZGroup{S<:AbstractStore}
attrs::Dict
writeable::Bool
end
# Union used to share the accessor functions below between arrays and groups.
const ZArrayOrGroup = Union{ZArray, ZGroup}
# Accessors read the struct fields via `getfield` rather than dot syntax, so they
# do not go through `getproperty` (which is overloaded for ZGroup to look up
# child groups/arrays by name).
# NOTE(review): the ZArray struct is not visible here; these assume it has
# `storage`, `path`, `attrs` and `writeable` fields like ZGroup — confirm.
# Return the `storage` field of an array or group.
storage(a::ZArrayOrGroup)=getfield(a,:storage)
# Return the `path` field of an array or group.
path(a::ZArrayOrGroup)=getfield(a,:path)
# Return the `attrs` field (the attributes) of an array or group.
attributes(a::ZArrayOrGroup)=getfield(a,:attrs)
# Return the `writeable` flag of an array or group.
iswriteable(a::ZArrayOrGroup)=getfield(a,:writeable)
# Return the `arrays` field of a group.
arrays(g::ZGroup)=getfield(g,:arrays)
# Return the `groups` field of a group.
groups(g::ZGroup)=getfield(g,:groups)
# Only `attributes` is exported; the other accessors above are not.
export attributes

# path can also be a SubString{String}
ZGroup(storage, path::AbstractString, arrays, groups, attrs, writeable) =
ZGroup(storage, String(path), arrays, groups, attrs, writeable)

zname(g::ZGroup) = zname(g.path)
zname(g::ZGroup) = zname(path(g))

#Open an existing ZGroup
function ZGroup(s::T,mode="r",path="";fill_as_missing=false) where T <: AbstractStore
Expand Down Expand Up @@ -58,23 +66,46 @@ function zopen_noerr(s::AbstractStore, mode="r";
end

function Base.show(io::IO, g::ZGroup)
print(io, "ZarrGroup at ", g.storage, " and path ", g.path)
!isempty(g.arrays) && print(io, "\nVariables: ", map(i -> string(zname(i), " "), values(g.arrays))...)
!isempty(g.groups) && print(io, "\nGroups: ", map(i -> string(zname(i), " "), values(g.groups))...)
print(io, "ZarrGroup at ", storage(g), " and path ", path(g))
!isempty(arrays(g)) && print(io, "\nVariables: ", map(i -> string(zname(i), " "), values(arrays(g)))...)
!isempty(groups(g)) && print(io, "\nGroups: ", map(i -> string(zname(i), " "), values(groups(g)))...)
nothing
end
Base.haskey(g::ZGroup,k)= haskey(g.groups,k) || haskey(g.arrays,k)
Base.haskey(g::ZGroup,k)= haskey(groups(g),string(k)) || haskey(arrays(g),string(k))


function Base.getindex(g::ZGroup, k)
if haskey(g.groups, k)
return g.groups[k]
elseif haskey(g.arrays, k)
return g.arrays[k]
# Look up a child of the group `g` by its string name. Sub-groups are searched
# first, then arrays; a `KeyError` is thrown when neither contains `k`.
function Base.getindex(g::ZGroup, k::AbstractString)
    subgroups = groups(g)
    haskey(subgroups, k) && return subgroups[k]

    subarrays = arrays(g)
    haskey(subarrays, k) && return subarrays[k]

    throw(KeyError("Zarr Dataset does not contain $k"))
end

# Any non-string key (e.g. a Symbol) is converted with `string` and looked up
# through the string method above.
Base.getindex(g::ZGroup, k) = getindex(g, string(k))
# List the names reachable through `g.name`: one Symbol per sub-group followed
# by one Symbol per array. With `private = true` the list starts with `:attrs`.
function Base.propertynames(g::ZGroup, private::Bool=false)
    names = private ? Symbol[:attrs] : Symbol[]
    append!(names, (Symbol(name) for name in keys(groups(g))))
    append!(names, (Symbol(name) for name in keys(arrays(g))))
    return names
end

# Dot access on a group: every name except `:attrs` is forwarded to `g[k]`.
# `:attrs` still returns the `attrs` field but logs a warning pointing to
# `attributes(g)`.
function Base.getproperty(g::ZGroup, k::Symbol)
    k === :attrs || return g[k]

    @warn "Accessing attributes through `.attrs` is not recommended anymore. Please use `attributes(g)` instead."
    return getfield(g, :attrs)
end

"""
zopen(s::AbstractStore, mode="r"; consolidated = false, path = "", lru = 0)
Expand Down Expand Up @@ -142,21 +173,21 @@ zgroup(s::String;kwargs...)=zgroup(storefromstring(s, true)...;kwargs...)

"Create a subgroup of the group g"
function zgroup(g::ZGroup, name; attrs=Dict())
g.writeable || throw(IOError("Zarr group is not writeable. Please re-open in write mode to create an array"))
g.groups[name] = zgroup(g.storage,_concatpath(g.path,name),attrs=attrs)
iswriteable(g) || throw(IOError("Zarr group is not writeable. Please re-open in write mode to create an array"))
groups(g)[name] = zgroup(storage(g),_concatpath(path(g),name),attrs=attrs)
end

"Create a new subarray of the group g"
function zcreate(::Type{T},g::ZGroup, name::AbstractString, addargs...; kwargs...) where T
g.writeable || throw(IOError("Zarr group is not writeable. Please re-open in write mode to create an array"))
iswriteable(g) || throw(IOError("Zarr group is not writeable. Please re-open in write mode to create an array"))
name = string(name)
z = zcreate(T, g.storage, addargs...; path = _concatpath(g.path,name), kwargs...)
g.arrays[name] = z
z = zcreate(T, storage(g), addargs...; path = _concatpath(path(g),name), kwargs...)
arrays(g)[name] = z
return z
end

HTTP.serve(s::Union{ZArray,ZGroup}, args...; kwargs...) = HTTP.serve(s.storage, s.path, args...; kwargs...)
HTTP.serve(s::Union{ZArray,ZGroup}, args...; kwargs...) = HTTP.serve(storage(s), path(s), args...; kwargs...)
function consolidate_metadata(z::Union{ZArray,ZGroup})
z.writeable || throw(Base.IOError("Zarr group is not writeable. Please re-open in write mode to create an array",0))
consolidate_metadata(z.storage,z.path)
iswriteable(z) || throw(Base.IOError("Zarr group is not writeable. Please re-open in write mode to create an array",0))
consolidate_metadata(storage(z),path(z))
end
10 changes: 5 additions & 5 deletions test/python.jl
Original file line number Diff line number Diff line change
Expand Up @@ -132,11 +132,11 @@ zarr.consolidate_metadata($ppython)
#Open in Julia
g = zopen(ppython)
@test g isa Zarr.ZGroup
@test g.attrs["groupatt"] == "Hi"
@test attributes(g)["groupatt"] == "Hi"
a1 = g["a1"]
@test a1 isa ZArray
@test a1[:,:,:]==permutedims(data,(3,2,1))
@test a1.attrs["test"]==Dict("b"=>6)
@test attributes(a1)["test"]==Dict("b"=>6)
# Test reading the string array
@test String(g["a2"][:])=="hallo"
@test g["a3"] == ["test1", "test234"]
Expand All @@ -149,13 +149,13 @@ rm(joinpath(ppython,"a1",".zarray"))
rm(joinpath(ppython,"a2",".zarray"))
g = zopen(ppython, "w", consolidated=true)
@test g isa Zarr.ZGroup
@test g.attrs["groupatt"] == "Hi"
@test attributes(g)["groupatt"] == "Hi"
a1 = g["a1"]
@test a1 isa ZArray
@test a1[:,:,:]==permutedims(data,(3,2,1))
@test a1.attrs["test"]==Dict("b"=>6)
@test attributes(a1)["test"]==Dict("b"=>6)
@test storagesize(a1) == 960
@test sort(Zarr.subkeys(a1.storage,"a1"))[1:5] == ["0.0.0","0.0.1","0.0.2","0.0.3","0.1.0"]
@test sort(Zarr.subkeys(Zarr.storage(a1),"a1"))[1:5] == ["0.0.0","0.0.1","0.0.2","0.0.3","0.1.0"]
a1[:,1,1] = 1:10
@test a1[:,1,1] == 1:10
# Test reading the string array
Expand Down
42 changes: 33 additions & 9 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ using JSON
using Pkg
using PyCall
using Dates
using Zarr: storage

macro test_py(ex)
quote
Expand All @@ -20,9 +21,9 @@ end
@test z isa ZArray{Int64, 2, Zarr.BloscCompressor,
Zarr.DictStore}

@test length(z.storage.a) === 3
@test length(z.storage.a["0.0"]) === 64
@test eltype(z.storage.a["0.0"]) === UInt8
@test length(storage(z).a) === 3
@test length(storage(z).a["0.0"]) === 64
@test eltype(storage(z).a["0.0"]) === UInt8
@test z.metadata.shape[] === (2, 3)
@test z.metadata.order === 'C'
@test z.metadata.chunks === (2, 3)
Expand All @@ -32,8 +33,8 @@ end
@test z.metadata.compressor.clevel === 5
@test z.metadata.compressor.cname === "lz4"
@test z.metadata.compressor.shuffle === 1
@test z.attrs == Dict{Any, Any}()
@test z.writeable === true
@test attributes(z) == Dict{Any, Any}()
@test Zarr.iswriteable(z) === true
@test_throws ArgumentError zzeros(Int64,2,3, chunks = (0,1))
@test_throws ArgumentError zzeros(Int64,0,-1)
@test_throws ArgumentError Zarr.Metadata(zeros(2,2), (2,2), zarr_format = 3)
Expand Down Expand Up @@ -63,7 +64,7 @@ end
compressor=Zarr.NoCompressor())

@test z.metadata.compressor === Zarr.NoCompressor()
@test z.storage === Zarr.DirectoryStore("$dir/$name")
@test storage(z) === Zarr.DirectoryStore("$dir/$name")
@test isdir("$dir/$name")
@test ispath("$dir/$name/.zarray")
@test ispath("$dir/$name/.zattrs")
Expand Down Expand Up @@ -92,9 +93,32 @@ end
g2 = zgroup(g,"asubgroup",attrs = Dict("a1"=>5))
@test Zarr.is_zgroup(store,"mygroup")
@test Zarr.is_zgroup(store,"mygroup/asubgroup")
@test g2.attrs["a1"]==5
@test attributes(g2)["a1"]==5
@test isdir(joinpath(store.folder,"mygroup"))
@test isdir(joinpath(store.folder,"mygroup","asubgroup"))

#Another test for indexing and getproperty
a = zgroup(Zarr.DictStore(),attrs=Dict("a"=>5))

zzeros(Float64,a,"a",3,3)
zzeros(Int,a,"b",5,4,2)
zgroup(a,"subgroup")

@test a["a"] isa ZArray
@test a[:a] isa ZArray
@test a.b isa ZArray
@test a.subgroup isa ZGroup
@test haskey(a,"a")
@test haskey(a,:a)
@test !haskey(a,"something")
@test !haskey(a,:something)
@test issetequal(propertynames(a),(:a,:b,:subgroup))
@test issetequal(propertynames(a,true),(:a,:b,:subgroup,:attrs))
@test @test_warn "Accessing attributes" a.attrs["a"]==5
@test attributes(a) == Dict("a"=>5)
buf=IOBuffer()
show(buf,a)
@test startswith(String(take!(buf)),"ZarrGroup at Dictionary Storage and path \nVariables:")
end

@testset "Metadata" begin
Expand Down Expand Up @@ -177,7 +201,7 @@ end
@test all(ismissing,amiss[:,2])
@test all(i->isequal(i...),zip(amiss[1:3,4],[1,missing,3]))
# Test that chunk containing only missings is not initialized
@test !Zarr.isinitialized(amiss.storage,Zarr.citostring(CartesianIndex((1,5))))
@test !Zarr.isinitialized(Zarr.storage(amiss),Zarr.citostring(CartesianIndex((1,5))))
#
amiss = zcreate(Int64, 10,10,chunks=(5,2), fill_value=-1, fill_as_missing=false)
amiss[:,1] = 1:10
Expand All @@ -189,7 +213,7 @@ end
@test all(==(-1),amiss[:,2])
@test all(i->isequal(i...),zip(amiss[1:3,4],[1,-1,3]))
# Test that chunk containing only fill values is not initialized
@test !Zarr.isinitialized(amiss.storage,Zarr.citostring(CartesianIndex((1,5))))
@test !Zarr.isinitialized(Zarr.storage(amiss),Zarr.citostring(CartesianIndex((1,5))))
end

@testset "resize" begin
Expand Down
6 changes: 3 additions & 3 deletions test/storage.jl
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ end
@test storagesize(S3,p) == 0
@test Zarr.is_zgroup(S3,p) == true
S3group = zopen(S3,path=p)
S3Array = S3group.groups["bar"].arrays["baz"]
S3Array = Zarr.arrays(Zarr.groups(S3group)["bar"])["baz"]
@test eltype(S3Array) == Zarr.ASCIIChar
@test storagesize(S3Array) == 69
@test String(S3Array[:]) == "Hello from the cloud!"
Expand Down Expand Up @@ -142,8 +142,8 @@ end
ip,port = getsockname(server)
@async HTTP.serve(g,ip,port,server=server)
g2 = zopen("http://$ip:$port")
@test g2.attrs == Dict("groupatt"=>5)
@test g2["a1"].attrs == Dict("arratt"=>2.5)
@test attributes(g2) == Dict("groupatt"=>5)
@test attributes(g2["a1"]) == Dict("arratt"=>2.5)
@test g2["a1"][:,:] == reshape(1:200,10,20)
close(server)
end
Loading