diff --git a/Project.toml b/Project.toml
index 27c0cd9..0b2b2cf 100644
--- a/Project.toml
+++ b/Project.toml
@@ -19,10 +19,14 @@ Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
 ZipArchives = "49080126-0e18-4c2a-b176-c102e4b3760c"
 
+[weakdeps]
+CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2"
+
 [compat]
 AWSS3 = "0.10"
 Blosc = "0.5, 0.6, 0.7"
 CodecZlib = "0.6, 0.7"
+CodecZstd = "0.8.3"
 DataStructures = "0.17, 0.18"
 DiskArrays = "0.4.2"
 HTTP = "^1.3"
@@ -34,6 +38,9 @@ URIs = "1"
 ZipArchives = "2"
 julia = "1.2"
 
+[extensions]
+CodecZstdExt = "CodecZstd"
+
 [extras]
 Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d"
 PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
diff --git a/docs/src/reference.md b/docs/src/reference.md
index f9d4402..156953a 100644
--- a/docs/src/reference.md
+++ b/docs/src/reference.md
@@ -20,3 +20,20 @@ Pages = ["ZGroup.jl"]
 Modules = [Zarr]
 Pages = ["Compressors.jl"]
 ```
+
+Additional compressors can be loaded via Julia's package extension mechanism,
+which requires Julia 1.9 or later.
+
+For example, the "zstd" compressor ID can be enabled by loading CodecZstd.jl.
+This uses Zstandard directly rather than using Blosc.
+
+```julia-repl
+julia> using Zarr, CodecZstd
+
+julia> zarray = zzeros(UInt16, 1024, 512, compressor="zstd", path="zarr_zstd_demo");
+
+julia> zarray2 = zopen("zarr_zstd_demo");
+
+julia> zarray == zarray2
+true
+```
diff --git a/ext/CodecZstdExt.jl b/ext/CodecZstdExt.jl
new file mode 100644
index 0000000..62c5253
--- /dev/null
+++ b/ext/CodecZstdExt.jl
@@ -0,0 +1,64 @@
+module CodecZstdExt
+
+using Zarr: Zarr
+using JSON: JSON
+using CodecZstd: CodecZstd
+
+# Compression level used when neither the caller nor stored metadata supplies one.
+const DEFAULT_CLEVEL = 3
+
+"""
+    ZstdZarrCompressor(clevel::Int)
+    ZstdZarrCompressor(; clevel::Int=3)
+    ZstdZarrCompressor(c::CodecZstd.ZstdCompressor, [d::CodecZstd.ZstdDecompressor])
+
+Zstandard compression for Zarr.jl. This is a `Zarr.Compressor` wrapper around
+`CodecZstd`. Construct with either the compression level, `clevel`, or by
+providing an instance of a `ZstdCompressor`. `ZstdFrameCompressor` is
+recommended.
+"""
+struct ZstdZarrCompressor <: Zarr.Compressor
+    compressor::CodecZstd.ZstdCompressor
+    decompressor::CodecZstd.ZstdDecompressor
+end
+# Use default ZstdDecompressor if only compressor is provided
+function ZstdZarrCompressor(compressor::CodecZstd.ZstdCompressor)
+    return ZstdZarrCompressor(
+        compressor,
+        CodecZstd.ZstdDecompressor()
+    )
+end
+function ZstdZarrCompressor(clevel::Int)
+    return ZstdZarrCompressor(
+        CodecZstd.ZstdFrameCompressor(; level = clevel)
+    )
+end
+ZstdZarrCompressor(; clevel::Int=DEFAULT_CLEVEL) = ZstdZarrCompressor(clevel)
+
+# Build the compressor from a compressor metadata dict. Fall back to the default
+# level when the dict carries no "level" entry instead of raising a KeyError.
+function Zarr.getCompressor(::Type{ZstdZarrCompressor}, d::Dict)
+    return ZstdZarrCompressor(get(d, "level", DEFAULT_CLEVEL))
+end
+
+# Decompress the bytes `a` and reinterpret the result as `Base.nonmissingtype(T)`.
+function Zarr.zuncompress(a, z::ZstdZarrCompressor, T)
+    result = transcode(z.decompressor, a)
+    return Zarr._reinterpret(Base.nonmissingtype(T), result)
+end
+
+# Compress the raw bytes of `a`; `[:]` copies them into a flat byte vector first.
+function Zarr.zcompress(a, z::ZstdZarrCompressor)
+    a_uint8 = Zarr._reinterpret(UInt8,a)[:]
+    return transcode(z.compressor, a_uint8)
+end
+
+# JSON form of the compressor: its id plus the level of the wrapped compressor.
+JSON.lower(z::ZstdZarrCompressor) = Dict("id"=>"zstd", "level" => z.compressor.level)
+
+# Register the "zstd" compressor id with Zarr when this module is initialized.
+function __init__()
+    Zarr.compressortypes["zstd"] = ZstdZarrCompressor
+end
+
+end # module CodecZstdExt
diff --git a/src/Compressors.jl b/src/Compressors.jl
index b54e97a..9673dc3 100644
--- a/src/Compressors.jl
+++ b/src/Compressors.jl
@@ -6,9 +6,46 @@ _reinterpret(::Type{T}, x::AbstractArray{S, 0}) where {T, S} = reinterpret(T, re
 _reinterpret(::Type{T}, x::AbstractArray) where T = reinterpret(T, x)
 
 abstract type Compressor end
-getCompressor(compdict::Dict) = getCompressor(compressortypes[compdict["id"]],compdict)
+function getCompressor(compdict::Dict)
+    if haskey(compressortypes, compdict["id"])
+        return getCompressor(compressortypes[compdict["id"]],compdict)
+    else
+        throw(UnknownCompressorException(compdict["id"]))
+    end
+end
 getCompressor(::Nothing) = NoCompressor()
 
+"""
+    UnknownCompressorException(compid::String)
+
+Exception that occurs when an unknown compressor id string is encountered. If
+a package that will enable the compressor is known, then we will recommend that
+the user load that package.
+"""
+struct UnknownCompressorException <: Exception
+    compid::String
+end
+function Base.showerror(io::IO, e::UnknownCompressorException)
+    println(io, "Zarr compressor $(e.compid) is not loaded.")
+    if haskey(compressorpkgs, e.compid)
+        pkg = compressorpkgs[e.compid]
+        println(io, """
+        Loading the Julia package $(pkg).jl will trigger the compressor
+        extension package to load:
+        ```
+        using Pkg
+        Pkg.add("$pkg")
+        using $pkg
+        ```
+        """)
+    else
+        println(io, """
+        A compressor for $(e.compid) has not been implemented. Please file an
+        issue at https://github.com/JuliaIO/Zarr.jl/issues .
+        """)
+    end
+end
+
 #Compression when no filter is given
 zcompress!(compressed,data,c,::Nothing) = zcompress!(compressed,data,c)
 zuncompress!(data,compressed,c,::Nothing) = zuncompress!(data,compressed,c)
@@ -116,9 +153,20 @@ end
 
 JSON.lower(::NoCompressor) = nothing
 
-compressortypes = Dict("blosc"=>BloscCompressor, nothing=>NoCompressor)
+const compressortypes = Dict("blosc"=>BloscCompressor, nothing=>NoCompressor)
 
 
+"""
+    Zarr.compressorpkgs::Dict{String,Symbol}
+Dictionary mapping compressor names to package names containing the compressor
+implementations. Loading the packages in the values will trigger package
+extensions to load.
+"""
+const compressorpkgs = Dict(
+    "blosc" => :Blosc,
+    "zlib" => :CodecZlib,
+    "zstd" => :CodecZstd
+)
 
 """
     ZlibCompressor(clevel=-1)
diff --git a/src/ZArray.jl b/src/ZArray.jl
index 1cd7255..5365ba3 100644
--- a/src/ZArray.jl
+++ b/src/ZArray.jl
@@ -333,6 +333,14 @@ function zcreate(::Type{T},storage::AbstractStore,
         attrs=Dict(),
         writeable=true,
         ) where T
+
+    if compressor isa AbstractString
+        if haskey(compressortypes, String(compressor))
+            compressor = compressortypes[compressor]()
+        else
+            throw(UnknownCompressorException(compressor))
+        end
+    end
 
     length(dims) == length(chunks) || throw(DimensionMismatch("Dims must have the same length as chunks"))
     N = length(dims)
diff --git a/test/Project.toml b/test/Project.toml
index 008ae2a..6f9abb1 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,4 +1,5 @@
 [deps]
+CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2"
 Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
diff --git a/test/ext.jl b/test/ext.jl
new file mode 100644
index 0000000..39ef1b9
--- /dev/null
+++ b/test/ext.jl
@@ -0,0 +1,38 @@
+using Zarr
+using Test
+
+@testset "Zarr Extension Packages" begin
+    @test_throws Zarr.UnknownCompressorException("zstd") zzeros(UInt8, 512, compressor="zstd")
+    @test_throws Zarr.UnknownCompressorException("asdf") zzeros(UInt8, 512, compressor="asdf")
+    d = Dict("id" => "zstd")
+    @test_throws Zarr.UnknownCompressorException("zstd") Zarr.getCompressor(d)
+
+    iob = IOBuffer()
+    showerror(iob, Zarr.UnknownCompressorException("zstd"))
+    @test occursin("CodecZstd.jl", String(take!(iob)))
+
+    iob = IOBuffer()
+    showerror(iob, Zarr.UnknownCompressorException("asdf"))
+    @test occursin("issue", String(take!(iob)))
+    @test Zarr.getCompressor(nothing) == Zarr.NoCompressor()
+end
+
+# Package extensions, and `Base.get_extension`, only exist on Julia 1.9 and later.
+if isdefined(Base, :get_extension)
+    using CodecZstd
+    @testset "Zarr CodecZstd Extension" begin
+        CodecZstdExt = Base.get_extension(Zarr, :CodecZstdExt)
+        @test haskey(Zarr.compressortypes, "zstd")
+        @test Zarr.compressortypes["zstd"] == CodecZstdExt.ZstdZarrCompressor
+        @test Zarr.getCompressor(Dict("id" => "zstd")) isa CodecZstdExt.ZstdZarrCompressor
+        td = tempname()
+        zarray = zzeros(UInt16, 16, 16, compressor="zstd", path=td)
+        zarray .= reshape(1:256,16,16)
+        @test isa(zarray, ZArray{UInt16})
+        @test zarray.metadata.compressor isa CodecZstdExt.ZstdZarrCompressor
+        zarray2 = zopen(td)
+        @test isa(zarray2, ZArray{UInt16})
+        @test zarray2.metadata.compressor isa CodecZstdExt.ZstdZarrCompressor
+        @test zarray2 == reshape(1:256,16,16)
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 78067b9..6197bdb 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -266,9 +266,7 @@ end
 end
 
 include("storage.jl")
-
-
-
 include("python.jl")
+include("ext.jl")
 
 end # @testset "Zarr"