Skip to content

Commit

Permalink
bump to Arrow.jl v2 (#21)
Browse files Browse the repository at this point in the history
Co-authored-by: Eric Hanson <[email protected]>
  • Loading branch information
jrevels and ericphanson authored Oct 14, 2021
1 parent e9178c0 commit 00e7448
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 64 deletions.
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
name = "Legolas"
uuid = "741b9549-f6ed-4911-9fbf-4a1c0c97f0cd"
authors = ["Beacon Biosignals, Inc."]
version = "0.2.4"
version = "0.3.0"

[deps]
Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[compat]
Arrow = "1.6.2"
Arrow = "2"
DataFrames = "1"
Tables = "1.4"
julia = "1.3"
Expand Down
1 change: 1 addition & 0 deletions docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Legolas.schema_parent
## Validating/Writing/Reading Legolas Tables

```@docs
Legolas.extract_schema
Legolas.validate
Legolas.write
Legolas.read
Expand Down
17 changes: 11 additions & 6 deletions examples/tour.jl
Original file line number Diff line number Diff line change
Expand Up @@ -170,13 +170,18 @@ invalid = vcat(rows, Tables.rowmerge(child; a="this violates the schema's `a::Re
# Similarly, Legolas provides `Legolas.read(path_or_io)`, which wraps `Arrow.Table(path_or_io)`
# and performs `Legolas.validate` on the resulting `Arrow.Table` before returning it.
@test table_isequal(Legolas.read(Legolas.tobuffer(rows, schema)), t)
@test_throws ArgumentError("`legolas_schema_qualified` field not found in Arrow table metadata") Legolas.read(Arrow.tobuffer(rows))
invalid = Tables.columns(invalid) # ref https://github.com/JuliaData/Arrow.jl/issues/211
Arrow.setmetadata!(invalid, Dict("legolas_schema_qualified" => "my-child-schema@1>my-schema@1"))
@test_throws ArgumentError("field `a` has unexpected type; expected <:Real, found Union{Missing, Float64, String}") Legolas.read(Arrow.tobuffer(invalid))
msg = """
could not extract valid `Legolas.Schema` from provided Arrow table;
is it missing the expected custom metadata and/or the expected
\"legolas_schema_qualified\" field?
"""
@test_throws ArgumentError(msg) Legolas.read(Arrow.tobuffer(rows))
invalid_but_has_schema_metadata = Arrow.tobuffer(invalid;
metadata = ("legolas_schema_qualified" => Legolas.schema_qualified_string(schema),))
@test_throws ArgumentError("field `a` has unexpected type; expected <:Real, found Union{Missing, Float64, String}") Legolas.read(invalid_but_has_schema_metadata)

# A note about one additional benefit of `Legolas.read`/`Legolas.write`: Unlike their Arrow.jl counterparts,
# these functions are relatively agnostic to the types of provided path arguments. Generally, as long as a
# given `path` supports `Base.read(path)::Vector{UInt8}` and `Base.write(path, bytes::Vector{UInt8})` then
# `path` will work as an argument to `Legolas.read`/`Legolas.write`. At some point, we'd like to make similar
# given `path` supports `Base.read(path)::Vector{UInt8}` and `Base.write(path, bytes::Vector{UInt8})` then
# `path` will work as an argument to `Legolas.read`/`Legolas.write`. At some point, we'd like to make similar
# upstream improvements to Arrow.jl to render its API more path-type-agnostic.
87 changes: 39 additions & 48 deletions src/tables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,25 @@ end
##### validate tables
#####

"""
Legolas.extract_schema(table)
Attempt to extract Arrow metadata from `table` via `Arrow.getmetadata(table)`.
If Arrow metadata is present and contains `\"$LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY\" => s`, return [`Legolas.Schema(s)`](@ref).
Otherwise, return `nothing`.
"""
function extract_schema(table)
    # Look up whatever Arrow metadata is attached to `table`; `nothing` means
    # there is no metadata at all, so there is no schema to extract.
    metadata = Arrow.getmetadata(table)
    isnothing(metadata) && return nothing
    # Scan the metadata entries for the Legolas schema key and build a `Schema`
    # from the first matching value.
    for (key, value) in metadata
        if key == LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY
            return Schema(value)
        end
    end
    # Metadata was present but did not carry the schema key.
    return nothing
end

"""
Legolas.validate(table, legolas_schema::Legolas.Schema)
Expand Down Expand Up @@ -38,19 +57,18 @@ end
"""
Legolas.validate(table)
Attempt to determine `s::Legolas.Schema` from the `$LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY` field in `table`'s
Arrow metadata and return `Legolas.validate(table, s)`.
If the `$LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY` field isn't found or has an invalid value, this function
will throw an `ArgumentError`.
If [`Legolas.extract_schema(table)`](@ref) returns a valid `Legolas.Schema`, return `Legolas.validate(table, Legolas.extract_schema(table))`.
Otherwise, if a `Legolas.Schema` isn't found or is invalid, an `ArgumentError` is thrown.
"""
function validate(table)
    # Resolve the schema through `extract_schema` so that any metadata container
    # returned by `Arrow.getmetadata` is accepted, not only a `Dict`.
    schema = Legolas.extract_schema(table)
    # No schema could be recovered from the table's metadata: report it as an
    # `ArgumentError` rather than silently skipping validation.
    if isnothing(schema)
        throw(ArgumentError("""
                            could not extract valid `Legolas.Schema` from provided Arrow table;
                            is it missing the expected custom metadata and/or the expected
                            \"$LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY\" field?
                            """))
    end
    # Delegate to the two-argument method and propagate its result.
    return validate(table, schema)
end

#####
Expand Down Expand Up @@ -84,15 +102,17 @@ Any other provided `kwargs` are forwarded to an internal invocation of `Arrow.wr
Note that `io_or_path` may be any type that supports `Base.write(io_or_path, bytes::Vector{UInt8})`.
"""
function write(io_or_path, table, schema::Schema; validate::Bool=true,
               metadata=Arrow.getmetadata(table), kwargs...)
    # `metadata` defaults to whatever Arrow metadata is already attached to
    # `table`; it may be `nothing` or an iterable of key/value pairs.
    validate && Legolas.validate(table, schema)
    schema_metadata = LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY => schema_qualified_string(schema)
    if isnothing(metadata)
        metadata = (schema_metadata,)
    else
        # Drop any pre-existing schema entry before adding ours. Without this,
        # metadata inherited from a table written under a different schema would
        # leave two entries for the same key in the output.
        keep = entry -> first(entry) != LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY
        metadata = Set(Iterators.filter(keep, metadata))
        push!(metadata, schema_metadata)
    end
    # The merged metadata is handed to `write_arrow` explicitly; all remaining
    # keyword arguments are forwarded untouched.
    write_arrow(io_or_path, table; metadata=metadata, kwargs...)
    return table
end

Expand Down Expand Up @@ -130,35 +150,6 @@ write_arrow(path::String, table; kwargs...) = Arrow.write(path, table; kwargs...
# `IO` destination: forward to `Arrow.write`, defaulting the `file` keyword to
# `true` unless the caller supplied their own value in `kwargs`.
function write_arrow(io::IO, table; kwargs...)
    return Arrow.write(io, table; file=get(kwargs, :file, true), kwargs...)
end

# Any other destination: serialize into an in-memory buffer via the `IO` method,
# then hand the resulting bytes to `write_full_path`.
function write_arrow(path, table; kwargs...)
    io = IOBuffer()
    write_arrow(io, table; kwargs...)
    return write_full_path(path, take!(io))
end

#####
##### Arrow.Table metadata manipulation
#####
# TODO: upstream to Arrow.jl?

"""
assign_to_table_metadata!(table, pairs)
Assign the given `pairs` (an iterable of `Pair{String,String}`) to `table`'s associated
Arrow metadata `Dict`, creating this metadata `Dict` if it doesn't already exist.
Returns `table`'s associated Arrow metadata `Dict`.
Please note https://github.com/JuliaData/Arrow.jl/issues/211 before using this function.
Note that we intend to eventually migrate this function from Legolas.jl to a more appropriate package.
"""
function assign_to_table_metadata!(table, pairs)
    existing = Arrow.getmetadata(table)
    # Reuse the table's metadata when it is already a `Dict`; otherwise create a
    # fresh `Dict{String,String}` and attach it to the table.
    metadata = if existing isa Dict
        existing
    else
        fresh = Dict{String,String}()
        Arrow.setmetadata!(table, fresh)
        fresh
    end
    # Later pairs overwrite earlier values stored under the same key.
    for entry in pairs
        key, value = entry
        metadata[key] = value
    end
    return metadata
end

#####
##### Tables.jl operations
#####
Expand Down
19 changes: 11 additions & 8 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,6 @@ end
end

@testset "miscellaneous Legolas/src/tables.jl tests" begin
struct Foo
meta
end
Legolas.Arrow.getmetadata(foo::Foo) = foo.meta
foo = Foo(Dict("a" => "b", "b" => "b"))
@test foo.meta === Legolas.assign_to_table_metadata!(foo, ("b" => "c", "d" => "e"))
@test foo.meta == Dict("a" => "b", "b" => "c", "d" => "e")

struct MyPath
x::String
end
Expand All @@ -102,6 +94,17 @@ end
t = [Baz(a=1, b=2), Baz(a=3, b=4)]
Legolas.write(path, t, Schema("baz", 1))
@test t == Baz.(Tables.rows(Legolas.read(path)))
tbl = Arrow.Table(Legolas.tobuffer(t, Schema("baz", 1); metadata=("a" => "b", "c" => "d")))
@test Set(Arrow.getmetadata(tbl)) == Set((Legolas.LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY => "baz@1",
"a" => "b", "c" => "d"))

struct Foo
meta
end
Legolas.Arrow.getmetadata(foo::Foo) = foo.meta
foo = Foo(Dict("a" => "b", "b" => "b",
Legolas.LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY => "baz@1"))
@test Legolas.Schema("baz", 1) == Legolas.extract_schema(foo)

t = [(a="a", c=1, b="b"), Baz(a=1, b=2)] # not a valid Tables.jl table
@test_throws ErrorException Legolas.validate(t, Schema("baz", 1))
Expand Down

2 comments on commit 00e7448

@jrevels
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/46751

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.3.0 -m "<description of version>" 00e7448be4bd91f084fe69fbae17e095afba4bcc
git push origin v0.3.0

Please sign in to comment.