Skip to content

Commit

Permalink
improve nested arrow serialization support (#40)
Browse files Browse the repository at this point in the history
  • Loading branch information
ericphanson authored May 12, 2022
1 parent 8fa3eae commit 1be43f7
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 10 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Legolas"
uuid = "741b9549-f6ed-4911-9fbf-4a1c0c97f0cd"
authors = ["Beacon Biosignals, Inc."]
version = "0.3.3"
version = "0.3.4"

[deps]
Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
Expand Down
34 changes: 25 additions & 9 deletions src/rows.jl
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,8 @@ macro row(schema_expr, fields...)
parent_transform = :(fields = transform($quoted_parent; fields...))
parent_validate = :(validate(tables_schema, $quoted_parent))
end

legolas_row_arrow_name = :(Symbol("JuliaLang.", $schema_qualified_string))
return quote
Legolas.schema_qualified_string(::$schema_type) = $schema_qualified_string

Expand All @@ -313,19 +315,33 @@ macro row(schema_expr, fields...)
return _validate(tables_schema, legolas_schema)
end


# Support (de)serialization as an Arrow column value via Arrow.ArrowTypes overloads.
#
# Note that this only really works in relatively simple cases; rely on this at your own peril.
# See https://github.com/JuliaData/Arrow.jl/issues/230 for more details.
#
# Note also that the limited support here that DOES work participates in SemVer,
# e.g. if we break this in future Legolas versions we should treat it as a breaking
# change and bump version numbers accordingly.

# We serialize each row as a triple of schema name, schema version, and fields.
# The triple is kept only for backwards compatibility: since these methods are defined
# per-Row type, serializing the fields alone would otherwise be sufficient.
# Defining the methods per-Row type is what allows nested Arrow serialization to work;
# see <https://github.com/beacon-biosignals/Legolas.jl/issues/39>.
Arrow.ArrowTypes.arrowname(::Type{<:Legolas.Row{$schema_type}}) = $legolas_row_arrow_name
Arrow.ArrowTypes.ArrowType(::Type{Legolas.Row{$schema_type,F}}) where {F} = Tuple{String,Int,F}
Arrow.ArrowTypes.toarrow(row::Legolas.Row{$schema_type}) = (String(Legolas.schema_name($schema_type)), Legolas.schema_version($schema_type), getfield(row, :fields))
Arrow.ArrowTypes.JuliaType(::Val{$legolas_row_arrow_name}, ::Any) = Legolas.Row{$schema_type}
Arrow.ArrowTypes.fromarrow(::Type{<:Legolas.Row{$schema_type}}, name, version, fields) = Legolas.Row{$schema_type}(fields)


Legolas.Row{$schema_type}
end
end

# Support (de)serialization as an Arrow column value via Arrow.ArrowTypes overloads.
#
# Note that this only really works in relatively simple cases; rely on this at your own peril.
# See https://github.com/JuliaData/Arrow.jl/issues/230 for more details.
#
# Note also that the limited support here that DOES work participates in SemVer,
# e.g. if we break this in future Legolas versions we should treat it as a breaking
# change and bump version numbers accordingly.

# More Arrow serialization: here we provide backwards compatibility for `JuliaLang.Legolas.Row`
# serialized tables.
# Arrow extension-type name used for `Legolas.Row` values regardless of their schema.
const LEGOLAS_ROW_ARROW_NAME = Symbol("JuliaLang.Legolas.Row")
# Generic method: any `Legolas.Row` subtype reports the schema-agnostic name above.
Arrow.ArrowTypes.arrowname(::Type{<:Legolas.Row}) = LEGOLAS_ROW_ARROW_NAME
# A row with fields type `F` is stored as a `Tuple{String,Int,F}`; the schema type
# parameter (`_`) does not affect the storage type.
Arrow.ArrowTypes.ArrowType(::Type{Legolas.Row{_,F}}) where {_,F} = Tuple{String,Int,F}
Expand Down
11 changes: 11 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,14 @@ end
@test !isequal(foo, foo3)
@test hash(foo) != hash(foo3)
end

# Row types for the nested Arrow serialization test below. `@row` evaluates to the
# generated `Legolas.Row{...}` type, so each constant names a row type.
#
# `MyInnerRow` declares a single `Int` field `b` whose field expression is the literal `1`.
const MyInnerRow = @row("my-inner-schema@1", b::Int=1)
# `MyOuterRow` declares a `String` field `a` and a field `x` of type `MyInnerRow`, i.e. one
# Legolas row nested inside another. The `MyInnerRow(x)` expression presumably converts
# the incoming `x` value to a `MyInnerRow` — confirm against the `@row` documentation.
const MyOuterRow = @row("my-outer-schema@1",
a::String,
x::MyInnerRow=MyInnerRow(x))

@testset "Nested arrow serialization" begin
    # A one-row table whose `x` field is itself a Legolas row.
    table = [MyOuterRow(; a="outer_a", x=MyInnerRow())]

    # Serialize with `Legolas.tobuffer` under the outer schema, then read it back.
    outer_schema = Legolas.Schema("my-outer-schema@1")
    buffer = Legolas.tobuffer(table, outer_schema)
    roundtripped_table = Legolas.read(buffer)

    # Rebuild outer rows from the roundtripped table and compare with the originals.
    roundtripped_rows = MyOuterRow.(Tables.rows(roundtripped_table))
    @test table == roundtripped_rows
end

6 comments on commit 1be43f7

@ericphanson
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@beacon-buddy register

@beacon-buddy
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: beacon-biosignals/BeaconRegistry/558

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.3.4 -m "<description of version>" 1be43f708281fe35a223c081975db95e833735c1
git push origin v0.3.4

Also, note the warning: This looks like a new registration that registers version 0.3.4.
Ideally, you should register an initial release with 0.0.1, 0.1.0 or 1.0.0 version numbers
This can be safely ignored. However, if you want to fix this you can do so. Call register() again after making the fix. This will update the Pull request.

@kleinschmidt
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ericphanson shouldn't this go to julia registrator since it's public? beacon buddy thinks it's a new package...

@ericphanson
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ericphanson
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops!

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/60122

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.3.4 -m "<description of version>" 1be43f708281fe35a223c081975db95e833735c1
git push origin v0.3.4

Please sign in to comment.