From 1be43f708281fe35a223c081975db95e833735c1 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Thu, 12 May 2022 17:12:32 +0200 Subject: [PATCH] improve nested arrow serialization support (#40) --- Project.toml | 2 +- src/rows.jl | 34 +++++++++++++++++++++++++--------- test/runtests.jl | 11 +++++++++++ 3 files changed, 37 insertions(+), 10 deletions(-) diff --git a/Project.toml b/Project.toml index a312104..39f8a64 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Legolas" uuid = "741b9549-f6ed-4911-9fbf-4a1c0c97f0cd" authors = ["Beacon Biosignals, Inc."] -version = "0.3.3" +version = "0.3.4" [deps] Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" diff --git a/src/rows.jl b/src/rows.jl index 6e2964c..78a254a 100644 --- a/src/rows.jl +++ b/src/rows.jl @@ -288,6 +288,8 @@ macro row(schema_expr, fields...) parent_transform = :(fields = transform($quoted_parent; fields...)) parent_validate = :(validate(tables_schema, $quoted_parent)) end + + legolas_row_arrow_name = :(Symbol("JuliaLang.", $schema_qualified_string)) return quote Legolas.schema_qualified_string(::$schema_type) = $schema_qualified_string @@ -313,19 +315,33 @@ macro row(schema_expr, fields...) return _validate(tables_schema, legolas_schema) end + + # Support (de)serialization as an Arrow column value via Arrow.ArrowTypes overloads. + # + # Note that this only really works in relatively simple cases; rely on this at your own peril. + # See https://github.com/JuliaData/Arrow.jl/issues/230 for more details. + # + # Note also that the limited support here that DOES work participates in SemVer, + # e.g. if we break this in future Legolas versions we should treat it as a breaking + # change and bump version numbers accordingly. + + # We serialize as a triple of schema name, schema version, and fields. + # This is for backwards compatibility. With this approach, defining methods per-Row type, + # we could just serialize the fields alone. 
+ # This approach allows nested arrow serialization to work, ref https://github.com/JuliaData/Arrow.jl/issues/230. + Arrow.ArrowTypes.arrowname(::Type{<:Legolas.Row{$schema_type}}) = $legolas_row_arrow_name + Arrow.ArrowTypes.ArrowType(::Type{Legolas.Row{$schema_type,F}}) where {F} = Tuple{String,Int,F} + Arrow.ArrowTypes.toarrow(row::Legolas.Row{$schema_type}) = (String(Legolas.schema_name($schema_type)), Legolas.schema_version($schema_type), getfield(row, :fields)) + Arrow.ArrowTypes.JuliaType(::Val{$legolas_row_arrow_name}, ::Any) = Legolas.Row{$schema_type} + Arrow.ArrowTypes.fromarrow(::Type{<:Legolas.Row{$schema_type}}, name, version, fields) = Legolas.Row{$schema_type}(fields) + + Legolas.Row{$schema_type} end end -# Support (de)serialization as an Arrow column value via Arrow.ArrowTypes overloads. -# -# Note that this only really works in relatively simple cases; rely on this at your own peril. -# See https://github.com/JuliaData/Arrow.jl/issues/230 for more details. -# -# Note also that the limited support here that DOES work participates in SemVer, -# e.g. if we break this in future Legolas versions we should treat it as a breaking -# change and bump version numbers accordingly. - +# More Arrow serialization: here we provide backwards compatibility for `JuliaLang.Legolas.Row` +# serialized tables. 
const LEGOLAS_ROW_ARROW_NAME = Symbol("JuliaLang.Legolas.Row") Arrow.ArrowTypes.arrowname(::Type{<:Legolas.Row}) = LEGOLAS_ROW_ARROW_NAME Arrow.ArrowTypes.ArrowType(::Type{Legolas.Row{_,F}}) where {_,F} = Tuple{String,Int,F} diff --git a/test/runtests.jl b/test/runtests.jl index 7aab69b..75ca032 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -161,3 +161,14 @@ end @test !isequal(foo, foo3) @test hash(foo) != hash(foo3) end + +const MyInnerRow = @row("my-inner-schema@1", b::Int=1) +const MyOuterRow = @row("my-outer-schema@1", + a::String, + x::MyInnerRow=MyInnerRow(x)) + +@testset "Nested arrow serialization" begin + table = [MyOuterRow(; a="outer_a", x = MyInnerRow())] + roundtripped_table = Legolas.read(Legolas.tobuffer(table, Legolas.Schema("my-outer-schema@1"))) + @test table == MyOuterRow.(Tables.rows(roundtripped_table)) +end