-
Notifications
You must be signed in to change notification settings - Fork 18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Arrow support #167
Open
omus
wants to merge
10
commits into
master
Choose a base branch
from
cv/arrow-support
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Arrow support #167
Changes from all commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
005e1cb
Register the Interval structs with Arrow.jl via Requires.jl.
rofinn 1f97d4f
Only test on 1.3 or higher.
rofinn d3bc51c
Address review comments.
rofinn b77fd2c
Update test/runtests.jl
rofinn 6311514
Add ArrowTypes support
omus 40b2f7d
Test against Julia 1.0 LTS
omus 3def9b0
Remove dependency on Requires
omus 04105c6
Add Arrow compat entry
omus 9c3aa28
Require ArrowTypes 1.2
omus 973e286
Removing Arrow compat entry
omus File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
# Register each bound type (`Closed`, `Open`, `Unbounded`) with ArrowTypes: | ||
# `arrowname` maps the type to a symbol built from "JuliaLang.Intervals." plus `string(T)`, | ||
# and `JuliaType` maps that same symbol (wrapped in `Val`) back to the type. | ||
for T in (Closed, Open, Unbounded) | ||
# `QuoteNode` makes the interpolated symbol appear as a literal symbol inside `@eval` | ||
# instead of being treated as a variable reference. | ||
name = QuoteNode(Symbol("JuliaLang.Intervals.$(string(T))")) | ||
|
||
@eval begin | ||
ArrowTypes.arrowname(::Type{$T}) = $name | ||
ArrowTypes.JuliaType(::Val{$name}) = $T | ||
end | ||
end | ||
|
||
# Use a more efficient Arrow serialization when a vector uses a concrete element type | ||
# The bound types `L` and `R` are not stored per element; they are written once as a | ||
# comma-separated metadata string and parsed back when the Julia type is reconstructed. | ||
let name = Symbol("JuliaLang.Intervals.Interval{T,L,R}") | ||
ArrowTypes.arrowname(::Type{Interval{T,L,R}}) where {T, L <: Bound, R <: Bound} = name | ||
# The interval type is returned unchanged as its own Arrow type. | ||
# NOTE(review): presumably this lets ArrowTypes serialize the struct fields directly - confirm. | ||
function ArrowTypes.ArrowType(::Type{Interval{T,L,R}}) where {T, L <: Bound, R <: Bound} | ||
return Interval{T,L,R} | ||
end | ||
# Metadata is the arrow names of `L` and `R` joined with a comma. | ||
function ArrowTypes.arrowmetadata(::Type{Interval{T,L,R}}) where {T, L <: Bound, R <: Bound} | ||
return join(arrowname.([L, R]), ",") | ||
end | ||
# Only matches a named tuple with fields `first` and `last` that share one type `T`. | ||
# The bound types are recovered by splitting `meta` on "," and looking each name up | ||
# through the one-argument `ArrowTypes.JuliaType(::Val)` method. | ||
function ArrowTypes.JuliaType(::Val{name}, ::Type{NamedTuple{(:first, :last), Tuple{T, T}}}, meta) where T | ||
L, R = ArrowTypes.JuliaType.(Val.(Symbol.(split(meta, ",")))) | ||
return Interval{T,L,R} | ||
end | ||
# Rebuild the interval; an endpoint whose bound type is `Unbounded` is passed to the | ||
# constructor as `nothing` instead of the deserialized value. | ||
function ArrowTypes.fromarrow(::Type{Interval{T,L,R}}, left, right) where {T, L <: Bound, R <: Bound} | ||
return Interval{T,L,R}( | ||
L === Unbounded ? nothing : left, | ||
R === Unbounded ? nothing : right, | ||
) | ||
end | ||
end | ||
|
||
# A less efficient Arrow serialization format for when a vector contains non-concrete element types | ||
# Each element carries its own bound names: every endpoint is written as a | ||
# `(bound name string, value)` tuple under the keys `left` and `right`. | ||
let name = Symbol("JuliaLang.Intervals.Interval{T}") | ||
ArrowTypes.arrowname(::Type{<:Interval{T}}) where T = name | ||
function ArrowTypes.ArrowType(::Type{<:Interval{T}}) where T | ||
return NamedTuple{(:left, :right), Tuple{Tuple{String, T}, Tuple{String, T}}} | ||
end | ||
# NOTE(review): `bounds_types` is defined elsewhere; it is used here as if it returns the | ||
# pair of bound types `(L, R)` of `x` - confirm. | ||
function ArrowTypes.toarrow(x::Interval{T}) where T | ||
L, R = bounds_types(x) | ||
return (; left=(string(arrowname(L)), x.first), right=(string(arrowname(R)), x.last)) | ||
end | ||
# `T` is taken from the second slot of the first tuple type, i.e. the value type of the | ||
# `left` endpoint in the layout produced by `toarrow` above. | ||
function ArrowTypes.JuliaType(::Val{name}, ::Type{NamedTuple{names, types}}) where {names, types} | ||
T = fieldtype(fieldtype(types, 1), 2) | ||
return Interval{T} | ||
end | ||
# Look up each bound type from its stored name, then rebuild the interval; an endpoint | ||
# whose bound type is `Unbounded` is passed to the constructor as `nothing`. | ||
function ArrowTypes.fromarrow(::Type{Interval{T}}, left, right) where T | ||
L = ArrowTypes.JuliaType(Val(Symbol(left[1]))) | ||
R = ArrowTypes.JuliaType(Val(Symbol(right[1]))) | ||
return Interval{T,L,R}( | ||
L === Unbounded ? nothing : left[2], | ||
R === Unbounded ? nothing : right[2], | ||
) | ||
end | ||
end | ||
|
||
# Note: The type returned by the `ArrowType` function is not passed into the `JuliaType` | ||
# function. Instead the result of `typeof(toarrow(...))` is passed into `JuliaType`. | ||
# To reproduce this use an isbits object as a type parameter in `ArrowType`. | ||
|
||
# An inefficient Arrow serialization format which supports non-concrete element types | ||
# Each element is written as a single `anchor` tuple holding, in order: the span `P`, | ||
# the anchor value, and the arrow names of the left and right bound types as strings. | ||
let name = Symbol("JuliaLang.Intervals.AnchoredInterval{P,T}") | ||
ArrowTypes.arrowname(::Type{<:AnchoredInterval{P,T}}) where {P,T} = name | ||
function ArrowTypes.ArrowType(::Type{<:AnchoredInterval{P,T}}) where {P,T} | ||
return NamedTuple{(:anchor,), Tuple{Tuple{typeof(P), T, String, String}}} | ||
end | ||
# NOTE(review): `bounds_types` is defined elsewhere; it is used here as if it returns the | ||
# pair of bound types `(L, R)` of `x` - confirm. | ||
function ArrowTypes.toarrow(x::AnchoredInterval{P,T}) where {P,T} | ||
L, R = bounds_types(x) | ||
return (; anchor=(P, x.anchor, string(arrowname(L)), string(arrowname(R)))) | ||
end | ||
# Returns the unparameterized `AnchoredInterval`; all type parameters are recovered per | ||
# element in `fromarrow` below. | ||
function ArrowTypes.JuliaType(::Val{name}) | ||
return AnchoredInterval | ||
end | ||
# Unpack the stored tuple: slot 1 is the span, slot 2 the anchor value, slots 3 and 4 the | ||
# bound-type names, which are resolved through `ArrowTypes.JuliaType(::Val)`. | ||
function ArrowTypes.fromarrow(::Type{AnchoredInterval}, anchor) | ||
P = anchor[1] | ||
T = typeof(anchor[2]) # Note: Arrow can't access the original `T` anyway | ||
L = ArrowTypes.JuliaType(Val(Symbol(anchor[3]))) | ||
R = ArrowTypes.JuliaType(Val(Symbol(anchor[4]))) | ||
return AnchoredInterval{P,T,L,R}(anchor[2]) | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# Round-trip tests: each case writes a one-column table with `Arrow.tobuffer`, reads it | ||
# back with `Arrow.Table`, and checks the column's element type and contents. | ||
@testset "Arrow support" begin | ||
# Single concrete element type: the column eltype is expected to be fully parameterized. | ||
@testset "Interval (concrete)" begin | ||
col = [Interval{Closed,Unbounded}(1, nothing)] | ||
|
||
table = (; col) | ||
t = Arrow.Table(Arrow.tobuffer(table)) | ||
|
||
@test eltype(t.col) == Interval{Int, Closed, Unbounded} | ||
@test t.col == col | ||
end | ||
|
||
# Mixed bound types in one vector: the column eltype is expected to be `Interval{Int}`. | ||
@testset "Interval (non-concrete)" begin | ||
col = [ | ||
Interval{Closed, Closed}(1, 2), | ||
Interval{Closed, Open}(2, 3), | ||
Interval{Unbounded, Open}(nothing, 4), | ||
] | ||
|
||
table = (; col) | ||
t = Arrow.Table(Arrow.tobuffer(table)) | ||
|
||
@test eltype(t.col) == Interval{Int} | ||
@test t.col == col | ||
end | ||
|
||
# Hour-ending intervals over an hourly range of zoned date-times. | ||
@testset "AnchoredInterval" begin | ||
zdt_start = ZonedDateTime(2016, 8, 11, 1, tz"America/Winnipeg") | ||
zdt_end = ZonedDateTime(2016, 8, 12, 0, tz"America/Winnipeg") | ||
col = HE.(zdt_start:Hour(1):zdt_end) | ||
|
||
table = (; col) | ||
t = Arrow.Table(Arrow.tobuffer(table)) | ||
|
||
# Arrow.jl converts all Period types into Second | ||
# The eltype check is therefore marked broken; only the values are asserted equal. | ||
@test_broken eltype(t.col) == HourEnding{ZonedDateTime, Open, Closed} | ||
@test t.col == col | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll probably end up disabling
AnchoredInterval
support to start with because of this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can't explicitly override this behaviour? Given how much data we have saved in HE format, needing to save hour data as Intervals would be a significant performance hit and likely wouldn't make this PR worth it for us. Also, doesn't Arrow have its own interval type which does the same thing?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've forgotten a bunch about this PR but it should be possible to encode the period span in the serialized Arrow form. I do remember I was mainly focused on
Interval
support, so I think this comment was more about the fact that the implementation currently in place is incomplete.