Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Store package names in arrow metadata #122

Merged
merged 22 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@ ArrowTypes = "2.3"
Compat = "3.34, 4"
ConstructionBase = "1.5.7"
DataFrames = "1"
Pkg = "<0.0.1, 1"
omus marked this conversation as resolved.
Show resolved Hide resolved
Tables = "1.4"
Test = "1"
UUIDs = "1"
Test = "<0.0.1, 1"
UUIDs = "<0.0.1, 1"
julia = "1.6"

[extensions]
Expand All @@ -31,11 +32,12 @@ Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697"
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[targets]
test = ["Accessors", "Aqua", "Compat", "DataFrames", "Test", "UUIDs"]
test = ["Accessors", "Aqua", "Compat", "DataFrames", "Pkg", "Test", "UUIDs"]

[weakdeps]
ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
ericphanson marked this conversation as resolved.
Show resolved Hide resolved
1 change: 1 addition & 0 deletions src/Legolas.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module Legolas
using Tables, Arrow, UUIDs

const LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY = "legolas_schema_qualified"
const LEGOLAS_SCHEMA_PROVIDER_METADATA_KEY = "legolas_julia_schema_provider"

include("lift.jl")
include("schemas.jl")
Expand Down
51 changes: 45 additions & 6 deletions src/schemas.jl
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,11 @@ end

"""
    UnknownSchemaVersionError(schema_version::SchemaVersion)
    UnknownSchemaVersionError(schema_version::SchemaVersion, schema_provider)

Exception carrying a `SchemaVersion` that is unknown to the current Julia session.

`schema_provider` optionally names (as a `Symbol`) the package believed to define the
schema; it is `nothing` when no such hint is available. The one-argument constructor
sets `schema_provider` to `nothing`.
"""
struct UnknownSchemaVersionError <: Exception
    schema_version::SchemaVersion
    schema_provider::Union{Nothing,Symbol}
end

# Convenience constructor for callers that have no provider hint.
function UnknownSchemaVersionError(schema_version::SchemaVersion)
    return UnknownSchemaVersionError(schema_version, nothing)
end

function Base.showerror(io::IO, e::UnknownSchemaVersionError)
print(io, """
UnknownSchemaVersionError: encountered unknown Legolas schema version:
Expand All @@ -110,13 +113,30 @@ function Base.showerror(io::IO, e::UnknownSchemaVersionError)
This generally indicates that this schema has not been declared (i.e.
the corresponding `@schema` and/or `@version` statements have not been
executed) in the current Julia session.
""")
println(io)

In practice, this can arise if you try to read a Legolas table with a
prescribed schema, but haven't actually loaded the schema definition
(or commonly, haven't loaded the dependency that contains the schema
definition - check the versions of loaded packages/modules to confirm
your environment is as expected).
if e.schema_provider !== nothing
print(io, """
The table's metadata indicates that the schema was defined in:

$(e.schema_provider)

You likely need to load this package (`using $(e.schema_provider)`)
to populate your session with the schema definition.
""")
else
print(io, """
In practice, this can arise if you try to read a Legolas table with a
prescribed schema, but haven't actually loaded the schema definition
(or commonly, haven't loaded the dependency that contains the schema
definition - check the versions of loaded packages/modules to confirm
your environment is as expected).
""")
end
println(io)

print(io, """
Note that if you're in this particular situation, you can still load the raw
table as-is without Legolas (e.g. via `Arrow.Table(path_to_table)`).
""")
Expand Down Expand Up @@ -165,6 +185,24 @@ written via [`Legolas.write`](@ref).
"""
function identifier(sv::SchemaVersion)
    # Generic fallback: reaching this method means no more specific `identifier` method
    # exists for `sv`, so report the schema version as unknown.
    throw(UnknownSchemaVersionError(sv))
end

"""
    Legolas.schema_provider(::SchemaVersion)

Return the name, as a `Symbol`, of the package that defines the given schema version,
or `nothing` when the defining package is not known.

This generic fallback method always returns `nothing`.
"""
function schema_provider(::SchemaVersion)
    return nothing
end

# Helper used in the implementation of `schema_provider`.
#
# Looks up the root module of `m` and returns its name as a `Symbol` when `pathof`
# reports a path for it (i.e. the root module was loaded as a package). Returns
# `nothing` when the root module has no such path.
function defining_package(m::Module)
    root = Base.moduleroot(m)
    return isnothing(pathof(root)) ? nothing : Symbol(root)
end

"""
Legolas.declared_fields(sv::Legolas.SchemaVersion)

Expand Down Expand Up @@ -375,7 +413,7 @@ macro schema(schema_name, schema_prefix)
schema_prefix isa Symbol || return :(throw(ArgumentError(string("`Prefix` provided to `@schema` is not a valid type name: ", $(Base.Meta.quot(schema_prefix))))))
return quote
# This approach provides some safety against accidentally replacing another module's schema's name,
# without making it annoying to reload code/modules in an interactice development context.
# without making it annoying to reload code/modules in an interactive development context.
m = $Legolas._schema_declared_in_module(Val(Symbol($schema_name)))
if m isa Module && string(m) != string(@__MODULE__)
throw(ArgumentError(string("A schema with this name was already declared by a different module: ", m)))
Expand Down Expand Up @@ -476,6 +514,7 @@ function _generate_schema_version_definitions(schema_version::SchemaVersion, par
return quote
@inline $Legolas.declared(::$quoted_schema_version_type) = true
@inline $Legolas.identifier(::$quoted_schema_version_type) = $identifier_string
$Legolas.schema_provider(::$quoted_schema_version_type) = $Legolas.defining_package(@__MODULE__)
@inline $Legolas.parent(::$quoted_schema_version_type) = $(Base.Meta.quot(parent))
$Legolas.declared_fields(::$quoted_schema_version_type) = $declared_field_names_types
$Legolas.declaration(::$quoted_schema_version_type) = $(Base.Meta.quot(schema_version_declaration))
Expand Down
28 changes: 25 additions & 3 deletions src/tables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,16 @@ return `first(parse_identifier(s))`
Otherwise, return `nothing`.
"""
function extract_schema_version(table)
    # Look up the raw identifier string stored under the Legolas schema metadata key.
    identifier_string = extract_metadata(table, LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY)
    identifier_string === nothing && return nothing
    # Only the first element of the parsed identifier is returned.
    parsed = parse_identifier(identifier_string)
    return first(parsed)
end

function extract_metadata(table, key)
metadata = Arrow.getmetadata(table)
if !isnothing(metadata)
for (k, v) in metadata
k == LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY && return first(parse_identifier(v))
k == key && return v
end
end
return nothing
Expand Down Expand Up @@ -165,6 +171,14 @@ function read(io_or_path; validate::Bool=true)
via `Legolas.read`; is it missing the expected custom metadata and/or the
expected \"$LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY\" field?
"""))

provider = extract_metadata(table, LEGOLAS_SCHEMA_PROVIDER_METADATA_KEY)
# If we don't have the schema defined in our session (i.e. `Legolas.schema_provider` is `nothing`),
# but we do have a hint of where the schema was defined via the metadata, then throw an informative
# error. If we don't error now, we will throw an `UnknownSchemaVersionError` with less information later.
if Legolas.schema_provider(sv) === nothing && provider !== nothing
throw(UnknownSchemaVersionError(sv, Symbol(provider)))
end
try
Legolas.validate(Tables.schema(table), sv)
catch
Expand Down Expand Up @@ -213,11 +227,20 @@ function write(io_or_path, table, sv::SchemaVersion; validate::Bool=true,
end
end
schema_metadata = LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY => identifier(sv)
provider = schema_provider(sv)
provider_metadata = LEGOLAS_SCHEMA_PROVIDER_METADATA_KEY => provider
if isnothing(metadata)
metadata = (schema_metadata,)
if isnothing(provider)
metadata = (schema_metadata,)
else
metadata = (schema_metadata, provider_metadata)
end
else
metadata = Set(metadata)
push!(metadata, schema_metadata)
if !isnothing(provider)
push!(metadata, provider_metadata)
end
end
write_arrow(io_or_path, table; metadata=metadata, kwargs...)
return table
Expand All @@ -237,4 +260,3 @@ function tobuffer(args...; kwargs...)
seekstart(io)
return io
end

6 changes: 6 additions & 0 deletions test/TestProviderPkg/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name = "TestProviderPkg"
uuid = "0abfdf01-ee0b-4279-9694-f097aec3ad32"
version = "0.1.0"

[deps]
Legolas = "741b9549-f6ed-4911-9fbf-4a1c0c97f0cd"
11 changes: 11 additions & 0 deletions test/TestProviderPkg/src/TestProviderPkg.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Minimal package whose only content is a single Legolas schema declaration.
# It exposes `FooV1` (and, per the test suite's usage, `FooV1SchemaVersion`) so that
# a schema can be declared from inside a real package rather than from `Main`.
module TestProviderPkg

using Legolas: @schema, @version

# Declare the schema named "test-provider-pkg.foo" with the type-name prefix `Foo`.
@schema "test-provider-pkg.foo" Foo

# Version 1 of the schema: a single declared field `a` of type `Int`.
@version FooV1 begin
a::Int
end

end # module TestProviderPkg
20 changes: 20 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,26 @@ using Legolas, Test, DataFrames, Arrow, UUIDs
using Legolas: SchemaVersion, @schema, @version, SchemaVersionDeclarationError, DeclaredFieldInfo
using Accessors
using Aqua
using Pkg

# NOTE: this test set must run before `TestProviderPkg` is loaded, so that the schema
# recorded in the stored Arrow file is still undeclared in this session.
@testset "#46: Informative errors when reading unknown schemas from packages" begin
    unknown_sv = Legolas.SchemaVersion("test-provider-pkg.foo", 1)
    err = Legolas.UnknownSchemaVersionError(unknown_sv, :TestProviderPkg)
    @test_throws err Legolas.read("test_provider_pkg.arrow")
    message = sprint(Base.showerror, err)
    @test occursin("TestProviderPkg", message)
end

# Now make the provider package available and load it, then verify that writing a table
# records the providing package in the metadata.
Pkg.develop(; path=joinpath(@__DIR__, "TestProviderPkg"))
using TestProviderPkg

@testset "#46: Writing informative metadata about packages providing schemas" begin
    path = "test_provider_pkg.arrow"
    rows = [TestProviderPkg.FooV1(; a=1)]
    Legolas.write(path, rows, TestProviderPkg.FooV1SchemaVersion())
    roundtripped = Legolas.read(path)
    provider = Legolas.extract_metadata(roundtripped,
                                        Legolas.LEGOLAS_SCHEMA_PROVIDER_METADATA_KEY)
    @test provider == "TestProviderPkg"
end

@test_throws SchemaVersionDeclarationError("no prior `@schema` declaration found in current module") @version(TestV1, begin x end)

Expand Down
Binary file added test/test_provider_pkg.arrow
Binary file not shown.
Loading