From 560222d8ce01fda655a97fc56dc64651cd28ce89 Mon Sep 17 00:00:00 2001 From: Jarrett Revels Date: Sun, 13 Jun 2021 23:09:02 -0400 Subject: [PATCH] add docs/tests/codecov CI and related stubs (#2) --- .github/workflows/CI.yml | 58 ++++++++++++++++++++++++++++++++++++ .github/workflows/TagBot.yml | 14 +++++++++ Project.toml | 9 +++++- README.md | 9 ++++-- codecov.yml | 1 + docs/Project.toml | 6 ++++ docs/make.jl | 10 +++++++ docs/src/index.md | 38 +++++++++++++++++++++++ docs/src/schema.md | 12 ++++++++ examples/tour.jl | 21 +------------ src/rows.jl | 38 ++++++++++++++++++++++- src/tables.jl | 24 +++++++++++++++ test/runtests.jl | 1 + 13 files changed, 216 insertions(+), 25 deletions(-) create mode 100644 .github/workflows/CI.yml create mode 100644 .github/workflows/TagBot.yml create mode 100644 codecov.yml create mode 100644 docs/Project.toml create mode 100644 docs/make.jl create mode 100644 docs/src/index.md create mode 100644 docs/src/schema.md create mode 100644 test/runtests.jl diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml new file mode 100644 index 0000000..6fa5bab --- /dev/null +++ b/.github/workflows/CI.yml @@ -0,0 +1,58 @@ +name: CI +on: + push: + branches: + - main + tags: + - v* + pull_request: +jobs: + test: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + version: + - '1' + - '1.3' + os: + - ubuntu-latest + arch: + - x64 + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - uses: julia-actions/setup-julia@v1 + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: actions/cache@v2 + with: + path: ~/.julia/artifacts + key: ${{ runner.os }}-test-artifacts-${{ hashFiles('**/Project.toml') }} + restore-keys: ${{ runner.os }}-test-artifacts + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 + - uses: julia-actions/julia-processcoverage@v1 + - uses: 
codecov/codecov-action@v1 + with: + file: lcov.info + docs: + name: Documentation + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@v1 + with: + version: '1' + - run: | + julia --project=docs -e ' + using Pkg + Pkg.develop(PackageSpec(path=pwd())) + Pkg.instantiate()' + - run: julia --project=docs docs/make.jl + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml new file mode 100644 index 0000000..778c06f --- /dev/null +++ b/.github/workflows/TagBot.yml @@ -0,0 +1,14 @@ +name: TagBot +on: + issue_comment: + types: + - created + workflow_dispatch: +jobs: + TagBot: + if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' + runs-on: ubuntu-latest + steps: + - uses: JuliaRegistries/TagBot@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/Project.toml b/Project.toml index 0579aa9..c0b7464 100644 --- a/Project.toml +++ b/Project.toml @@ -9,4 +9,11 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" [compat] Arrow = "1.5" -Tables = "1.4" \ No newline at end of file +Tables = "1.4" +julia = "1.3" + +[extras] +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Test"] diff --git a/README.md b/README.md index ef07f2f..743f8ed 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,12 @@ # Legolas.jl +[![CI](https://github.com/beacon-biosignals/Legolas.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/beacon-biosignals/Legolas.jl/actions/workflows/CI.yml) +[![codecov](https://codecov.io/gh/beacon-biosignals/Legolas.jl/branch/master/graph/badge.svg?token=D0bcI0Rtsw)](https://codecov.io/gh/beacon-biosignals/Legolas.jl) +[![](https://img.shields.io/badge/docs-stable-blue.svg)](https://beacon-biosignals.github.io/Legolas.jl/stable) +[![](https://img.shields.io/badge/docs-dev-blue.svg)](https://beacon-biosignals.github.io/Legolas.jl/dev) + *wield `.arrow`s with 
style* Legolas.jl is a Julia package that provides opinionated utilities for constructing, reading, writing, and validating Arrow tables against extensible, versioned, user-specified schemas. -Currently WIP. - -NOTE TO BEACON EMPLOYEES: This repository is intended to be open-sourced directly; please don't include private/internal Beacon content in commits/issues/etc. \ No newline at end of file +[Take The Tour](https://github.com/beacon-biosignals/Legolas.jl/tree/master/examples/tour.jl) \ No newline at end of file diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..db24720 --- /dev/null +++ b/codecov.yml @@ -0,0 +1 @@ +comment: off diff --git a/docs/Project.toml b/docs/Project.toml new file mode 100644 index 0000000..8b3e12b --- /dev/null +++ b/docs/Project.toml @@ -0,0 +1,6 @@ +[deps] +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +Legolas = "741b9549-f6ed-4911-9fbf-4a1c0c97f0cd" + +[compat] +Documenter = "0.24" diff --git a/docs/make.jl b/docs/make.jl new file mode 100644 index 0000000..2af2ced --- /dev/null +++ b/docs/make.jl @@ -0,0 +1,10 @@ +using Legolas +using Documenter + +makedocs(modules=[Legolas], + sitename="Legolas", + authors="Beacon Biosignals, Inc.", + pages=["API Documentation" => "index.md", + "Tips For Schema Authors" => "schema.md"]) + +deploydocs(repo="github.com/beacon-biosignals/Legolas.jl.git", push_preview=true) diff --git a/docs/src/index.md b/docs/src/index.md new file mode 100644 index 0000000..994160d --- /dev/null +++ b/docs/src/index.md @@ -0,0 +1,38 @@ +# API Documentation + +If you're a newcomer to Legolas.jl, please familiarize yourself with it via the [tour](https://github.com/beacon-biosignals/Legolas.jl/blob/master/examples/tour.jl) before diving into this documentation. 
+ +```@meta +CurrentModule = Legolas +``` + +## Legolas `Schema`s and `Row`s + +```@docs +Legolas.@row +Legolas.Row +Legolas.Schema +Legolas.is_valid_schema_name +Legolas.schema_name +Legolas.schema_version +Legolas.schema_qualified_string +Legolas.schema_parent +Legolas.transform +``` + +## Validating/Writing/Reading Legolas Tables + +```@docs +Legolas.validate +Legolas.write +Legolas.read +``` + +## Utilities + +```@docs +Legolas.lift +Legolas.assign_to_table_metadata! +Legolas.gather +Legolas.materialize +``` diff --git a/docs/src/schema.md b/docs/src/schema.md new file mode 100644 index 0000000..7233471 --- /dev/null +++ b/docs/src/schema.md @@ -0,0 +1,12 @@ +# Tips for Schema Authors + +If you're a newcomer to Legolas.jl, please familiarize yourself with it via the [tour](https://github.com/beacon-biosignals/Legolas.jl/blob/master/examples/tour.jl) before diving into this documentation. + +TODO: cover the following items: + +- Legolas.jl's Simple Integer Versioning: You Break It, You Bump It +- forward/backward compatibility via allowing `missing` columns when possible +- avoid bumping schema versions by handling the deprecation path in the constructor +- prefer idempotency in field expressions when possible +- prefer Liskov substitutability when possible + diff --git a/examples/tour.jl b/examples/tour.jl index 4a1bed9..eb67844 100644 --- a/examples/tour.jl +++ b/examples/tour.jl @@ -174,23 +174,4 @@ Arrow.setmetadata!(invalid, Dict("legolas_schema_qualified" => "my-child-schema@ # these functions are relatively agnostic to the types of provided path arguments. Generally, as long as a # given `path` supports `Base.read(path)::Vector{UInt8}`, `Base.write(path, bytes::Vector{UInt8})`, and # `mkpath(dirname(path))`, then `path` will work as an argument to `Legolas.read`/`Legolas.write`. At some -# point, we'd like to make similar upstream improvements to Arrow.jl to render its API more path-type-agnostic. 
- -##### -##### Simple Integer Versioning: You Break It, You Bump It -##### -# TODO - -##### -##### Tips For Schema Design -##### -# TODO: Cover the following: -# -# - forward/backward compatibility via allowing `missing` columns when possible -# - avoid bumping schema versions by handling the deprecation path in the constructor -# - prefer idempotency in field expressions when possible -# - prefer Liskov substitutability when possible - -##### -##### Miscellaneous Utilities -##### +# point, we'd like to make similar upstream improvements to Arrow.jl to render its API more path-type-agnostic. \ No newline at end of file diff --git a/src/rows.jl b/src/rows.jl index ee822a8..2bf215d 100644 --- a/src/rows.jl +++ b/src/rows.jl @@ -4,15 +4,27 @@ const ALLOWED_SCHEMA_NAME_CHARACTERS = Char['-', '.', 'a':'z'..., '0':'9'...] +""" + TODO +""" is_valid_schema_name(x::AbstractString) = all(i -> i in ALLOWED_SCHEMA_NAME_CHARACTERS, x) +""" + TODO +""" struct Schema{name,version} end +""" + TODO +""" function Schema(name::AbstractString, version::Integer) is_valid_schema_name(name) || throw(ArgumentError("TODO")) return Schema{Symbol(name),version}() end +""" + TODO +""" function Schema(str::AbstractString) x = split(first(split(str, '>', limit=2)), '@') if length(x) == 2 @@ -23,15 +35,27 @@ function Schema(str::AbstractString) throw(ArgumentError("TODO")) end +""" + TODO +""" @inline schema_version(::Type{<:Schema{name,version}}) where {name,version} = version @inline schema_version(schema::Schema) = schema_version(typeof(schema)) +""" + TODO +""" @inline schema_name(::Type{<:Schema{name}}) where {name} = name @inline schema_name(schema::Schema) = schema_name(typeof(schema)) +""" + TODO +""" @inline schema_parent(::Type{<:Schema}) = nothing @inline schema_parent(schema::Schema) = schema_parent(typeof(schema)) +""" + TODO +""" function schema_qualified_string end # Note that there exist very clean generic implementations of `transform`/`validate`: @@ -53,10 +77,16 @@ function 
schema_qualified_string end # unnecessarily for schemas with a few ancestors, while the "hardcoded" versions # generated by the current implementation of the `@row` macro (see below) do not. +""" + TODO +""" function transform end function _transform end +""" + TODO +""" function validate end function _validate end @@ -77,6 +107,9 @@ Base.show(io::IO, schema::Schema) = print(io, "Schema(\"$(schema_name(schema))@$ ##### Row ##### +""" + TODO +""" struct Row{S<:Schema,F} <: Tables.AbstractRow schema::S fields::F @@ -115,6 +148,9 @@ end _parse_schema_expr(str::AbstractString) = Schema(str), nothing +""" + TODO +""" macro row(schema_expr, fields...) schema, parent = _parse_schema_expr(schema_expr) isnothing(schema) && throw(ArgumentError("`@row` schema argument must be of the form `\"name@X\"` or `\"name@X\" > \"parent@Y\"`. Received: $schema_expr")) @@ -148,7 +184,7 @@ macro row(schema_expr, fields...) function Legolas._transform(::$schema_type; $([Expr(:kw, f, :missing) for f in field_names]...), other...) $(map(esc, fields)...) - return (; $(field_names...), other...) + return (; $([Expr(:kw, f, f) for f in field_names]...), other...) 
end function Legolas._validate(tables_schema::Tables.Schema, legolas_schema::$schema_type) diff --git a/src/tables.jl b/src/tables.jl index 88cfc4f..2ce6e76 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -4,6 +4,9 @@ const LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY = "legolas_schema_qualified" ##### validate tables ##### +""" + TODO +""" function validate(table, legolas_schema::Schema) columns = Tables.columns(table) Tables.rowcount(columns) > 0 || return nothing @@ -21,6 +24,9 @@ function validate(table, legolas_schema::Schema) return nothing end +""" + TODO +""" function validate(table) metadata = Arrow.getmetadata(table) (metadata isa Dict && haskey(metadata, LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY)) || throw(ArgumentError("`$LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY` field not found in Arrow table metadata")) @@ -32,12 +38,18 @@ end ##### read/write tables ##### +""" + TODO +""" function read(path; validate::Bool=true) table = read_arrow(path) validate && Legolas.validate(table) return table end +""" + TODO +""" function write(io_or_path, table, schema::Schema; validate::Bool=true, kwargs...) # This `Tables.columns` call is unfortunately necessary; ref https://github.com/JuliaData/Arrow.jl/issues/211 # It is also the case that `Tables.schema(Tables.columns(table))` is more likely to return a `Tables.Schema` @@ -50,6 +62,9 @@ function write(io_or_path, table, schema::Schema; validate::Bool=true, kwargs... return table end +""" + TODO +""" function tobuffer(args...; kwargs...) io = IOBuffer() Legolas.write(io, args...; kwargs...) @@ -81,6 +96,11 @@ write_arrow(path, table; kwargs...) = (io = IOBuffer(); write_arrow(io, table; k ##### # TODO: upstream to Arrow.jl? +""" + TODO + +Note that we intend to eventually migrate this function from Legolas.jl to a more appropriate package. +""" function assign_to_table_metadata!(table, pairs) m = Arrow.getmetadata(table) if !(m isa Dict) @@ -134,6 +154,8 @@ subtable. The default definition is sufficient for `DataFrames` tables. 
Note that this function may internally call `Tables.columns` on each input table, so it may be slower and/or require more memory if `any(!Tables.columnaccess, tables)`. + +Note that we intend to eventually migrate this function from Legolas.jl to a more appropriate package. """ function gather(column_name, tables::Vararg{Any,N}; extract=((cols, idxs) -> view(cols, idxs, :))) where {N} @@ -164,5 +186,7 @@ julia> materialized = Onda.materialize(items); julia> @time foreach(identity, (nested_structure for nested_structure in materialized.nested_structures)); 0.000014 seconds (2 allocations: 80 bytes) ``` + +Note that we intend to eventually migrate this function from Legolas.jl to a more appropriate package. """ materialize(table) = map(collect, Tables.columntable(table)) diff --git a/test/runtests.jl b/test/runtests.jl new file mode 100644 index 0000000..5afce9f --- /dev/null +++ b/test/runtests.jl @@ -0,0 +1 @@ +include(joinpath(dirname(@__DIR__), "examples", "tour.jl")) \ No newline at end of file