From 170fff129abd00fc8132fc8a0dd2e1724b7bf77b Mon Sep 17 00:00:00 2001
From: Iblis Lin
Date: Mon, 10 May 2021 15:55:00 +0800
Subject: [PATCH] timetable wip

---
 Project.toml      |   2 +
 src/TimeSeries.jl |   7 ++
 src/adt.jl        |   3 +
 src/ats.jl        |  52 +++++++++++
 src/tables.jl     |   5 +-
 src/timearray.jl  |  16 ++--
 src/timetable.jl  | 254 ++++++++++++++++++++++++++++++++++++++++++++++
 test/runtests.jl  |   1 +
 test/timetable.jl |  33 +++++++
 9 files changed, 363 insertions(+), 10 deletions(-)
 create mode 100644 src/adt.jl
 create mode 100644 src/ats.jl
 create mode 100644 src/timetable.jl
 create mode 100644 test/timetable.jl

diff --git a/Project.toml b/Project.toml
index 427cb555..2dddcf2d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,10 +4,12 @@ authors = ["JuliaStats "]
 version = "0.22.1"
 
 [deps]
+DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
+PaddedViews = "5432bcbf-9aad-5242-b902-cca2824c8663"
 RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
diff --git a/src/TimeSeries.jl b/src/TimeSeries.jl
index 3c0b23ae..c6af0785 100644
--- a/src/TimeSeries.jl
+++ b/src/TimeSeries.jl
@@ -5,7 +5,9 @@ using Dates
 using DelimitedFiles
 using Statistics
 # third-party
+using DataStructures
 using DocStringExtensions: SIGNATURES
+using PaddedViews
 using RecipesBase
 using Reexport
 using Tables
@@ -21,6 +23,9 @@ export TimeArray, AbstractTimeSeries,
 # modify.jl
 export rename, rename!
 
+# timetable.jl
+export TimeTable
+
 ###############################################################################
 # Submodule
 ###############################################################################
@@ -33,7 +38,9 @@ include("timeaxis/TimeAxis.jl")
 ###############################################################################
 
 include(".timeseriesrc.jl")
+include("ats.jl")
 include("timearray.jl")
+include("timetable.jl")
 include("utilities.jl")
 include("tables.jl")
 include("split.jl")
diff --git a/src/adt.jl b/src/adt.jl
new file mode 100644
index 00000000..7b4e0088
--- /dev/null
+++ b/src/adt.jl
@@ -0,0 +1,3 @@
+"""
+Abstract data types
+"""
diff --git a/src/ats.jl b/src/ats.jl
new file mode 100644
index 00000000..76beff4b
--- /dev/null
+++ b/src/ats.jl
@@ -0,0 +1,52 @@
+###############################################################################
+# AbstractTimeSeries
+###############################################################################
+
+"""
+    AbstractTimeSeries{T}
+
+An `AbstractTimeSeries{T}` is a table-like data structure with a time index and
+named columns,
+where `T` denotes the type of the time index.
+
+In the case of multiple columns as compound index, `T <: Tuple`.
+For instance, `T = Tuple{Date,Time}` implies that there are two columns
+which form the time index.
+
+# Interfaces
+
+
+## Dimension and size
+
+- `length`
+- `ndims`
+- `size`
+- `axes`
+
+- `copy`
+- `deepcopy`
+- `similar`
+
+- `names`
+- `rename`
+- `rename!`
+
+- `hcat`
+- `vcat`
+
+"""
+abstract type AbstractTimeSeries{T} end
+
+Base.names(ats::AbstractTimeSeries) = getfield(ats, :names)
+
+
+Tables.istable(::Type{<:AbstractTimeSeries}) = true
+
+Tables.columnaccess(::Type{<:AbstractTimeSeries}) = true
+Tables.columns(ats::AbstractTimeSeries) = ats
+
+Tables.rowaccess(::Type{<:AbstractTimeSeries}) = true
+# TODO
+# Tables.rows(x::AbstractTimeSeries)
+
+Tables.schema(ats::AbstractTimeSeries) = Tables.Schema(names(ats), #= TODO =#)
diff --git a/src/tables.jl b/src/tables.jl
index 7e3041f1..14c06e53 100644
--- a/src/tables.jl
+++ b/src/tables.jl
@@ -47,7 +47,6 @@ function Base.iterate(x::TableIter, i::Integer = 1)
     x[i], i + 1
 end
 
-Tables.istable(::Type{<:AbstractTimeSeries}) = true
 Tables.rowaccess(::Type{<:TimeArray}) = true
 Tables.rows(ta::TimeArray) = Tables.rows(Tables.columntable(ta))
 Tables.columnaccess(::Type{<:TimeArray}) = true
@@ -58,10 +57,10 @@ Tables.getcolumn(ta::TimeArray, i::Int) = Tables.getcolumn(TableIter(ta), i)
 Tables.getcolumn(ta::TimeArray, nm::Symbol) = Tables.getcolumn(TableIter(ta), nm)
 Tables.getcolumn(i::TableIter, n::Int) = i[n]
 Tables.getcolumn(i::TableIter, nm::Symbol) = getproperty(i, nm)
-Tables.schema(ta::AbstractTimeSeries{T,N,D}) where {T,N,D} = Tables.schema(TableIter(ta))
+Tables.schema(ta::TimeArray) = Tables.schema(TableIter(ta))
 Tables.schema(i::TableIter{T,S}) where {T,S} = Tables.Schema(S, coltypes(data(i)))
 
-coltypes(x::AbstractTimeSeries{T,N,D}) where {T,N,D} = (D, (T for _ ∈ 1:size(x, 2))...)
+coltypes(x::TimeArray{T,N,D}) where {T,N,D} = (D, (T for _ ∈ 1:size(x, 2))...)
 
 
 ###############################################################################
diff --git a/src/timearray.jl b/src/timearray.jl
index 3200cccd..58ef2467 100644
--- a/src/timearray.jl
+++ b/src/timearray.jl
@@ -6,8 +6,6 @@ import Base: convert, copy, length, show, getindex, iterate,
              lastindex, size, eachindex, ==, isequal, hash, ndims,
              getproperty, propertynames, values
 
-abstract type AbstractTimeSeries{T,N,D} end
-
 """
     TimeArray{T,N,D<:TimeType,A<:AbstractArray{T,N}} <: AbstractTimeSeries{T,N,D}
 
@@ -51,7 +49,7 @@ The third constructor builds a `TimeArray` from a `NamedTuple`.
     ta = TimeArray(data; timestamp = :datetime, meta = "Example")
 
 """
-struct TimeArray{T,N,D<:TimeType,A<:AbstractArray{T,N}} <: AbstractTimeSeries{T,N,D}
+struct TimeArray{T,N,D<:TimeType,A<:AbstractArray{T,N}} <: AbstractTimeSeries{T}
 
     timestamp::Vector{D}
     values::A
@@ -81,6 +79,8 @@ struct TimeArray{T,N,D<:TimeType,A<:AbstractArray{T,N}} <: AbstractTimeSeries{T,
                 timestamp_r, reverse(values, dims = 1), replace_dupes!(colnames), meta)
 
         throw(ArgumentError("timestamps must be monotonic"))
+
+        # TODO: padded array design?
     end
 end
 
@@ -131,11 +131,13 @@ size(ta::TimeArray, dim) = size(values(ta), dim)
 
 ###### ndims #####################
 
-ndims(ta::AbstractTimeSeries{T,N}) where {T,N} = N
+# ndims(ta::AbstractTimeSeries{T,N}) where {T,N} = N
+ndims(::AbstractTimeSeries) = 2
+ndims(::TimeArray{T,N}) where {T,N} = N
 
 ###### iteration protocol ########
 
-@generated function iterate(ta::AbstractTimeSeries{T,N}, i = 1) where {T,N}
+@generated function iterate(ta::TimeArray{T,N}, i = 1) where {T,N}
     val = (N == 1) ? :(values(ta)[i]) : :(values(ta)[i, :])
 
     quote
@@ -182,8 +184,8 @@ hash(x::TimeArray, h::UInt) =
 
 ###### eltype #####################
 
-Base.eltype(::AbstractTimeSeries{T,1,D}) where {T,D} = Tuple{D,T}
-Base.eltype(::AbstractTimeSeries{T,2,D}) where {T,D} = Tuple{D,Vector{T}}
+Base.eltype(::TimeArray{T,1,D}) where {T,D} = Tuple{D,T}
+Base.eltype(::TimeArray{T,2,D}) where {T,D} = Tuple{D,Vector{T}}
 
 ###### show #####################
 
diff --git a/src/timetable.jl b/src/timetable.jl
new file mode 100644
index 00000000..30621dfd
--- /dev/null
+++ b/src/timetable.jl
@@ -0,0 +1,254 @@
+###############################################################################
+# Type
+###############################################################################
+
+# TODO: consider constrain T<:AbstractTimeAxis
+"""
+    TimeTable(ta; cols...)
+    TimeTable(ta, vecs::OrderedDict{Symbol})
+
+A table of named column vectors (`vecs`) that share the time axis `ta`.
+
+The row count `n` is `length(ta)` when the time axis type has a length, otherwise
+the length of the longest column. Columns shorter than `n` are replaced, in `vecs`
+itself, by copies padded with `missing`.
+
+Throws `DimensionMismatch` if a column is longer than a time axis that has a length.
+"""
+mutable struct TimeTable{T} <: AbstractTimeSeries{T}
+    ta::T
+    vecs::OrderedDict{Symbol,AbstractVector}
+    n::Int  # length, in case of infinite time axis
+
+    function TimeTable{T}(ta::T, vecs) where {T}
+        m = mapreduce(length, max, values(vecs); init = 0)  # 0 when there is no column
+        n = if Base.haslength(T)
+            n′ = length(ta)
+            (n′ ≥ m) || throw(DimensionMismatch(
+                "The vector length should less or equal than the one of time axis"))
+            n′
+        else
+            m
+        end
+
+        # note that it will copy, if the length of a col is shorter than `n`
+        for (k, v) in vecs
+            (length(v) == n) && continue
+            vecs[k] = collect(PaddedView(missing, v, (n,)))
+        end
+
+        new(ta, vecs, n)
+    end
+    # other design style:
+    #   colnames::Vector{Symbol}
+    #   cols::Vector{AbstractVector}
+end
+
+TimeTable(ta::T, vecs::OrderedDict{Symbol}) where T = TimeTable{T}(ta, vecs)
+function TimeTable(ta::T; kw...) where T
+    vecs = OrderedDict{Symbol,AbstractVector}()
+    for (k, v) ∈ kw
+        vecs[k] = v
+    end
+    TimeTable(ta, vecs)
+end
+
+const TimeTableTimeCol = :time
+
+"""
+    TimeTableRow
+
+A row of a `TimeTable`: the row number `i`, its time `t` and the column values `v`.
+"""
+struct TimeTableRow{T,V}
+    i::Int
+    t::T
+    v::V
+end
+
+
+###############################################################################
+# Iterator interfaces
+###############################################################################
+
+Base.size(tt::TimeTable) = (length(tt), length(keys(_vecs(tt))))
+Base.size(tt::TimeTable, dim) =
+    (dim == 1) ? length(tt) :
+    (dim == 2) ? length(keys(_vecs(tt))) :
+    1
+
+@inline Base.length(tt::TimeTable) = getfield(tt, :n)
+
+
+###############################################################################
+# Indexing
+###############################################################################
+
+Base.lastindex(tt::TimeTable) = getfield(tt, :n)
+
+Base.checkindex(::Type{Bool}, tt::TimeTable, i::Int) = (1 ≤ i ≤ lastindex(tt))
+
+Base.getindex(tt::TimeTable, s::Symbol) =
+    (s ≡ TimeTableTimeCol) ? getfield(tt, :ta) : getvec(tt, s)
+
+function Base.getindex(tt::TimeTable, i::Int)
+    @boundscheck checkbounds(tt, i)
+    TimeTableRow(i, _ta(tt)[i], map(x -> x[i], values(_vecs(tt))))
+end
+
+Base.getindex(tt::TimeTable, t::TimeType) = tt[time2idx(tt, t)]
+Base.getindex(tt::TimeTable, i::Int, s::Symbol) =
+    (@boundscheck checkbounds(tt, i); (s ≡ TimeTableTimeCol) ? _ta(tt)[i] : _vecs(tt)[s][i])
+Base.getindex(tt::TimeTable, t::TimeType, s::Symbol) = tt[time2idx(tt, t), s]
+
+for func ∈ [:findfirst, :findlast]
+    @eval function Base.$func(f::Function, tt::TimeTable)
+        i = $func(f, _ta(tt))
+        isnothing(i) && return nothing
+        ifelse(i > getfield(tt, :n), nothing, i)
+    end
+
+    # TODO: handle case of infinite timegrid for findlast
+end
+
+for func ∈ [:findprev, :findnext]
+    @eval function Base.$func(f::Function, tt::TimeTable, j::Int)
+        i = $func(f, _ta(tt), j)
+        isnothing(i) && return nothing
+        ifelse(i > getfield(tt, :n), nothing, i)
+    end
+end
+
+function Base.getindex(r::TimeTableRow, i::Int)
+    (i == 1) ? r.i :
+    (i == 2) ? r.t :
+    (i == 3) ? r.v :
+    throw(BoundsError(r, i))
+end
+
+###############################################################################
+# Value modification
+###############################################################################
+
+function Base.setproperty!(tt::TimeTable, name::Symbol, x::AbstractVector)
+    (length(tt) != length(x)) && throw(DimensionMismatch("length unmatched"))
+    _vecs(tt)[name] = x
+end
+
+# TODO: support time axis modification
+Base.setindex!(tt::TimeTable, v, i::Int, s::Symbol) =
+    (@boundscheck checkbounds(tt, i); _vecs(tt)[s][i] = v)
+Base.setindex!(tt::TimeTable, v, t::TimeType, s::Symbol) = (tt[time2idx(tt, t), s] = v)
+
+# NOTE(review): only the columns are resized, the time axis is left untouched;
+# entries added by growing a column are not initialised to `missing`.
+function Base.resize!(tt::TimeTable, n′::Int)
+    n = length(tt)
+    (n == n′) && return tt
+
+    for v ∈ values(_vecs(tt))
+        resize!(v, n′)
+    end
+    setfield!(tt, :n, n′)
+    tt
+end
+
+"""
+    push!(tt::TimeTable{<:TimeGrid}, x::NamedTuple)
+
+Append one row to `tt`; `x` must provide exactly one value for every column.
+The time grid is resized to cover the new row.
+"""
+function Base.push!(tt::TimeTable{<:TimeGrid}, x::NamedTuple)
+    d = _vecs(tt)
+    (size(tt, 2) == length(x)) || throw(DimensionMismatch("input length unmatched"))
+
+    ks = keys(d)
+    for k ∈ keys(x)
+        (k ∈ ks) || throw(ArgumentError("unknown column $k"))
+    end
+
+    for (k, v) ∈ d
+        push!(v, x[k])
+    end
+
+    n = length(tt) + 1
+    setfield!(tt, :n, n)
+    resize!(_ta(tt), n)
+
+    tt
+end
+
+
+###############################################################################
+# Time axis modification
+###############################################################################
+
+# TODO: add a `shrink` kwarg for shrinking length after lag/lead
+lag(tt::TimeTable{<:TimeGrid}, n::Int) = TimeTable(_ta(tt) + n, _vecs(tt))
+lead(tt::TimeTable{<:TimeGrid}, n::Int) = TimeTable(_ta(tt) - n, _vecs(tt))
+
+# TODO: reindex ?
+
+
+###############################################################################
+# Join
+###############################################################################
+
+# TODO: after DataAPI.jl v0.17 released, import method from it
+
+# TODO: support `on` kwarg
+"""
+    innerjoin(x::TimeTable{<:TimeGrid}, y::TimeTable{<:TimeGrid})
+
+Join `x` and `y` on their time axes, keeping only the rows of `x` for which
+`findall(tax, tay)` reports a match in `y`. Columns of `y` whose name already
+exists in `x` get a `_` suffix. The selected columns are copied.
+"""
+function innerjoin(x::TimeTable{<:TimeGrid}, y::TimeTable{<:TimeGrid})
+    dx = _vecs(x)
+    dy = _vecs(y)
+    dz = OrderedDict{Symbol,AbstractVector}()
+
+    tax = _ta(x)
+    tay = _ta(y)
+
+    idxx = Int[]
+    idxy = Int[]
+    sizehint!(idxx, length(x))
+    sizehint!(idxy, length(x))
+    for (i, j) ∈ enumerate(findall(tax, tay))
+        ismissing(j) && continue
+        push!(idxx, i)
+        push!(idxy, j)
+    end
+
+    for (k, v) ∈ dx
+        dz[k] = v[idxx]  # this will copy
+    end
+
+    ks = keys(dx)
+    for (k, v) ∈ dy
+        k′ = ifelse(k ∈ ks, Symbol(k, :_), k)
+        dz[k′] = v[idxy]
+    end
+
+    ta′ = [tax[i] for i ∈ idxx]
+    TimeTable(ta′, dz)
+end
+
+
+###############################################################################
+# Private utils
+###############################################################################
+
+
+# must extend `Base.checkbounds`; an unqualified definition would shadow it module-wide
+Base.checkbounds(tt::TimeTable, i::Int) =
+    (checkindex(Bool, tt, i) || throw(BoundsError(tt, i)); nothing)
+
+@inline getvec(tt::TimeTable, s::Symbol) = _vecs(tt)[s]
+@inline _vecs(tt::TimeTable) = getfield(tt, :vecs)
+@inline _ta(tt::TimeTable) = getfield(tt, :ta)
+
+@inline time2idx(tt::TimeTable, t::TimeType) = _ta(tt)[t]
diff --git a/test/runtests.jl b/test/runtests.jl
index f0b4dd12..ec816184 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -14,6 +14,7 @@ tests = [
     "tables",
     "plotrecipes",
     "timeaxis/timegrid",
+    "timetable",
 ]
 
 
diff --git a/test/timetable.jl b/test/timetable.jl
new file mode 100644
index 00000000..f6df0302
--- /dev/null
+++ b/test/timetable.jl
@@ -0,0 +1,33 @@
+using Test
+using Dates
+
+using TimeSeries
+
+@testset "timetable" begin
+
+
+@testset "getindex" begin
+    @testset "int -> row" begin
+        g = TimeGrid(DateTime(2021, 1, 1), Minute(15), 10)
+        tt = TimeTable(g; a = [1, 2, 3, 5, 42, -10],
+                          b = [4, 252, 14, 2, 1, 6.])
+
+        r = tt[1]
+        @test r[1] == 1
+        @test r[2] == g[1]
+        @test r[3] == [1, 4]
+
+        r = tt[6]
+        @test r[1] == 6
+        @test r[2] == g[6]
+        @test r[3] == [-10, 6]
+
+        r = tt[end]
+        @test r[1] == 10
+        @test r[2] == g[10]
+        @test isequal(r[3], [missing, missing])
+    end
+end
+
+
+end  # @testset "timetable"