Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: experimental implementation of heterogeneous dynamic table type #494

Open
wants to merge 1 commit into
base: v0.30
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ authors = ["JuliaStats <https://github.com/JuliaStats>"]
version = "0.22.1"

[deps]
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
PaddedViews = "5432bcbf-9aad-5242-b902-cca2824c8663"
RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Expand Down
7 changes: 7 additions & 0 deletions src/TimeSeries.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ using Dates
using DelimitedFiles
using Statistics
# third-party
using DataStructures
using DocStringExtensions: SIGNATURES
using PaddedViews
using RecipesBase
using Reexport
using Tables
Expand All @@ -21,6 +23,9 @@ export TimeArray, AbstractTimeSeries,
# modify.jl
export rename, rename!

# timetable.jl
export TimeTable

###############################################################################
# Submodule
###############################################################################
Expand All @@ -33,7 +38,9 @@ include("timeaxis/TimeAxis.jl")
###############################################################################

include(".timeseriesrc.jl")
include("ats.jl")
include("timearray.jl")
include("timetable.jl")
include("utilities.jl")
include("tables.jl")
include("split.jl")
Expand Down
3 changes: 3 additions & 0 deletions src/adt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""
Abstract data types
"""
52 changes: 52 additions & 0 deletions src/ats.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
###############################################################################
# AbstractTimeSeries
###############################################################################

"""
AbstractTimeSeries{T}
An `AbstractTimeSeries{T}` is a table-like data structure with a time index and
named columns.
Where `T` denotes the type of time index.
In the case of multiple columns as compound index, `T <: Tuple`.
For instance, let `T = Tuple{Date,Time}` implies there are two columns
which forms the time index.
# Interfaces
## Dimension and size
- `length`
- `ndims`
- `size`
- `axes`
- `copy`
- `deepcopy`
- `similar`
- `names`
- `rename`
- `rename!`
- `hcat`
- `vcat`
"""
abstract type AbstractTimeSeries{T} end

"""
    names(ats::AbstractTimeSeries)

Return the content of the `names` field of `ats`.
"""
function Base.names(ats::AbstractTimeSeries)
    # NOTE(review): this generic fallback requires a field called `names`;
    # `TimeTable` is declared with the fields `ta`, `vecs` and `n` only.
    return getfield(ats, :names)
end


# Tables.jl interface: every `AbstractTimeSeries` subtype declares itself a
# table that offers both column access and row access.
function Tables.istable(::Type{<:AbstractTimeSeries})
    return true
end

function Tables.columnaccess(::Type{<:AbstractTimeSeries})
    return true
end

# The time series object is returned as its own column container.
function Tables.columns(ats::AbstractTimeSeries)
    return ats
end

function Tables.rowaccess(::Type{<:AbstractTimeSeries})
    return true
end
# TODO
# Tables.rows(x::AbstractTimeSeries)

function Tables.schema(ats::AbstractTimeSeries)
    # Only the column names are supplied so far; the types are still a TODO.
    return Tables.Schema(names(ats), #= TODO =#)
end
5 changes: 2 additions & 3 deletions src/tables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ function Base.iterate(x::TableIter, i::Integer = 1)
x[i], i + 1
end

Tables.istable(::Type{<:AbstractTimeSeries}) = true
Tables.rowaccess(::Type{<:TimeArray}) = true
Tables.rows(ta::TimeArray) = Tables.rows(Tables.columntable(ta))
Tables.columnaccess(::Type{<:TimeArray}) = true
Expand All @@ -58,10 +57,10 @@ Tables.getcolumn(ta::TimeArray, i::Int) = Tables.getcolumn(TableIter(ta), i)
Tables.getcolumn(ta::TimeArray, nm::Symbol) = Tables.getcolumn(TableIter(ta), nm)
Tables.getcolumn(i::TableIter, n::Int) = i[n]
Tables.getcolumn(i::TableIter, nm::Symbol) = getproperty(i, nm)
Tables.schema(ta::AbstractTimeSeries{T,N,D}) where {T,N,D} = Tables.schema(TableIter(ta))
Tables.schema(ta::TimeArray) = Tables.schema(TableIter(ta))
Tables.schema(i::TableIter{T,S}) where {T,S} = Tables.Schema(S, coltypes(data(i)))

coltypes(x::AbstractTimeSeries{T,N,D}) where {T,N,D} = (D, (T for _ 1:size(x, 2))...)
coltypes(x::TimeArray{T,N,D}) where {T,N,D} = (D, (T for _ 1:size(x, 2))...)


###############################################################################
Expand Down
16 changes: 9 additions & 7 deletions src/timearray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ import Base: convert, copy, length, show, getindex, iterate,
lastindex, size, eachindex, ==, isequal, hash, ndims,
getproperty, propertynames, values

abstract type AbstractTimeSeries{T,N,D} end

"""
TimeArray{T,N,D<:TimeType,A<:AbstractArray{T,N}} <: AbstractTimeSeries{T,N,D}
Expand Down Expand Up @@ -51,7 +49,7 @@ The third constructor builds a `TimeArray` from a `NamedTuple`.
ta = TimeArray(data; timestamp = :datetime, meta = "Example")
"""
struct TimeArray{T,N,D<:TimeType,A<:AbstractArray{T,N}} <: AbstractTimeSeries{T,N,D}
struct TimeArray{T,N,D<:TimeType,A<:AbstractArray{T,N}} <: AbstractTimeSeries{T}

timestamp::Vector{D}
values::A
Expand Down Expand Up @@ -81,6 +79,8 @@ struct TimeArray{T,N,D<:TimeType,A<:AbstractArray{T,N}} <: AbstractTimeSeries{T,
timestamp_r, reverse(values, dims = 1), replace_dupes!(colnames), meta)

throw(ArgumentError("timestamps must be monotonic"))

# TODO: padded array design?
end
end

Expand Down Expand Up @@ -131,11 +131,13 @@ size(ta::TimeArray, dim) = size(values(ta), dim)

###### ndims #####################

ndims(ta::AbstractTimeSeries{T,N}) where {T,N} = N
# ndims(ta::AbstractTimeSeries{T,N}) where {T,N} = N
ndims(::AbstractTimeSeries) = 2
ndims(::TimeArray{T,N}) where {T,N} = N

###### iteration protocol ########

@generated function iterate(ta::AbstractTimeSeries{T,N}, i = 1) where {T,N}
@generated function iterate(ta::TimeArray{T,N}, i = 1) where {T,N}
val = (N == 1) ? :(values(ta)[i]) : :(values(ta)[i, :])

quote
Expand Down Expand Up @@ -182,8 +184,8 @@ hash(x::TimeArray, h::UInt) =

###### eltype #####################

Base.eltype(::AbstractTimeSeries{T,1,D}) where {T,D} = Tuple{D,T}
Base.eltype(::AbstractTimeSeries{T,2,D}) where {T,D} = Tuple{D,Vector{T}}
Base.eltype(::TimeArray{T,1,D}) where {T,D} = Tuple{D,T}
Base.eltype(::TimeArray{T,2,D}) where {T,D} = Tuple{D,Vector{T}}

###### show #####################

Expand Down
221 changes: 221 additions & 0 deletions src/timetable.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
###############################################################################
# Type
###############################################################################

# TODO: consider constraining T<:AbstractTimeAxis
"""
    TimeTable{T} <: AbstractTimeSeries{T}

Table made of a time axis of type `T` and a set of named column vectors.

# Fields
- `ta`: the time axis.
- `vecs`: mapping from column name to column vector, in insertion order.
- `n`: number of rows, stored explicitly in case the time axis is infinite.

# Constructor
    TimeTable{T}(ta, vecs)

The row count is `length(ta)` when `Base.haslength(T)` holds, otherwise the
length of the longest column. Columns whose length differs from the row count
are replaced by a new vector padded with `missing`.

# Throws
- `DimensionMismatch` if a column is longer than a time axis that has a length.
"""
mutable struct TimeTable{T} <: AbstractTimeSeries{T}
    ta::T
    vecs::OrderedDict{Symbol,AbstractVector}
    n::Int  # length, in case of infinite time axis

    function TimeTable{T}(ta::T, vecs) where {T}
        # Convert to the field type up front so that a padded column (whose
        # element type gains `Missing`) can always be stored back. This is a
        # no-op when `vecs` already has the field type.
        vecs = convert(OrderedDict{Symbol,AbstractVector}, vecs)

        # `init = 0` lets a table without any column be constructed instead of
        # failing on the reduction over an empty collection.
        m = mapreduce(length, max, values(vecs); init = 0)
        n = if Base.haslength(T)
            n′ = length(ta)
            (n′ >= m) || throw(DimensionMismatch(
                "The vector length should less or equal than the one of time axis"))
            n′
        else
            m
        end

        # note that it will copy, if the length of a col differs from `n`
        for (k, v) in vecs
            (length(v) == n) && continue
            vecs[k] = collect(PaddedView(missing, v, (n,)))
        end

        new(ta, vecs, n)
    end
    # other design style:
    # colnames::Vector{Symbol}
    # cols::Vector{AbstractVector}
end

"""
    TimeTable(ta, vecs::OrderedDict{Symbol})

Build a `TimeTable` whose type parameter is the type of `ta`, forwarding both
arguments to the inner constructor.
"""
function TimeTable(ta::T, vecs::OrderedDict{Symbol}) where {T}
    return TimeTable{T}(ta, vecs)
end
"""
    TimeTable(ta; kw...)

Build a `TimeTable` from the time axis `ta`, using each keyword argument as a
column: the keyword is the column name and its value the column vector.
Columns keep the order in which the keywords were given.
"""
function TimeTable(ta::T; kw...) where {T}
    vecs = OrderedDict{Symbol,AbstractVector}()
    for (k, v) in kw
        vecs[k] = v
    end
    return TimeTable(ta, vecs)
end

# Column name under which `getindex` on a `TimeTable` exposes the time axis
# instead of looking up a data column.
const TimeTableTimeCol = :time

"""
    TimeTableRow{T,V}

A single row of a `TimeTable`, as built by indexing a table with an integer.
"""
struct TimeTableRow{T,V}
# position of the row inside the table
i::Int
# time axis entry at that position
t::T
# values of the columns at that position
v::V
end


###############################################################################
# Iterator interfaces
###############################################################################

"""
    size(tt::TimeTable)

Return `(number of rows, number of columns)`.
"""
function Base.size(tt::TimeTable)
    nrow = length(tt)
    ncol = length(keys(_vecs(tt)))
    return (nrow, ncol)
end
"""
    size(tt::TimeTable, dim)

Return the number of rows for `dim == 1`, the number of columns for
`dim == 2`, and `1` for any other `dim`.
"""
function Base.size(tt::TimeTable, dim)
    if dim == 1
        return length(tt)
    elseif dim == 2
        return length(keys(_vecs(tt)))
    else
        return 1
    end
end

"""
    length(tt::TimeTable)

Return the stored row count (field `n`).
"""
@inline function Base.length(tt::TimeTable)
    return getfield(tt, :n)
end


###############################################################################
# Indexing
###############################################################################

"""
    lastindex(tt::TimeTable)

Return the index of the last row, i.e. the stored row count `n`.
"""
function Base.lastindex(tt::TimeTable)
    return getfield(tt, :n)
end

"""
    checkindex(Bool, tt::TimeTable, i::Int)

Return `true` when `i` is a valid row index, i.e. `1 <= i <= lastindex(tt)`.
"""
function Base.checkindex(::Type{Bool}, tt::TimeTable, i::Int)
    return 1 <= i <= lastindex(tt)
end

"""
    getindex(tt::TimeTable, s::Symbol)

Return the time axis when `s` is `TimeTableTimeCol`, otherwise the column
vector stored under the name `s`.
"""
function Base.getindex(tt::TimeTable, s::Symbol)
    return (s === TimeTableTimeCol) ? getfield(tt, :ta) : getvec(tt, s)
end

"""
    getindex(tt::TimeTable, i::Int)

Return row `i` as a `TimeTableRow` holding the row position, the time axis
entry at `i`, and the `i`-th element of every column.
"""
function Base.getindex(tt::TimeTable, i::Int)
    @boundscheck checkbounds(tt, i)
    timepoint = _ta(tt)[i]
    rowvalues = map(col -> col[i], values(_vecs(tt)))
    return TimeTableRow(i, timepoint, rowvalues)
end

"""
    getindex(tt::TimeTable, t::TimeType)

Return the row located at time `t`: `t` is translated to a row position with
`time2idx`, then the table is indexed with that position.
"""
function Base.getindex(tt::TimeTable, t::TimeType)
    idx = time2idx(tt, t)
    return tt[idx]
end
"""
    getindex(tt::TimeTable, i::Int, s::Symbol)

Return the element at row `i` of column `s`. When `s` is `TimeTableTimeCol`
the time axis entry at `i` is returned instead.
"""
function Base.getindex(tt::TimeTable, i::Int, s::Symbol)
    @boundscheck checkbounds(tt, i)
    return (s === TimeTableTimeCol) ? _ta(tt)[i] : _vecs(tt)[s][i]
end
"""
    getindex(tt::TimeTable, t::TimeType, s::Symbol)

Return the element of column `s` at time `t`, after translating `t` to a row
position with `time2idx`.
"""
function Base.getindex(tt::TimeTable, t::TimeType, s::Symbol)
    idx = time2idx(tt, t)
    return tt[idx, s]
end

# Generate `Base.findfirst` and `Base.findlast` methods for `TimeTable`.
# The search runs on the time axis; a hit beyond the stored row count `n`
# is reported as `nothing`.
for func in [:findfirst, :findlast]
    @eval function Base.$func(f::Function, tt::TimeTable)
        i = $func(f, _ta(tt))
        isnothing(i) && return nothing
        ifelse(i > getfield(tt, :n), nothing, i)
    end

    # TODO: handle case of infinite timegrid for findlast
end

# Generate `Base.findprev` and `Base.findnext` methods for `TimeTable`.
# The search starts from position `j` on the time axis; a hit beyond the
# stored row count `n` is reported as `nothing`.
for func in [:findprev, :findnext]
    @eval function Base.$func(f::Function, tt::TimeTable, j::Int)
        i = $func(f, _ta(tt), j)
        isnothing(i) && return nothing
        ifelse(i > getfield(tt, :n), nothing, i)
    end
end

"""
    getindex(r::TimeTableRow, i::Int)

Positional access to a row: `1` gives the row position, `2` the time entry and
`3` the column values. Any other `i` throws a `BoundsError`.
"""
function Base.getindex(r::TimeTableRow, i::Int)
    if i == 1
        return r.i
    elseif i == 2
        return r.t
    elseif i == 3
        return r.v
    end
    throw(BoundsError(r, i))
end

###############################################################################
# Value modification
###############################################################################

"""
    setproperty!(tt::TimeTable, name::Symbol, x::AbstractVector)

Store `x` as the column called `name` (`tt.name = x`), adding the column or
replacing an existing one. Returns `x`.

# Throws
- `DimensionMismatch` if `length(x)` differs from `length(tt)`.
"""
function Base.setproperty!(tt::TimeTable, name::Symbol, x::AbstractVector)
    (length(tt) == length(x)) || throw(DimensionMismatch("length unmatched"))
    columns = _vecs(tt)
    columns[name] = x
    return x
end

# TODO: support time axis modification
"""
    setindex!(tt::TimeTable, v, i::Int, s::Symbol)

Write `v` into row `i` of column `s` and return `v`.
"""
function Base.setindex!(tt::TimeTable, v, i::Int, s::Symbol)
    @boundscheck checkbounds(tt, i)
    column = _vecs(tt)[s]
    column[i] = v
    return v
end
"""
    setindex!(tt::TimeTable, v, t::TimeType, s::Symbol)

Write `v` into column `s` at the row located at time `t` (translated with
`time2idx`) and return `v`.
"""
function Base.setindex!(tt::TimeTable, v, t::TimeType, s::Symbol)
    idx = time2idx(tt, t)
    tt[idx, s] = v
    return v
end

"""
    resize!(tt::TimeTable, n′::Int)

Resize every column vector of `tt` to `n′` elements, record `n′` as the new
row count, and return `tt`. Nothing is done when the table already has `n′`
rows. The time axis is not touched by this method.
"""
function Base.resize!(tt::TimeTable, n′::Int)
    n = length(tt)
    (n == n′) && return tt

    for v in values(_vecs(tt))
        resize!(v, n′)
    end
    setfield!(tt, :n, n′)
    return tt
end

"""
    push!(tt::TimeTable{<:TimeGrid}, x::NamedTuple)

Append one row to `tt`. `x` must provide exactly one value per column, keyed by
column name. Each value is pushed onto its column, the row count is increased
by one, and the time axis is resized to the new row count. Returns `tt`.

# Throws
- `DimensionMismatch` if `x` does not have as many entries as `tt` has columns.
- `ArgumentError` if a key of `x` is not a column of `tt`.
"""
function Base.push!(tt::TimeTable{<:TimeGrid}, x::NamedTuple)
    d = _vecs(tt)
    (size(tt, 2) == length(x)) || throw(DimensionMismatch("input length unmatched"))

    # validate every key before mutating any column
    ks = keys(d)
    for k in keys(x)
        (k in ks) || throw(ArgumentError("unknown column $k"))
    end

    for (k, v) in d
        push!(v, x[k])
    end

    n = length(tt) + 1
    setfield!(tt, :n, n)
    resize!(_ta(tt), n)

    return tt
end


###############################################################################
# Time axis modification
###############################################################################

# TODO: add a `shrink` kwarg for shrinking length after lag/lead
"""
    lag(tt::TimeTable{<:TimeGrid}, n::Int)

Return a new `TimeTable` whose time axis is `tt`'s axis plus `n`. The column
container of `tt` is passed on as is.
"""
function lag(tt::TimeTable{<:TimeGrid}, n::Int)
    shifted = _ta(tt) + n
    return TimeTable(shifted, _vecs(tt))
end
"""
    lead(tt::TimeTable{<:TimeGrid}, n::Int)

Return a new `TimeTable` whose time axis is `tt`'s axis minus `n`. The column
container of `tt` is passed on as is.
"""
function lead(tt::TimeTable{<:TimeGrid}, n::Int)
    shifted = _ta(tt) - n
    return TimeTable(shifted, _vecs(tt))
end

# TODO: reindex ?


###############################################################################
# Join
###############################################################################

# TODO: after DataAPI.jl v0.17 released, import method from it

# TODO: support `on` kwarg
"""
    innerjoin(x::TimeTable{<:TimeGrid}, y::TimeTable{<:TimeGrid})

Join `x` and `y` on their time axes, keeping only the rows of `x` for which a
matching position in `y` is found. The result holds the columns of `x`
followed by the columns of `y`; a column of `y` whose name already exists in
`x` gets a trailing underscore appended to its name. Column data is copied.
"""
function innerjoin(x::TimeTable{<:TimeGrid}, y::TimeTable{<:TimeGrid})
    dx = _vecs(x)
    dy = _vecs(y)
    dz = OrderedDict{Symbol,AbstractVector}()

    tax = _ta(x)
    tay = _ta(y)

    idxx = Int[]
    idxy = Int[]
    # both index lists grow in lockstep, so both get the same size hint
    sizehint!(idxx, length(x))
    sizehint!(idxy, length(x))
    # NOTE(review): `findall(tax, tay)` presumably yields, for each position of
    # `tax`, the matching position in `tay` or `missing` — confirm against the
    # time axis implementation.
    for (i, j) in enumerate(findall(tax, tay))
        ismissing(j) && continue
        push!(idxx, i)
        push!(idxy, j)
    end

    for (k, v) in dx
        dz[k] = v[idxx]  # this will copy
    end

    ks = keys(dx)
    for (k, v) in dy
        # disambiguate a column name that is already used by `x`
        k′ = ifelse(k in ks, Symbol(k, :_), k)
        dz[k′] = v[idxy]
    end

    ta′ = [tax[i] for i in idxx]
    return TimeTable(ta′, dz)
end


###############################################################################
# Private utils
###############################################################################


# Throw a `BoundsError` unless `i` is a valid row index of `tt`; return
# `nothing` otherwise.
function checkbounds(tt::TimeTable, i::Int)
    checkindex(Bool, tt, i) || throw(BoundsError(tt, i))
    return nothing
end

# Look up the column vector stored under the name `s`.
@inline function getvec(tt::TimeTable, s::Symbol)
    return _vecs(tt)[s]
end
# Raw access to the column container (field `vecs`).
@inline function _vecs(tt::TimeTable)
    return getfield(tt, :vecs)
end
# Raw access to the time axis (field `ta`).
@inline function _ta(tt::TimeTable)
    return getfield(tt, :ta)
end

# Translate the time `t` to a row position by indexing the time axis with it.
@inline function time2idx(tt::TimeTable, t::TimeType)
    axis = _ta(tt)
    return axis[t]
end
Loading