Skip to content

Commit

Permalink
timetable wip
Browse files Browse the repository at this point in the history
  • Loading branch information
iblislin committed Jun 12, 2021
1 parent 15c3bd7 commit 170fff1
Show file tree
Hide file tree
Showing 9 changed files with 330 additions and 10 deletions.
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ authors = ["JuliaStats <https://github.com/JuliaStats>"]
version = "0.22.1"

[deps]
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
PaddedViews = "5432bcbf-9aad-5242-b902-cca2824c8663"
RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Expand Down
7 changes: 7 additions & 0 deletions src/TimeSeries.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ using Dates
using DelimitedFiles
using Statistics
# third-party
using DataStructures
using DocStringExtensions: SIGNATURES
using PaddedViews
using RecipesBase
using Reexport
using Tables
Expand All @@ -21,6 +23,9 @@ export TimeArray, AbstractTimeSeries,
# modify.jl
export rename, rename!

# timetable.jl
export TimeTable

###############################################################################
# Submodule
###############################################################################
Expand All @@ -33,7 +38,9 @@ include("timeaxis/TimeAxis.jl")
###############################################################################

include(".timeseriesrc.jl")
include("ats.jl")
include("timearray.jl")
include("timetable.jl")
include("utilities.jl")
include("tables.jl")
include("split.jl")
Expand Down
3 changes: 3 additions & 0 deletions src/adt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""
Abstract data types
"""
52 changes: 52 additions & 0 deletions src/ats.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
###############################################################################
# AbstractTimeSeries
###############################################################################

"""
    AbstractTimeSeries{T}

An `AbstractTimeSeries{T}` is a table-like data structure with a time index and
named columns.

`T` denotes the type of the time index.
When several columns together form a compound index, `T <: Tuple`.
For instance, `T = Tuple{Date,Time}` implies that two columns
form the time index.

# Interfaces

## Dimension and size

- `length`
- `ndims`
- `size`
- `axes`
- `copy`
- `deepcopy`
- `similar`
- `names`
- `rename`
- `rename!`
- `hcat`
- `vcat`
"""
abstract type AbstractTimeSeries{T} end

# Return whatever is stored in the `names` field of `ats`.
# NOTE(review): `getfield` throws for a subtype without a `names` field; `TimeTable`
# declares only `ta`, `vecs` and `n` — confirm the intended accessor per subtype.
Base.names(ats::AbstractTimeSeries) = getfield(ats, :names)


# Tables.jl traits shared by every `AbstractTimeSeries` subtype.
Tables.istable(::Type{<:AbstractTimeSeries}) = true

# Column access: the object itself is handed back as its own column container.
Tables.columnaccess(::Type{<:AbstractTimeSeries}) = true
Tables.columns(ats::AbstractTimeSeries) = ats

# Row access is declared here, but the generic `Tables.rows` method is still missing.
Tables.rowaccess(::Type{<:AbstractTimeSeries}) = true
# TODO
# Tables.rows(x::AbstractTimeSeries)

# Generic schema: column names come from `names(ats)`.
# TODO: column types are not tracked yet, so `nothing` is passed to mark them as
# unknown; the previous one-argument `Tables.Schema(names)` call had no matching method.
Tables.schema(ats::AbstractTimeSeries) = Tables.Schema(names(ats), nothing)
5 changes: 2 additions & 3 deletions src/tables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ function Base.iterate(x::TableIter, i::Integer = 1)
x[i], i + 1
end

Tables.istable(::Type{<:AbstractTimeSeries}) = true
Tables.rowaccess(::Type{<:TimeArray}) = true
Tables.rows(ta::TimeArray) = Tables.rows(Tables.columntable(ta))
Tables.columnaccess(::Type{<:TimeArray}) = true
Expand All @@ -58,10 +57,10 @@ Tables.getcolumn(ta::TimeArray, i::Int) = Tables.getcolumn(TableIter(ta), i)
Tables.getcolumn(ta::TimeArray, nm::Symbol) = Tables.getcolumn(TableIter(ta), nm)
Tables.getcolumn(i::TableIter, n::Int) = i[n]
Tables.getcolumn(i::TableIter, nm::Symbol) = getproperty(i, nm)
Tables.schema(ta::AbstractTimeSeries{T,N,D}) where {T,N,D} = Tables.schema(TableIter(ta))
Tables.schema(ta::TimeArray) = Tables.schema(TableIter(ta))
Tables.schema(i::TableIter{T,S}) where {T,S} = Tables.Schema(S, coltypes(data(i)))

coltypes(x::AbstractTimeSeries{T,N,D}) where {T,N,D} = (D, (T for _ 1:size(x, 2))...)
# Column types handed to `Tables.Schema`: the timestamp type `D` first, then the
# value type `T` repeated once per value column (`size(x, 2)` times).
coltypes(x::TimeArray{T,N,D}) where {T,N,D} = (D, (T for _ in 1:size(x, 2))...)


###############################################################################
Expand Down
16 changes: 9 additions & 7 deletions src/timearray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ import Base: convert, copy, length, show, getindex, iterate,
lastindex, size, eachindex, ==, isequal, hash, ndims,
getproperty, propertynames, values

abstract type AbstractTimeSeries{T,N,D} end

"""
TimeArray{T,N,D<:TimeType,A<:AbstractArray{T,N}} <: AbstractTimeSeries{T,N,D}
Expand Down Expand Up @@ -51,7 +49,7 @@ The third constructor builds a `TimeArray` from a `NamedTuple`.
ta = TimeArray(data; timestamp = :datetime, meta = "Example")
"""
struct TimeArray{T,N,D<:TimeType,A<:AbstractArray{T,N}} <: AbstractTimeSeries{T,N,D}
struct TimeArray{T,N,D<:TimeType,A<:AbstractArray{T,N}} <: AbstractTimeSeries{T}

timestamp::Vector{D}
values::A
Expand Down Expand Up @@ -81,6 +79,8 @@ struct TimeArray{T,N,D<:TimeType,A<:AbstractArray{T,N}} <: AbstractTimeSeries{T,
timestamp_r, reverse(values, dims = 1), replace_dupes!(colnames), meta)

throw(ArgumentError("timestamps must be monotonic"))

# TODO: padded array design?
end
end

Expand Down Expand Up @@ -131,11 +131,13 @@ size(ta::TimeArray, dim) = size(values(ta), dim)

###### ndims #####################

ndims(ta::AbstractTimeSeries{T,N}) where {T,N} = N
# ndims(ta::AbstractTimeSeries{T,N}) where {T,N} = N
ndims(::AbstractTimeSeries) = 2
ndims(::TimeArray{T,N}) where {T,N} = N

###### iteration protocol ########

@generated function iterate(ta::AbstractTimeSeries{T,N}, i = 1) where {T,N}
@generated function iterate(ta::TimeArray{T,N}, i = 1) where {T,N}
val = (N == 1) ? :(values(ta)[i]) : :(values(ta)[i, :])

quote
Expand Down Expand Up @@ -182,8 +184,8 @@ hash(x::TimeArray, h::UInt) =

###### eltype #####################

Base.eltype(::AbstractTimeSeries{T,1,D}) where {T,D} = Tuple{D,T}
Base.eltype(::AbstractTimeSeries{T,2,D}) where {T,D} = Tuple{D,Vector{T}}
Base.eltype(::TimeArray{T,1,D}) where {T,D} = Tuple{D,T}
Base.eltype(::TimeArray{T,2,D}) where {T,D} = Tuple{D,Vector{T}}

###### show #####################

Expand Down
221 changes: 221 additions & 0 deletions src/timetable.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
###############################################################################
# Type
###############################################################################

# TODO: consider constrain T<:AbstractTimeAxis
"""
    TimeTable{T} <: AbstractTimeSeries{T}

A table made of a time axis `ta` of type `T` and an ordered collection of named
column vectors `vecs`. The row count `n` is stored explicitly so that the table
has a length even when the time axis does not report one.

The inner constructor takes `n` from `length(ta)` when `Base.haslength(T)` holds,
and from the longest column otherwise. Every column whose length differs from `n`
is replaced by a copy padded with `missing` up to `n`; the replacement is written
into the `vecs` container that was passed in.

# Throws
- `DimensionMismatch`: a column is longer than a time axis that has a length.
"""
mutable struct TimeTable{T} <: AbstractTimeSeries{T}
    ta::T
    vecs::OrderedDict{Symbol,AbstractVector}
    n::Int  # length, in case of infinite time axis

    function TimeTable{T}(ta::T, vecs) where {T}
        # `init = 0` keeps the reduction defined for a table without any column
        m = mapreduce(length, max, values(vecs); init = 0)
        n = if Base.haslength(T)
            n′ = length(ta)
            (n′ >= m) || throw(DimensionMismatch(
                "The vector length should less or equal than the one of time axis"))
            n′
        else
            m
        end

        # note that it will copy, if the length of a col is shorter than `n`
        for (k, v) in vecs
            (length(v) == n) && continue
            vecs[k] = collect(PaddedView(missing, v, (n,)))
        end

        new(ta, vecs, n)
    end
    # other design style:
    #   colnames::Vector{Symbol}
    #   cols::Vector{AbstractVector}
end

# Outer constructor: infer the type parameter `T` from the time axis `ta`.
TimeTable(ta::T, vecs::OrderedDict{Symbol}) where T = TimeTable{T}(ta, vecs)

# Keyword form: every `name = vector` keyword becomes a column, in the order given.
function TimeTable(ta::T; kw...) where T
    vecs = OrderedDict{Symbol,AbstractVector}()
    for (k, v) in kw
        vecs[k] = v
    end
    TimeTable(ta, vecs)
end

# Column symbol that addresses the time axis when a `TimeTable` is indexed by name.
const TimeTableTimeCol = :time

# One row of a `TimeTable`, as built by the positional `getindex(tt, i::Int)`.
struct TimeTableRow{T,V}
i::Int  # row position
t::T    # time-axis entry at that position
v::V    # values of all columns at that position
end


###############################################################################
# Iterator interfaces
###############################################################################

# `(rows, columns)`: the stored row count and the number of column vectors.
function Base.size(tt::TimeTable)
    nrow = length(tt)
    ncol = length(keys(_vecs(tt)))
    return (nrow, ncol)
end

# Extent along `dim`: rows for 1, columns for 2, and 1 for any other `dim`.
function Base.size(tt::TimeTable, dim)
    if dim == 1
        return length(tt)
    elseif dim == 2
        return length(keys(_vecs(tt)))
    else
        return 1
    end
end

# Number of rows, read from the stored field `n` rather than from the time axis.
@inline Base.length(tt::TimeTable) = getfield(tt, :n)


###############################################################################
# Indexing
###############################################################################

# `tt[end]` resolves to the stored row count `n`.
Base.lastindex(tt::TimeTable) = getfield(tt, :n)

# A row position is valid when it lies in `1:lastindex(tt)`.
Base.checkindex(::Type{Bool}, tt::TimeTable, i::Int) = (1 <= i <= lastindex(tt))

# Column lookup by name: the reserved symbol `TimeTableTimeCol` returns the time
# axis, any other symbol returns the column vector stored under that name.
Base.getindex(tt::TimeTable, s::Symbol) =
    (s === TimeTableTimeCol) ? getfield(tt, :ta) : getvec(tt, s)

# Row lookup by position: returns a `TimeTableRow` holding the position, the
# time-axis entry and the `i`-th element of every column.
function Base.getindex(tt::TimeTable, i::Int)
    @boundscheck checkbounds(tt, i)
    t = _ta(tt)[i]
    vals = map(col -> col[i], values(_vecs(tt)))
    return TimeTableRow(i, t, vals)
end

# Row lookup by time: `time2idx` turns `t` into an index, which is then used to index `tt`.
Base.getindex(tt::TimeTable, t::TimeType) = tt[time2idx(tt, t)]
# Single-cell lookup: row `i` of the column named `s`. The reserved symbol
# `TimeTableTimeCol` reads from the time axis instead of a column vector.
function Base.getindex(tt::TimeTable, i::Int, s::Symbol)
    @boundscheck checkbounds(tt, i)
    (s === TimeTableTimeCol) ? _ta(tt)[i] : _vecs(tt)[s][i]
end
# Single-cell lookup by time: `time2idx` turns `t` into an index used together with `s`.
Base.getindex(tt::TimeTable, t::TimeType, s::Symbol) = tt[time2idx(tt, t), s]

# Generate `Base.findfirst` and `Base.findlast` for `TimeTable`: the search runs on
# the time axis, and a hit beyond the stored row count `n` is reported as `nothing`.
for func in [:findfirst, :findlast]
    @eval function Base.$func(f::Function, tt::TimeTable)
        i = $func(f, _ta(tt))
        isnothing(i) && return nothing
        ifelse(i > getfield(tt, :n), nothing, i)
    end

    # TODO: handle case of infinite timegrid for findlast
end

# Generate `Base.findprev` and `Base.findnext` for `TimeTable`: the search runs on
# the time axis from position `j`, and a hit beyond the stored row count `n` is
# reported as `nothing`.
for func in [:findprev, :findnext]
    @eval function Base.$func(f::Function, tt::TimeTable, j::Int)
        i = $func(f, _ta(tt), j)
        isnothing(i) && return nothing
        ifelse(i > getfield(tt, :n), nothing, i)
    end
end

# Positional access to a row: 1 is the row position, 2 the time entry, 3 the
# column values. Any other position throws a `BoundsError`.
function Base.getindex(r::TimeTableRow, i::Int)
    if i == 1
        return r.i
    elseif i == 2
        return r.t
    elseif i == 3
        return r.v
    end
    throw(BoundsError(r, i))
end

###############################################################################
# Value modification
###############################################################################

# `tt.name = x` stores `x` as the column `name`, adding or replacing it.
# Throws `DimensionMismatch` when `x` does not have exactly `length(tt)` elements.
function Base.setproperty!(tt::TimeTable, name::Symbol, x::AbstractVector)
    if length(tt) != length(x)
        throw(DimensionMismatch("length unmatched"))
    end
    cols = _vecs(tt)
    cols[name] = x
end

# TODO: support time axis modification
# Write `v` into row `i` of the column named `s`.
function Base.setindex!(tt::TimeTable, v, i::Int, s::Symbol)
    @boundscheck checkbounds(tt, i)
    col = _vecs(tt)[s]
    col[i] = v
end
# Assignment by time: `time2idx` turns `t` into an index, then `tt[index, s] = v` runs.
Base.setindex!(tt::TimeTable, v, t::TimeType, s::Symbol) = (tt[time2idx(tt, t), s] = v)

# Resize every column vector to `n′` rows and record `n′` as the new row count.
# Only the columns and the stored length change; the time axis is left untouched.
# Returns `tt`, unchanged when it already has `n′` rows.
function Base.resize!(tt::TimeTable, n′::Int)
    n = length(tt)
    (n == n′) && return tt

    for v in values(_vecs(tt))
        resize!(v, n′)
    end
    setfield!(tt, :n, n′)
    tt
end

# Append one row, given as a `NamedTuple` with exactly one entry per column.
# Throws `DimensionMismatch` when the entry count differs from the column count
# and `ArgumentError` when a key names no existing column. On success every
# column grows by one, the stored row count is bumped and the time axis is
# resized to match. Returns `tt`.
function Base.push!(tt::TimeTable{<:TimeGrid}, x::NamedTuple)
    d = _vecs(tt)
    (size(tt, 2) == length(x)) || throw(DimensionMismatch("input length unmatched"))

    # validate every key before any column is modified
    ks = keys(d)
    for k in keys(x)
        (k in ks) || throw(ArgumentError("unknown column $k"))
    end

    for (k, v) in d
        push!(v, x[k])
    end

    n = length(tt) + 1
    setfield!(tt, :n, n)
    resize!(_ta(tt), n)

    tt
end


###############################################################################
# Time axis modification
###############################################################################

# TODO: add a `shrink` kwarg for shrinking length after lag/lead
# `lag` builds a new table on the time axis `_ta(tt) + n`, `lead` on `_ta(tt) - n`.
# Both pass the same `vecs` container on, so the result shares its columns with `tt`.
lag(tt::TimeTable{<:TimeGrid}, n::Int) = TimeTable(_ta(tt) + n, _vecs(tt))
lead(tt::TimeTable{<:TimeGrid}, n::Int) = TimeTable(_ta(tt) - n, _vecs(tt))

# TODO: reindex ?


###############################################################################
# Join
###############################################################################

# TODO: after DataAPI.jl v0.17 released, import method from it

# TODO: support `on` kwarg
# Inner join of two grid-based tables on their time axes.
#
# `findall(tax, tay)` is iterated with `enumerate`; each element `j` is paired with
# its position `i`, and `missing` elements are skipped.
# NOTE(review): presumably `j` is the position in `y` matching the `i`-th time of
# `x` — confirm against the `findall` method for time grids.
#
# The result holds copies of the selected rows. A column of `y` whose name already
# exists in `x` is stored under that name with `_` appended. The time axis of the
# result is a plain vector of the matched entries of `x`.
function innerjoin(x::TimeTable{<:TimeGrid}, y::TimeTable{<:TimeGrid})
    dx = _vecs(x)
    dy = _vecs(y)
    dz = OrderedDict{Symbol,AbstractVector}()

    tax = _ta(x)
    tay = _ta(y)

    idxx = Int[]
    idxy = Int[]
    # at most one match per row of `x`; the hint was previously applied to
    # `idxy` twice and never to `idxx`
    sizehint!(idxx, length(x))
    sizehint!(idxy, length(x))
    for (i, j) in enumerate(findall(tax, tay))
        ismissing(j) && continue
        push!(idxx, i)
        push!(idxy, j)
    end

    for (k, v) in dx
        dz[k] = v[idxx]  # this will copy
    end

    ks = keys(dx)
    for (k, v) in dy
        k′ = ifelse(k in ks, Symbol(k, :_), k)
        dz[k′] = v[idxy]
    end

    ta′ = [tax[i] for i in idxx]
    TimeTable(ta′, dz)
end


###############################################################################
# Private utils
###############################################################################


# Throw a `BoundsError` unless `i` passes `checkindex`; returns `nothing` otherwise.
function checkbounds(tt::TimeTable, i::Int)
    checkindex(Bool, tt, i) && return nothing
    throw(BoundsError(tt, i))
end

# Field accessors built on `getfield`, so they do not go through `getproperty`.
# `getvec` returns the column vector stored under `s`.
@inline getvec(tt::TimeTable, s::Symbol) = _vecs(tt)[s]
# `_vecs` returns the container of column vectors, `_ta` the time axis.
@inline _vecs(tt::TimeTable) = getfield(tt, :vecs)
@inline _ta(tt::TimeTable) = getfield(tt, :ta)

# Translate a time `t` into an index by indexing the time axis with it.
# NOTE(review): presumably the time axis returns an integer position here — confirm.
@inline time2idx(tt::TimeTable, t::TimeType) = _ta(tt)[t]
Loading

0 comments on commit 170fff1

Please sign in to comment.