Skip to content

Commit

Permalink
Global code cleanup. Also minor refactoring, improved some tests and …
Browse files Browse the repository at this point in the history
…minor fixes.
  • Loading branch information
sarusso committed Nov 15, 2024
1 parent dad9d15 commit 97446e4
Show file tree
Hide file tree
Showing 24 changed files with 522 additions and 775 deletions.
2 changes: 1 addition & 1 deletion jupyter.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash
set -e

# This script will build the Timeseria container and start it with Jupyter.
# This script will build the Timeseria container and start it with Jupyter

# Build
if [[ "x$BUILD" != "xFalse" ]]; then
Expand Down
23 changes: 0 additions & 23 deletions pypi.sh

This file was deleted.

2 changes: 1 addition & 1 deletion requirements_pinned.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ fitter==1.7.0
# Optional
#tensorflow==2.7.0 # TensorFlow (also -gpu, -macos or -aarch64 variants)
#prophet==1.1.5 # Facebook's Prophet
#pmdarima==2.0.4 # AARIMA
#pmdarima==2.0.4 # AARIMA
#statsmodels==0.14.1 # ARIMA, SARIMAX
2 changes: 1 addition & 1 deletion test.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash
set -e

# This script will build the Timeseria container and then run the tests into it.
# This script will build the Timeseria container and run the tests into it

# Build
if [[ "x$BUILD" != "xFalse" ]]; then
Expand Down
92 changes: 31 additions & 61 deletions timeseria/datastructures.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
"""Base data structures as Points, Slots and Series."""
"""Base data structures as Points, Slots, and Series."""

import json
from copy import deepcopy
Expand Down Expand Up @@ -95,7 +95,7 @@ class TimePoint(Point):

def __init__(self, *args, **kwargs):

# Handle timezone if any (removing it from kwargs)
# Handle time zone if any (removing it from kwargs)
tz = kwargs.pop('tz', None)
if tz:
self._tz = timezonize(tz)
Expand All @@ -112,21 +112,17 @@ def __init__(self, *args, **kwargs):
# Ok, will convert the datetime to epoch and then create the point in the standard way
t = s_from_dt(kwargs['dt'])

# If we do not have a timezone, can we use the one from the dt used to initialize this TimePoint?
# If we do not have a time zone, can we use the one from the dt used to initialize this TimePoint?
try:
self._tz
except AttributeError:
if kwargs['dt'].tzinfo:

#Do not set it if it is UTC, it is the default
# Do not set it if it is UTC, it is the default
if kwargs['dt'].tzinfo == UTC:
pass
else:
self._tz = kwargs['dt'].tzinfo
#raise NotImplementedError('Not yet tz from dt ("{}")'.format(kwargs['dt']))

#else:
# raise Exception('Don\'t know how to handle all kwargs (got "{}")'.format(kwargs))

# Cast or create in the standard way
elif args:
Expand Down Expand Up @@ -155,7 +151,7 @@ def __gt__(self, other):

@property
def tz(self):
"""The timezone."""
"""The time zone."""
try:
return self._tz
except AttributeError:
Expand Down Expand Up @@ -202,9 +198,6 @@ def __init__(self, *args, **kwargs):
raise ValueError('Got type "{}" for data_indexes, was expecting a dict'.format(data_indexes.__class__.__name__))
except KeyError:
data_indexes = {}
#else:
# if None in data_indexes.values():
# raise ValueError('Cannot have an index set to None: do not set it at all ({})'.format(data_indexes))

# Special data loss index
try:
Expand Down Expand Up @@ -237,9 +230,9 @@ def __eq__(self, other):
@property
def data(self):
"""The data."""
# Data is set like this as it cannot be set if not in the init (read: changed after created)
# to prevent this to happend when the point is in a series where they are all supposed
# to carry the same data type and with the same number of elements. TODO: check!
# Data is implemented using a property to enforce that it cannot be changed after being set
# via the init, in particular with respect to the series, where data points are checked, upon
# insertion, to carry the same data type and with the same number of elements.
return self._data

@property
Expand All @@ -257,7 +250,6 @@ def data_loss(self):
try:
return self.data_indexes['data_loss']
except KeyError:
#raise AttributeError('No data loss index set for this point')
return None

def data_labels(self):
Expand Down Expand Up @@ -297,7 +289,6 @@ def __repr__(self):
return 'Time point @ {} ({}) with data "{}"'.format(self.t, self.dt, self.data)



#======================
# Slots
#======================
Expand Down Expand Up @@ -420,15 +411,11 @@ def __init__(self, start=None, end=None, unit=None, **kwargs):
# Extra time zone checks
if start and end:
if start.tz != end.tz:
raise ValueError('{} start and end must have the same timezone (got start.tz="{}", end.tz="{}")'.format(self.__class__.__name__, start.tz, end.tz))
raise ValueError('{} start and end must have the same time zone (got start.tz="{}", end.tz="{}")'.format(self.__class__.__name__, start.tz, end.tz))

# Call parent init
super(TimeSlot, self).__init__(start=start, end=end, unit=unit)

# If we did not have the end, set its timezone now:
#if end is None:
# self.end.change_tz(self.start.tz)

# Store time zone
self.tz = start.tz

Expand Down Expand Up @@ -504,9 +491,6 @@ def __init__(self, *args, **kwargs):
raise ValueError('Got type "{}" for data_indexes, was expecting a dict'.format(data_indexes.__class__.__name__))
except KeyError:
data_indexes = {}
#else:
# if None in data_indexes.values():
# raise ValueError('Cannot have an index set to None: do not set it at all ({})'.format(data_indexes))

# Special data loss index
try:
Expand Down Expand Up @@ -536,9 +520,9 @@ def __eq__(self, other):
@property
def data(self):
"""The data."""
# Data is set like this as it cannot be set if not in the init (read: changed after created)
# to prevent this to happened when the point is in a series where they are all supposed
# to carry the same data type and with the same number of elements. TODO: check me!
# Data is implemented using a property to enforce that it cannot be changed after being set
# via the init, in particular with respect to the series, where data slots are checked, upon
# insertion, to carry the same data type and with the same number of elements.
return self._data

@property
Expand All @@ -556,7 +540,6 @@ def data_loss(self):
try:
return self.data_indexes['data_loss']
except KeyError:
#raise AttributeError('No data loss index set for this point')
return None

def data_labels(self):
Expand Down Expand Up @@ -643,8 +626,6 @@ def append(self, item):
"""Append an item to the series. Accepts only items of the same
type as the items already present in the series (unless empty)"""

# TODO: move to use the insert?

# Check type
if self.item_type:
if not isinstance(item, self.item_type):
Expand Down Expand Up @@ -714,7 +695,6 @@ def _all_data_indexes(self):
"""Return all the data_indexes of the series, to be intended as custom
defined indicators (i.e. data_loss, anomaly_index, etc.)."""

# TODO: move this to the Data*Series...?
data_index_names = []
for item in self:
for index_name in item.data_indexes:
Expand Down Expand Up @@ -874,7 +854,6 @@ def __getitem__(self, key):
# Try filtering on this data label only
return self.filter(key)
else:
# TOOD: this will not work for SeriesView if ever implemented
return super(Series, self).__getitem__(key)


Expand All @@ -898,15 +877,13 @@ def rename_data_label(self, old_data_label, new_data_label):
if len(self) > 0 and not self._item_data_reference:
raise TypeError('Series items have no data, cannot rename a label')
for item in self:
# TODO: move to the DataPoint/DataSlot?
item.data[new_data_label] = item.data.pop(old_data_label)

def remove_data_label(self, data_label):
"""Remove a data label, in-place."""
if len(self) > 0 and not self._item_data_reference:
raise TypeError('Series items have no data, cannot rename a label')
for item in self:
# TODO: move to the DataPoint/DataSlot?
item.data.pop(data_label, None)

def remove_data_index(self, data_index):
Expand All @@ -923,9 +900,6 @@ def remove_data_loss(self):
for item in self:
item.data_indexes.pop('data_loss', None)




#=========================
# Operations
#=========================
Expand Down Expand Up @@ -1206,7 +1180,7 @@ def __repr__(self):
def __init__(self, *args, **kwargs):


# Handle timezone
# Handle time zone
tz = kwargs.pop('tz', None)
if tz:
self._tz = timezonize(tz)
Expand All @@ -1231,13 +1205,10 @@ def append(self, item):
except AttributeError:
pass
else:
# Check time ordering and handle the resolution.

# The following if is to support the deepcopy, otherwise the original prev_t will be used
# TODO: maybe move the above to a "hasattr" plus an "and" instead of this logic?
# Check time ordering and handle the resolution. It is done in this way to support
# the deepcopy, otherwise the original prev_t will be used.
if len(self)>0:

# logger.debug('Checking time ordering for t="%s" (prev_t="%s")', item.t, self.prev_t)
if item.t < self.prev_t:
raise ValueError('Time t="{}" is out of order (prev t="{}")'.format(item.t, self.prev_t))

Expand Down Expand Up @@ -1265,7 +1236,7 @@ def append(self, item):
del self._resolution_as_seconds
self._resolution = 'variable'
finally:
# Delete the autodetected sampling interval cache if present
# Delete the auto-detected sampling interval cache if present
try:
del self._autodetected_sampling_interval
del self._autodetected_sampling_interval_confidence
Expand All @@ -1276,18 +1247,18 @@ def append(self, item):

elif isinstance(item, TimeSlot):

# Slots can belong to the same series if they are in succession (tested with the __succedes__ method)
# Slots can belong to the same series if they are in succession (checked with the __succedes__ method)
# and if they have the same unit, which we test here instead as the __succedes__ is more general.

# Check the timezone (only for slots, points are not affected by timezones)
# Check the time zone (only for slots, points are not affected by time zones)
if not self.tz:
# If no timezone set, use the item one's
# If no time zone set, use the item's one
self._tz = item.tz

else:
# Else, check for the same timezone
# Else, check for the same time zone
if self._tz != item.tz:
raise ValueError('Cannot append slots on different timezones (I have "{}" and you tried to add "{}")'.format(self.tz, item.start.tz))
raise ValueError('Cannot append slots on different time zones (I have "{}" and you tried to add "{}")'.format(self.tz, item.start.tz))

try:
if self._resolution != item.unit:
Expand All @@ -1310,7 +1281,7 @@ def append(self, item):
super(TimeSeries, self).append(item)

def _item_by_t(self, t):
# TODO: improve performance, bisection first, then use an index?
# TODO: improve performance here. Bisection first, then maybe use an index-based mapping?
for item in self:
if item.t == t:
return item
Expand Down Expand Up @@ -1351,25 +1322,24 @@ def __getitem__(self, arg):
return self.get(arg)

#=========================
# Timezone-related
# Time zone-related
#=========================

@property
def tz(self):
"""The timezone of the time series."""
# Note: we compute the tz on the fly because for point time series we assume to use the tz
# attribute way lass than the slot time series, where the tz is instead computed at append-time.
"""The time zone of the time series."""
try:
return self._tz
except AttributeError:
# Detect timezone on the fly
# TODO: this ensures that each point is on the same timezone. Do we want this?
# Detect time zone on the fly. Only applies for point time series.
# If different time zones are mixed, then fall back on UTC.
# TODO: set the tz at append-time for point time series as well?
detected_tz = None
for item in self:
if not detected_tz:
detected_tz = item.tz
else:
# Terrible but seems like no other way to compare pytz.tzfile.* classes
# Terrible, but there seems to be no other way to compare pytz.tzfile.* classes
if str(item.tz) != str(detected_tz):
return UTC
return detected_tz
Expand Down Expand Up @@ -1505,7 +1475,7 @@ def load(cls, file_name):
if loaded_series.__class__ == cls:
return loaded_series
else:
# TODO: the following is a huge performance hit...
# TODO: improve performance here, the following is highly inefficient.
series_items = loaded_series.contents()
cls(*series_items)

Expand Down Expand Up @@ -1597,7 +1567,7 @@ def from_csv(cls, file_name, *args, **kwargs):
if loaded_series.__class__ == cls:
return loaded_series
else:
# TODO: the following is a huge performance hit...
# TODO: improve performance here, the following is highly inefficient.
series_items = loaded_series.contents()
cls(*series_items)

Expand All @@ -1620,7 +1590,7 @@ def from_df(cls, df, item_type='auto'):

if not unit_str_pd:
if not item_type:
logger.info('Cannot infer the freqency of the dataframe, will just create points')
logger.info('Cannot infer the frequency of the dataframe, will just create points')
item_type = DataTimePoint

else:
Expand Down
21 changes: 7 additions & 14 deletions timeseria/interpolators.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,20 +61,13 @@ def evaluate(self, at, prev_i=None, next_i=None):

for label in self.series.data_labels():

if False:
# TODO: check the math here, this should be a better approach but tests fail...
coordinate_increment = at - prev_point.t
value_diff = next_point.data[label] - prev_point.data[label]
interpolated_data[label] = prev_point.data[label] + (value_diff * coordinate_increment)

else:
# Compute the "growth" ratio
diff = next_point.data[label] - prev_point.data[label]
delta_t = next_point.t - prev_point.t
ratio = diff / delta_t

# Compute the value of the data for the new point
interpolated_data[label] = prev_point.data[label] + ((at-prev_point.t)*ratio)
# Compute the "growth" ratio
diff = next_point.data[label] - prev_point.data[label]
delta_t = next_point.t - prev_point.t
ratio = diff / delta_t

# Compute the value of the data for the new point
interpolated_data[label] = prev_point.data[label] + ((at-prev_point.t)*ratio)


return interpolated_data
Expand Down
1 change: 0 additions & 1 deletion timeseria/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
def setup(level=LOGLEVEL, force=False):
timeseria_logger = logging.getLogger('timeseria')
timeseria_logger.propagate = False
#print('Setting log level to "{}"'.format(level))
try:
configured = False
for handler in timeseria_logger.handlers:
Expand Down
Loading

0 comments on commit 97446e4

Please sign in to comment.