Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

create validation method and apply to to_windows and shift #21

Merged
merged 6 commits into from
Mar 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ Version Notes
=============
0.1.1 (2024-01-31)
-------------------
* Addition of array-like and dataframe label-based length parameter options to the ``EventsCollection.to_windows()`` method. This allows for the creation of segments based on a variable length.
* Addition of array-like, dataframe label-based, and callable ``length`` parameter options to the ``EventsCollection.to_windows()`` method and ``distance`` parameter to the ``EventsCollection.shift()`` method. This allows for the creation of segments based on a variable length. This will be made available to other methods in future versions as well.
* Performance improvements
* Various bug fixes, minor features

Expand Down
106 changes: 81 additions & 25 deletions linref/events/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -1159,12 +1159,12 @@ def to_windows(

Parameters
----------
length : numerical, array-like, or label, default 1.0
A length to cut down all events to. If an array is provided, must
have a length equal to the number of records in the events
dataframe. If a label is provided, it must be a valid label within
the events dataframe, containing valid numerical data defining
segment lengths. Not valid if dissolve=True.
length : scalar, array-like, label, or callable, default 1.0
A length to cut down all events to. Can be provided as a single
scalar value, an array-like of values with a length equal to the
number of records in the events dataframe, a column label in the
events dataframe, or a callable which can be applied to the events
dataframe along axis=1. Not valid if dissolve=True.
steps : int, default 1
A number of steps per window length. The resulting step length will
be equal to length / steps. For non-overlapped windows, use a steps
Expand Down Expand Up @@ -1199,28 +1199,16 @@ def to_windows(
endpoint : bool, default False
Add a point event at the end of each event range.
"""
# Validate
lengths = self._validate_array_input(
length, label='length', dtypes=(int, float))

# Dissolve events
if dissolve:
events = self.dissolve().df
else:
events = self.df
# Define cut lengths
if isinstance(length, str):
if not length in events.columns:
raise ValueError(f"Provided length label '{length}' is not "
"present within the events dataframe.")
else:
lengths = events[length].values
elif isinstance(length, (int, float)):
lengths = np.full(events.shape[0], length)
else:
try:
assert len(length) == events.shape[0]
lengths = length
except:
raise ValueError("Provided length array must be an array-like "
"and have a length equal to the number of records in the "
"events dataframe.")

# Iterate over roads and create sliding window segments
gen = zip(
events[self.keys + [self.beg, self.end]].values,
Expand Down Expand Up @@ -2084,6 +2072,68 @@ def _validate_keys(self, keys):
# Return validated keys
return keys

def _validate_array_input(self, value, label='value', dtypes=(int, float)):
"""
Validate the input to ensure it is a single value of the required
type(s), an array-like of such values with a length equal to the
number of records in the events dataframe, a pd.Series which aligns
with the events dataframe, a label of a column in the events
dataframe, or a callable function which can be applied to the events
dataframe along axis=1 to generate the required values.
"""
# Validate value input
if value is None:
raise ValueError(f"No input {label} provided.")
elif isinstance(value, dtypes):
return np.full(self.shape[0], value)
elif isinstance(value, str):
if not value in self.df.columns:
raise ValueError(
f"If provided as a string, input {label} '{value}' must "
f"be a valid column label in the events dataframe."
)
return self.df[value].values
elif isinstance(value, (list, tuple)):
if len(value) != self.shape[0]:
raise ValueError(
f"Input {label} array must have a length equal to the "
f"number of records in the events dataframe."
)
return np.array(value)
elif isinstance(value, np.ndarray):
if value.ndim > 1:
raise ValueError(
f"Input {label} array must be 1-dimensional."
)
if len(value) != self.shape[0]:
raise ValueError(
f"Input {label} array must have a length equal to the "
f"number of records in the events dataframe."
)
return value
elif isinstance(value, pd.Series):
if len(value) != self.shape[0]:
raise ValueError(
f"Input {label} series must have a length equal to the "
f"number of records in the events dataframe."
)
try:
return value.reindex_like(self.df).values
except:
raise ValueError(
f"Input {label} series must have an index which aligns "
f"with the events dataframe."
)
elif callable(value):
return self.df.apply(value, axis=1).values
else:
raise ValueError(
f"Input {label} must be a of dtype {dtypes}, an array-like "
f"of the same, a column label in the events dataframe, or a "
f"callable which can be applied to the events dataframe along "
f"axis=1."
)

def round(self, decimals=0, factor=1, inplace=False):
"""
Round the bounds of all events to the specified number of decimals
Expand Down Expand Up @@ -2160,9 +2210,13 @@ def shift(

Parameters
----------
distance : scalar, default 0
distance : scalar, array-like, label, or callable, default 0
The amount to shift each event bound by. Negative values will
result in an inversion of the `direction` parameter.
result in an inversion of the `direction` parameter. Can be
provided as a single scalar value, an array-like of values with a
length equal to the number of records in the events dataframe,
a column label in the events dataframe, or a callable which can
be applied to the events dataframe along axis=1.
direction : {'positive', 'negative', 'both'}, default 'positive'
Which direction the event bounds should be shifted.

Expand All @@ -2187,6 +2241,8 @@ def shift(
# Validation
_ops_direction = {'positive', 'negative', 'both'}
_ops_which = {'begs', 'ends', 'both'}
distance = self._validate_array_input(
distance, label='distance', dtypes=(int, float))
if not direction in _ops_direction:
raise ValueError(
f"Input `direction` parameter must be one of {_ops_direction}")
Expand Down