
Add docstrings
ZhengyanZhu committed Mar 22, 2024
1 parent 757d9b6 commit 2b417dd
Showing 3 changed files with 49 additions and 48 deletions.
82 changes: 42 additions & 40 deletions rul_datasets/reader/ncmapss.py
@@ -123,42 +123,42 @@ def __init__(
truncate_degraded_only: bool = False,
resolution_seconds: int = 1,
padding_value: float = 0.0,
- scaling_range: [float, float] = None,
+ scaling_range: Optional[Tuple[int, int]] = (0, 1),
) -> None:
"""
Create a new reader for the New C-MAPSS dataset. The maximum RUL value is set
to 65 by default. The default channels are the four operating conditions, the
14 physical sensors, and the 14 virtual sensors, in this order.

The default window size is the longest flight cycle in the sub-dataset.
Shorter cycles are padded on the left. The default padding value is zero but
can be overridden, e.g., as -1, to make filtering for padding easier later on.

The default `run_split_dist` is the same as in the original dataset, but with
the last unit of the original train split designated for validation.

If the features are downsampled in time, the default window size is
automatically adjusted to `window_size // resolution_seconds`. Any manually
set `window_size` needs to take this into account, as it is applied after
downsampling.

For more information about using readers, refer to the [reader]
[rul_datasets.reader] module page.

Args:
    fd: The sub-dataset to use. Must be in `[1, 7]`.
    max_rul: The maximum RUL value.
    percent_broken: The maximum relative degradation per unit.
    percent_fail_runs: The percentage or index list of available units.
    feature_select: The indices of the features to use.
    truncate_val: Truncate the validation data with `percent_broken`, too.
    run_split_dist: The assignment of units to each split.
    truncate_degraded_only: Only truncate the degraded part of the data
        (< max RUL).
    resolution_seconds: The number of consecutive seconds to average over for
        downsampling.
    padding_value: The value to use for padding the flight cycles.
    scaling_range: The range to which the features are scaled by the min-max
        scaler.
"""
super().__init__(
fd,
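For orientation, here is a minimal usage sketch of the reader as configured by this constructor, following the calls that appear in the tests changed by this commit. It assumes `NCmapssReader` is importable from `rul_datasets.reader`, as the module path suggests; the `fd`, `scaling_range`, and `resolution_seconds` values are illustrative only.

```python
from rul_datasets.reader import NCmapssReader

# Sub-dataset FD1, features scaled into [-1, 1] instead of the default (0, 1).
reader = NCmapssReader(fd=1, scaling_range=(-1, 1))
reader.prepare_data()  # downloads the data and fits/caches the min-max scaler
features, targets = reader.load_split("dev")

# Downsampling by 10 also shrinks the default window size to
# window_size // 10, as described in the docstring above.
reader_coarse = NCmapssReader(fd=1, resolution_seconds=10)
```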
@@ -217,13 +217,15 @@ def prepare_data(self) -> None:
"""
if not os.path.exists(self._NCMAPSS_ROOT):
_download_ncmapss(self._NCMAPSS_ROOT)
- #if not os.path.exists(self._get_scaler_path()):
- features, _, _ = self._load_data("dev")
- scaler = scaling.fit_scaler(features, MinMaxScaler())
- scaling.save_scaler(scaler, self._get_scaler_path())
+ if not os.path.exists(self._get_scaler_path()):
+     features, _, _ = self._load_data("dev")
+     scaler = scaling.fit_scaler(features, MinMaxScaler(self.scaling_range))
+     scaling.save_scaler(scaler, self._get_scaler_path())

def _get_scaler_path(self):
file_name = f"scaler_{self.fd}_{self.run_split_dist['dev']}.pkl"
file_name = (
f"scaler_{self.fd}_{self.run_split_dist['dev']}_{self.scaling_range}.pkl"
)
file_path = os.path.join(self._NCMAPSS_ROOT, file_name)

return file_path
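Two details are worth noting in this hunk: `MinMaxScaler`'s first positional argument is `feature_range`, so `MinMaxScaler(self.scaling_range)` scales each feature into the configured range; and the range now appears in the scaler's cache filename, presumably so that scalers fitted for different ranges do not overwrite each other. A minimal standalone sketch of the scaling behavior, using synthetic data for illustration:

```python
import numpy as np
from sklearn.preprocessing import MinMaxScaler

rng = np.random.default_rng(0)
features = rng.normal(size=(100, 3))  # 100 samples, 3 feature channels

# Positional feature_range, matching MinMaxScaler(self.scaling_range) above.
scaler = MinMaxScaler((-1, 1))
scaled = scaler.fit_transform(features)

# Every column is mapped into the configured range.
assert scaled.min() >= -1.0 and scaled.max() <= 1.0
```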
@@ -304,7 +306,7 @@ def _select_units(self, units, split):
return [units[i] for i in self.run_split_dist[split]]

def _window_by_cycle(
self, features: np.ndarray, targets: np.ndarray, auxiliary: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
cycle_end_idx = self._get_end_idx(auxiliary[:, 1])
split_features = np.split(features, cycle_end_idx[:-1])
Empty file removed tests/reader/test_ncmapps.py
15 changes: 7 additions & 8 deletions tests/reader/test_ncmapss.py
@@ -47,20 +47,19 @@ def test_prepare_data(should_run, mocker):
mock_save_scaler.assert_not_called()



@pytest.mark.needs_data
@pytest.mark.parametrize("scaling_range", [(-1.0, 1.0), (0.0, 2.0)])
@pytest.mark.parametrize("scaling_range", [(-1, 1), (0, 1)])
def test_scaling_range(scaling_range):
reader = NCmapssReader(fd=1, scaling_range=scaling_range)
reader.prepare_data()
features, _ = reader.load_split("dev")

reader = NCmapssReader(fd=1, scaling_range=(0, 1))
reader.prepare_data()
features_default, _ = reader.load_split("dev")

assert not np.array_equal(features[0][:, :, 1], features_default[0][:, :, 1])

min_val, max_val = scaling_range
for feature in features:
    flat_features = feature.flatten()
    np.testing.assert_almost_equal(
        flat_features, np.clip(flat_features, min_val, max_val)
    )
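The clip-based assertion is a compact range check: an array equals its clipped copy exactly when every element already lies within `[min_val, max_val]`. A tiny standalone illustration:

```python
import numpy as np

values = np.array([-0.5, 0.0, 0.99])
# Passes: nothing is clipped, so the array equals its clipped copy.
np.testing.assert_almost_equal(values, np.clip(values, -1.0, 1.0))
```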


@pytest.mark.needs_data
