def extract_windows(
    seq: np.ndarray, window_size: int, dilation: int = 1
) -> np.ndarray:
    """
    Extract sliding windows from a sequence.

    The window is moved along the first axis of `seq` with a step size of one.
    Each window contains `window_size` time steps that are `dilation` steps
    apart, so a single window covers `(window_size - 1) * dilation + 1`
    consecutive time steps. This results in `len(seq) - (window_size - 1) *
    dilation` extracted windows. With the default dilation of one, the windows
    contain consecutive time steps and `len(seq) - window_size + 1` windows are
    extracted.

    The resulting array has the shape [num_windows, window_size, ...], where the
    trailing dimensions are the remaining dimensions of `seq`, e.g. the channels.

    Args:
        seq: sequence to extract windows from
        window_size: length of the sliding window
        dilation: dilation of the sliding window, i.e. the spacing between the
                  time steps inside a window
    Returns:
        array of sliding windows
    Raises:
        ValueError: if `dilation` is smaller than one or if not even a single
                    dilated window fits into the sequence
    """
    if dilation < 1:
        # zero would repeat one time step, negative values would silently
        # wrap around to the end of the sequence
        raise ValueError(f"Dilation must be at least one, but got {dilation}.")

    seq_len = len(seq)
    num_frames = seq_len - (window_size - 1) * dilation
    if num_frames < 1:
        # np.arange yields an empty range for non-positive lengths, which
        # would silently return zero windows instead of failing
        raise ValueError(
            f"Cannot extract windows of size {window_size} with dilation "
            f"{dilation} from a sequence of length {seq_len}."
        )

    # offsets of the time steps inside a window, shape [1, window_size]
    window_offsets = np.arange(window_size)[None, :] * dilation
    # start index of each window, shape [num_frames, 1]
    window_starts = np.arange(num_frames)[:, None]
    # broadcasting yields the index grid of shape [num_frames, window_size]
    window_idx = window_offsets + window_starts
    windows = seq[window_idx]

    return windows


def feature_to_tensor(features: np.ndarray, dtype: torch.dtype) -> torch.Tensor:
    """
    Convert a numpy array to a torch tensor of `dtype` and swap the last dimensions.

    The function assumes that the last dimension of the numpy array is the channel
    dimension, and the second to last is the time dimension. All preceding dimensions
    are considered to be batch dimensions.

    Args:
        features: numpy array to convert
        dtype: dtype of the resulting tensor
    Returns:
        tensor of `dtype` with the last two dimensions of `features` swapped
    """
    return torch.transpose(torch.tensor(features, dtype=dtype), -1, -2)