From a1fc60ea4e4807e58c73021521dca1ac16e4e72e Mon Sep 17 00:00:00 2001 From: Judyxujj Date: Fri, 20 Oct 2023 11:43:57 +0200 Subject: [PATCH 01/16] add generic frontend --- i6_models/parts/frontend/generic_frontend.py | 225 +++++++++++++++++++ 1 file changed, 225 insertions(+) create mode 100644 i6_models/parts/frontend/generic_frontend.py diff --git a/i6_models/parts/frontend/generic_frontend.py b/i6_models/parts/frontend/generic_frontend.py new file mode 100644 index 00000000..eefc2c51 --- /dev/null +++ b/i6_models/parts/frontend/generic_frontend.py @@ -0,0 +1,225 @@ +from __future__ import annotations + +__all__ = [ + "FrontendLayerType", + "GenericFrontendV1Config", + "GenericFrontendV1", +] + +from dataclasses import dataclass +from enum import Enum, auto +from typing import Callable, Optional, Tuple, Union, Sequence + +import torch +from torch import nn + +import sys + +sys.path.insert(0, "/Users/jxu/Desktop/PR/i6_models") +from i6_models.config import ModelConfiguration + +from i6_models.parts.frontend.common import get_same_padding, mask_pool, calculate_output_dim + + +class FrontendLayerType(Enum): + Conv2d = auto() + Pool2d = auto() + Activation = auto() + + +@dataclass +class GenericFrontendV1Config(ModelConfiguration): + """ + Attributes: + in_features: number of input features to module + layer_ordering: the ordering of the front end layer sequences, the ordering element must be selected from FrontendLayerType + e.g. the ordering of VGG4LayerActFrontendV1 would be [FrontendLayerType.Conv2d, FrontendLayerType.Activation, + FrontendLayerType.Pool2d, FrontendLayerType.Conv2d, FrontendLayerType.Conv2d, FrontendLayerType.Activation, FrontendLayerType.Pool2d] + conv_kernel_sizes: kernel sizes for each conv layer + conv_strides: stride sizes for each conv layer + conv_paddings: paddings sizes for each conv layer + conv_out_dims: number of out channels for each conv layer + pool_kernel_sizes: kernel sizes for each pool layer + pool_strides: stride sizes for each pool layer + pool_paddings: padding sizes for each pool layer + activations: activation functions + out_features: output size of the final linear layer + """ + + in_features: int + layer_ordering: Sequence[FrontendLayerType] + conv_kernel_sizes: Optional[Sequence[Union[int, Tuple[int, int]]]] + conv_strides: Optional[Sequence[Union[int, Tuple[int, int]]]] + conv_paddings: Optional[Sequence[Union[int, Tuple[int, int]]]] + conv_out_dims: Optional[Sequence[Union[int, Tuple[int, int]]]] + pool_kernel_sizes: Optional[Sequence[Union[int, Tuple[int, int]]]] + pool_strides: Optional[Sequence[Union[int, Tuple[int, int]]]] + pool_paddings: Optional[Sequence[Union[int, Tuple[int, int]]]] + activations: Optional[Sequence[Union[nn.Module, Callable[[torch.Tensor], torch.Tensor]]]] + out_features: int + + def check_valid(self): + num_convs = 0 if self.conv_kernel_sizes is None else len(self.conv_kernel_sizes) + num_pools = 0 if self.pool_kernel_sizes is None else len(self.pool_kernel_sizes) + num_activations = 0 if self.activations is None else len(self.activations) + + assert num_convs == self.layer_ordering.count( + FrontendLayerType.Conv2d + ), "Number of convolution layers mismatch!" + assert num_activations == self.layer_ordering.count( + FrontendLayerType.Activation + ), "Number of activation layers mismatch!" + assert num_pools == self.layer_ordering.count(FrontendLayerType.Pool2d), "Number of pooling layers mismatch!" 
+
+        if self.conv_strides is not None:
+            assert len(self.conv_strides) == num_convs, "Please specify stride for each convolution layer!"
+        if self.conv_paddings is not None:
+            assert len(self.conv_paddings) == num_convs, "Please specify padding for each convolution layer!"
+        if num_convs != 0:
+            assert (
+                len(self.conv_out_dims) == num_convs
+            ), "Please specify the number of channels for each convolution layer!"
+
+        if self.pool_strides is not None:
+            assert len(self.pool_strides) == num_pools, "Please specify stride for each pooling layer!"
+        if self.conv_paddings is not None:
+            assert len(self.pool_paddings) == num_pools, "Please specify padding for each pooling layer!"
+
+        assert len(self.layer_ordering) == num_convs + num_pools + num_activations, "Number of total layers mismatch!"
+
+        for kernel_sizes in [self.conv_kernel_sizes, self.pool_kernel_sizes]:
+            if kernel_sizes is not None:
+                for kernel_size in kernel_sizes:
+                    if isinstance(kernel_size, int):
+                        assert kernel_size % 2 == 1, "ConformerVGGFrontendV1 only supports odd kernel sizes"
+                    elif isinstance(kernel_size, tuple):
+                        for i in range(len(kernel_size)):
+                            assert kernel_size[i] % 2 == 1, "ConformerVGGFrontendV1 only supports odd kernel sizes"
+
+    def __post_init__(self):
+        super().__post_init__()
+        self.check_valid()
+
+
+class GenericFrontendV1(nn.Module):
+    def __init__(self, model_cfg: GenericFrontendV1Config):
+        """
+        Generic Front-End
+        can be used to generate customized frontend by combine convolutional and pooling layers, as well as activation functions different
+
+        To get the ESPnet case, for example Conv2dSubsampling6, use these options
+        layer_ordering = [FrontendLayerType.Conv2d, FrontendLayerType.Conv2d]
+        conv_kernel_sizes = [3, 5]
+        strides = [2, 3]
+
+        To get the i6_models VGG4LayerActFrontendV1, use the options:
+        layer_ordering = [FrontendLayerType.Conv2d, FrontendLayerType.Activation, FrontendLayerType.Pool2d,
+            FrontendLayerType.Conv2d, FrontendLayerType.Conv2d, FrontendLayerType.Activation, FrontendLayerType.Pool2d]
+        conv_kernel_sizes = [3, 3, 3]
+        conv_out_dims = [32, 34, 64]
+        pool_kernel_sizes = [3, 3]
+        pool_strides = [2, 2]
+        activations = [torch.nn.ReLU(), torch.nn.ReLU()]
+        """
+        super().__init__()
+
+        model_cfg.check_valid()
+
+        self.cfg = model_cfg
+
+        self.frontend_layers = nn.ModuleList([])
+
+        conv_layer_index = 0
+        pool_layer_index = 0
+        activation_layer_index = 0
+        last_channel_dim = 1
+        last_feat_dim = model_cfg.in_features
+        for layer_type in model_cfg.layer_ordering:
+            if layer_type == FrontendLayerType.Conv2d:
+                conv_out_dim = model_cfg.conv_out_dims[conv_layer_index]
+                conv_kernel_size = model_cfg.conv_kernel_sizes[conv_layer_index]
+                conv_stride = 1 if model_cfg.conv_strides is None else model_cfg.conv_strides[conv_layer_index]
+                conv_padding = (
+                    get_same_padding(conv_kernel_size)
+                    if model_cfg.conv_paddings is None
+                    else model_cfg.conv_paddings[conv_layer_index]
+                )
+
+                self.frontend_layers.append(
+                    nn.Conv2d(
+                        in_channels=last_channel_dim,
+                        out_channels=conv_out_dim,
+                        kernel_size=conv_kernel_size,
+                        stride=conv_stride,
+                        padding=conv_padding,
+                    )
+                )
+
+                last_channel_dim = conv_out_dim
+                last_feat_dim = calculate_output_dim(
+                    in_dim=last_feat_dim,
+                    filter_size=conv_kernel_size if isinstance(conv_kernel_size, int) else conv_kernel_size[1],
+                    stride=conv_stride if isinstance(conv_stride, int) else conv_stride[1],
+                    padding=conv_padding if isinstance(conv_padding, int) else conv_padding[1],
+                )
+                conv_layer_index += 1
+
+            elif layer_type == FrontendLayerType.Pool2d:
+                
pool_stride = 1 if model_cfg.pool_strides is None else model_cfg.pool_strides[pool_layer_index] + pool_kernel_size = model_cfg.pool_kernel_sizes[pool_layer_index] + pool_padding = ( + get_same_padding(pool_kernel_size) + if model_cfg.pool_paddings is None + else model_cfg.pool_paddings[pool_layer_index] + ) + + self.frontend_layers.append( + nn.MaxPool2d( + kernel_size=pool_kernel_size, + stride=pool_stride, + padding=pool_padding, + ) + ) + last_feat_dim = calculate_output_dim( + in_dim=last_feat_dim, + filter_size=pool_kernel_size if isinstance(pool_kernel_size, int) else pool_kernel_size[1], + stride=pool_stride if isinstance(pool_stride, int) else pool_stride[1], + padding=pool_padding if isinstance(pool_padding, int) else pool_padding[1], + ) + pool_layer_index += 1 + + elif layer_type == FrontendLayerType.Activation: + self.frontend_layers.append(model_cfg.activations[activation_layer_index]) + activation_layer_index += 1 + else: + raise NotImplementedError + + self.linear = nn.Linear( + in_features=last_feat_dim * last_channel_dim, + out_features=model_cfg.out_features, + bias=True, + ) + + def forward(self, tensor: torch.Tensor, sequence_mask: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + assert tensor.shape[-1] == self.cfg.in_features + # and add a dim + tensor = tensor[:, None, :, :] # [B,C=1,T,F] + + for i in range(len(self.cfg.layer_ordering)): + layer = self.frontend_layers[i] + tensor = layer(tensor) + + if isinstance(layer, nn.Conv2d) or isinstance(layer, nn.MaxPool2d): + sequence_mask = mask_pool( + sequence_mask, + kernel_size=layer.kernel_size[0], + stride=layer.stride[0], + padding=layer.padding[0], + ) + + tensor = torch.transpose(tensor, 1, 2) # transpose to [B,T",C,F"] + tensor = torch.flatten(tensor, start_dim=2, end_dim=-1) # [B,T",C*F"] + + tensor = self.linear(tensor) + + return tensor, sequence_mask From 370fb1070740b9c70ffdd23d34c9d9b4ab62309d Mon Sep 17 00:00:00 2001 From: Judyxujj Date: Fri, 20 Oct 2023 12:41:40 +0200 Subject: [PATCH 02/16] add test --- i6_models/parts/frontend/generic_frontend.py | 9 +- tests/test_generic_frontend.py | 174 +++++++++++++++++++ 2 files changed, 177 insertions(+), 6 deletions(-) create mode 100644 tests/test_generic_frontend.py diff --git a/i6_models/parts/frontend/generic_frontend.py b/i6_models/parts/frontend/generic_frontend.py index eefc2c51..9a913e1a 100644 --- a/i6_models/parts/frontend/generic_frontend.py +++ b/i6_models/parts/frontend/generic_frontend.py @@ -13,9 +13,6 @@ import torch from torch import nn -import sys - -sys.path.insert(0, "/Users/jxu/Desktop/PR/i6_models") from i6_models.config import ModelConfiguration from i6_models.parts.frontend.common import get_same_padding, mask_pool, calculate_output_dim @@ -212,9 +209,9 @@ def forward(self, tensor: torch.Tensor, sequence_mask: torch.Tensor) -> Tuple[to if isinstance(layer, nn.Conv2d) or isinstance(layer, nn.MaxPool2d): sequence_mask = mask_pool( sequence_mask, - kernel_size=layer.kernel_size[0], - stride=layer.stride[0], - padding=layer.padding[0], + kernel_size=layer.kernel_size if isinstance(layer.kernel_size, int) else layer.kernel_size[0], + stride=layer.stride if isinstance(layer.stride, int) else layer.stride[0], + padding=layer.padding if isinstance(layer.padding, int) else layer.padding[0], ) tensor = torch.transpose(tensor, 1, 2) # transpose to [B,T",C,F"] diff --git a/tests/test_generic_frontend.py b/tests/test_generic_frontend.py new file mode 100644 index 00000000..5c7d4842 --- /dev/null +++ b/tests/test_generic_frontend.py @@ -0,0 +1,174 @@ 
+from __future__ import annotations + +from dataclasses import dataclass +from typing import Optional, Tuple, Sequence, Union, Callable, List + +import torch +from torch import nn + +import sys + +sys.path.insert(0, "/Users/jxu/Desktop/PR/i6_models") + +from i6_models.parts.frontend.generic_frontend import FrontendLayerType, GenericFrontendV1, GenericFrontendV1Config + + +@dataclass +class GenericFrontendV1TestParams: + batch: int + time: int + in_features: int + layer_ordering: Sequence[FrontendLayerType] + conv_kernel_sizes: Optional[Sequence[Union[int, Tuple[int, int]]]] + conv_strides: Optional[Sequence[Union[int, Tuple[int, int]]]] + conv_paddings: Optional[Sequence[Union[int, Tuple[int, int]]]] + conv_out_dims: Optional[Sequence[Union[int, Tuple[int, int]]]] + pool_kernel_sizes: Optional[Sequence[Union[int, Tuple[int, int]]]] + pool_strides: Optional[Sequence[Union[int, Tuple[int, int]]]] + pool_paddings: Optional[Sequence[Union[int, Tuple[int, int]]]] + activations: Optional[Sequence[Union[nn.Module, Callable[[torch.Tensor], torch.Tensor]]]] + out_features: int + output_shape: List[int] + in_sequence_mask: torch.Tensor + out_sequence_mask: torch.Tensor + + +def test_generic_frontend_v1(): + torch.manual_seed(42) + + def get_output_shape(test_parameters: GenericFrontendV1TestParams): + data_input = torch.randn( + test_parameters.batch, + test_parameters.time, + test_parameters.in_features, + ) + + cfg = GenericFrontendV1Config( + in_features=test_parameters.in_features, + layer_ordering=test_parameters.layer_ordering, + conv_kernel_sizes=test_parameters.conv_kernel_sizes, + conv_strides=test_parameters.conv_strides, + conv_paddings=test_parameters.conv_paddings, + conv_out_dims=test_parameters.conv_out_dims, + pool_kernel_sizes=test_parameters.pool_kernel_sizes, + pool_strides=test_parameters.pool_strides, + pool_paddings=test_parameters.pool_paddings, + activations=test_parameters.activations, + out_features=test_parameters.out_features, + ) + + output, sequence_mask = GenericFrontendV1(cfg)( + data_input, + test_parameters.in_sequence_mask, + ) + + return output.shape, sequence_mask + + for idx, test_params in enumerate( + [ + GenericFrontendV1TestParams( + batch=10, + time=50, + in_features=50, + layer_ordering=[FrontendLayerType.Conv2d, FrontendLayerType.Conv2d], + conv_kernel_sizes=[3, 5], + conv_strides=[1, 1], + conv_paddings=None, + conv_out_dims=[32, 32], + pool_kernel_sizes=None, + pool_strides=None, + pool_paddings=None, + activations=None, + out_features=384, + output_shape=[10, 50, 384], + in_sequence_mask=torch.Tensor(10 * [25 * [True] + 25 * [False]]).bool(), + out_sequence_mask=torch.Tensor(10 * [25 * [True] + 25 * [False]]).bool(), + ), + GenericFrontendV1TestParams( + batch=10, + time=50, + in_features=50, + layer_ordering=[ + FrontendLayerType.Conv2d, + FrontendLayerType.Pool2d, + FrontendLayerType.Conv2d, + FrontendLayerType.Activation, + ], + conv_kernel_sizes=[3, 5], + conv_strides=[1, 1], + conv_paddings=None, + conv_out_dims=[32, 32], + pool_kernel_sizes=[3], + pool_strides=[2], + pool_paddings=None, + activations=[nn.SiLU()], + out_features=384, + output_shape=[10, 25, 384], + in_sequence_mask=torch.Tensor(10 * [50 * [True] + 0 * [False]]).bool(), + out_sequence_mask=torch.Tensor(10 * [25 * [True] + 0 * [False]]).bool(), + ), + GenericFrontendV1TestParams( + batch=10, + time=50, + in_features=50, + layer_ordering=[ + FrontendLayerType.Conv2d, + FrontendLayerType.Pool2d, + FrontendLayerType.Activation, + FrontendLayerType.Conv2d, + 
FrontendLayerType.Activation, + ], + conv_kernel_sizes=[3, (3, 5)], + conv_strides=[1, (2, 1)], + conv_paddings=None, + conv_out_dims=[32, 32], + pool_kernel_sizes=[3], + pool_strides=[2], + pool_paddings=None, + activations=[nn.SiLU(), nn.SiLU()], + out_features=384, + output_shape=[10, 13, 384], + in_sequence_mask=torch.Tensor(10 * [50 * [True] + 0 * [False]]).bool(), + out_sequence_mask=torch.Tensor(10 * [13 * [True] + 0 * [False]]).bool(), + ), + GenericFrontendV1TestParams( + batch=10, + time=50, + in_features=50, + layer_ordering=[ + FrontendLayerType.Conv2d, + FrontendLayerType.Pool2d, + FrontendLayerType.Activation, + FrontendLayerType.Conv2d, + FrontendLayerType.Pool2d, + FrontendLayerType.Activation, + ], + conv_kernel_sizes=[3, (3, 5)], + conv_strides=[1, (2, 2)], + conv_paddings=None, + conv_out_dims=[32, 32], + pool_kernel_sizes=[3, 5], + pool_strides=[(1, 2), (2, 3)], + pool_paddings=None, + activations=[nn.SiLU(), nn.SiLU()], + out_features=384, + output_shape=[10, 13, 384], + in_sequence_mask=torch.Tensor(10 * [50 * [True] + 0 * [False]]).bool(), + out_sequence_mask=torch.Tensor(10 * [13 * [True] + 0 * [False]]).bool(), + ), + ] + ): + print(idx) + shape, seq_mask = get_output_shape(test_params) + print(shape) + print(test_params.output_shape) + assert list(shape) == test_params.output_shape, (type(shape), type(test_params.output_shape)) + assert torch.equal(seq_mask, test_params.out_sequence_mask), ( + seq_mask.shape, + test_params.out_sequence_mask.shape, + ) + + return + + +test_generic_frontend_v1() From f26d94cd0699ae4def90fcad5cd31d2078691a79 Mon Sep 17 00:00:00 2001 From: Judyxujj Date: Fri, 20 Oct 2023 12:43:41 +0200 Subject: [PATCH 03/16] update --- i6_models/parts/frontend/generic_frontend.py | 6 ++++-- tests/test_generic_frontend.py | 7 ------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/i6_models/parts/frontend/generic_frontend.py b/i6_models/parts/frontend/generic_frontend.py index 9a913e1a..e163815f 100644 --- a/i6_models/parts/frontend/generic_frontend.py +++ b/i6_models/parts/frontend/generic_frontend.py @@ -31,7 +31,8 @@ class GenericFrontendV1Config(ModelConfiguration): in_features: number of input features to module layer_ordering: the ordering of the front end layer sequences, the ordering element must be selected from FrontendLayerType e.g. 
the ordering of VGG4LayerActFrontendV1 would be [FrontendLayerType.Conv2d, FrontendLayerType.Activation, - FrontendLayerType.Pool2d, FrontendLayerType.Conv2d, FrontendLayerType.Conv2d, FrontendLayerType.Activation, FrontendLayerType.Pool2d] + FrontendLayerType.Pool2d, FrontendLayerType.Conv2d, FrontendLayerType.Conv2d, FrontendLayerType.Activation, + FrontendLayerType.Pool2d] conv_kernel_sizes: kernel sizes for each conv layer conv_strides: stride sizes for each conv layer conv_paddings: paddings sizes for each conv layer @@ -102,7 +103,8 @@ class GenericFrontendV1(nn.Module): def __init__(self, model_cfg: GenericFrontendV1Config): """ Generic Front-End - can be used to generate customized frontend by combine convolutional and pooling layers, as well as activation functions different + can be used to generate customized frontend by combining convolutional and pooling layers, as well as activation + functions differently To get the ESPnet case, for example Conv2dSubsampling6, use these options layer_ordering = [FrontendLayerType.Conv2d, FrontendLayerType.Conv2d] diff --git a/tests/test_generic_frontend.py b/tests/test_generic_frontend.py index 5c7d4842..dcab2984 100644 --- a/tests/test_generic_frontend.py +++ b/tests/test_generic_frontend.py @@ -6,10 +6,6 @@ import torch from torch import nn -import sys - -sys.path.insert(0, "/Users/jxu/Desktop/PR/i6_models") - from i6_models.parts.frontend.generic_frontend import FrontendLayerType, GenericFrontendV1, GenericFrontendV1Config @@ -169,6 +165,3 @@ def get_output_shape(test_parameters: GenericFrontendV1TestParams): ) return - - -test_generic_frontend_v1() From 662f7fa1185c7530cbd7d02778fb9f6286c8c345 Mon Sep 17 00:00:00 2001 From: Judyxujj Date: Fri, 20 Oct 2023 12:48:18 +0200 Subject: [PATCH 04/16] no return --- tests/test_generic_frontend.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_generic_frontend.py b/tests/test_generic_frontend.py index dcab2984..a2bd55cd 100644 --- a/tests/test_generic_frontend.py +++ b/tests/test_generic_frontend.py @@ -162,6 +162,4 @@ def get_output_shape(test_parameters: GenericFrontendV1TestParams): assert torch.equal(seq_mask, test_params.out_sequence_mask), ( seq_mask.shape, test_params.out_sequence_mask.shape, - ) - - return + ) \ No newline at end of file From cac9961940fefcfba36b872e93992b9d733fc449 Mon Sep 17 00:00:00 2001 From: Judyxujj Date: Fri, 20 Oct 2023 12:49:52 +0200 Subject: [PATCH 05/16] black format --- tests/test_generic_frontend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_generic_frontend.py b/tests/test_generic_frontend.py index a2bd55cd..e9cf537f 100644 --- a/tests/test_generic_frontend.py +++ b/tests/test_generic_frontend.py @@ -162,4 +162,4 @@ def get_output_shape(test_parameters: GenericFrontendV1TestParams): assert torch.equal(seq_mask, test_params.out_sequence_mask), ( seq_mask.shape, test_params.out_sequence_mask.shape, - ) \ No newline at end of file + ) From 84fcb74dee36f93a868747fdb11c2f156c0d748a Mon Sep 17 00:00:00 2001 From: Judyxujj Date: Fri, 10 Nov 2023 15:07:47 +0100 Subject: [PATCH 06/16] Update i6_models/parts/frontend/generic_frontend.py Co-authored-by: SimBe195 <37951951+SimBe195@users.noreply.github.com> --- i6_models/parts/frontend/generic_frontend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/i6_models/parts/frontend/generic_frontend.py b/i6_models/parts/frontend/generic_frontend.py index e163815f..a50111dd 100644 --- a/i6_models/parts/frontend/generic_frontend.py +++ 
b/i6_models/parts/frontend/generic_frontend.py
@@ -80,7 +80,7 @@ def check_valid(self):
 
         if self.pool_strides is not None:
             assert len(self.pool_strides) == num_pools, "Please specify stride for each pooling layer!"
-        if self.conv_paddings is not None:
+        if self.pool_paddings is not None:
             assert len(self.pool_paddings) == num_pools, "Please specify padding for each pooling layer!"
 
         assert len(self.layer_ordering) == num_convs + num_pools + num_activations, "Number of total layers mismatch!"
From dde2533d2994824acb148a2d06c569ac4d97eefb Mon Sep 17 00:00:00 2001
From: Judyxujj
Date: Fri, 10 Nov 2023 15:21:39 +0100
Subject: [PATCH 07/16] Update i6_models/parts/frontend/generic_frontend.py

Co-authored-by: SimBe195 <37951951+SimBe195@users.noreply.github.com>
---
 i6_models/parts/frontend/generic_frontend.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/i6_models/parts/frontend/generic_frontend.py b/i6_models/parts/frontend/generic_frontend.py
index a50111dd..9b95f6c1 100644
--- a/i6_models/parts/frontend/generic_frontend.py
+++ b/i6_models/parts/frontend/generic_frontend.py
@@ -85,14 +85,9 @@ def check_valid(self):
 
         assert len(self.layer_ordering) == num_convs + num_pools + num_activations, "Number of total layers mismatch!"
 
-        for kernel_sizes in [self.conv_kernel_sizes, self.pool_kernel_sizes]:
-            if kernel_sizes is not None:
-                for kernel_size in kernel_sizes:
-                    if isinstance(kernel_size, int):
-                        assert kernel_size % 2 == 1, "ConformerVGGFrontendV1 only supports odd kernel sizes"
-                    elif isinstance(kernel_size, tuple):
-                        for i in range(len(kernel_size)):
-                            assert kernel_size[i] % 2 == 1, "ConformerVGGFrontendV1 only supports odd kernel sizes"
+        for kernel_sizes in filter(None, [self.conv_kernel_sizes, self.pool_kernel_sizes]):
+            for kernel_size in kernel_sizes:
+                assert all(k % 2 for k in kernel_size), "ConformerVGGFrontendV1 only supports odd kernel sizes"
 
     def __post_init__(self):
         super().__post_init__()
From aea612238919a240b5f02aa0b82d1ffe66f7def9 Mon Sep 17 00:00:00 2001
From: Judyxujj
Date: Fri, 10 Nov 2023 15:21:51 +0100
Subject: [PATCH 08/16] Update i6_models/parts/frontend/generic_frontend.py

Co-authored-by: SimBe195 <37951951+SimBe195@users.noreply.github.com>
---
 i6_models/parts/frontend/generic_frontend.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/i6_models/parts/frontend/generic_frontend.py b/i6_models/parts/frontend/generic_frontend.py
index 9b95f6c1..669cf646 100644
--- a/i6_models/parts/frontend/generic_frontend.py
+++ b/i6_models/parts/frontend/generic_frontend.py
@@ -46,13 +46,13 @@ class GenericFrontendV1Config(ModelConfiguration):
 
     in_features: int
     layer_ordering: Sequence[FrontendLayerType]
-    conv_kernel_sizes: Optional[Sequence[Union[int, Tuple[int, int]]]]
-    conv_strides: Optional[Sequence[Union[int, Tuple[int, int]]]]
-    conv_paddings: Optional[Sequence[Union[int, Tuple[int, int]]]]
-    conv_out_dims: Optional[Sequence[Union[int, Tuple[int, int]]]]
-    pool_kernel_sizes: Optional[Sequence[Union[int, Tuple[int, int]]]]
-    pool_strides: Optional[Sequence[Union[int, Tuple[int, int]]]]
-    pool_paddings: Optional[Sequence[Union[int, Tuple[int, int]]]]
+    conv_kernel_sizes: Optional[Sequence[Tuple[int, int]]]
+    conv_strides: Optional[Sequence[Tuple[int, int]]]
+    conv_paddings: Optional[Sequence[Tuple[int, int]]]
+    conv_out_dims: Optional[Sequence[int]]
+    pool_kernel_sizes: Optional[Sequence[Tuple[int, int]]]
+    pool_strides: Optional[Sequence[Tuple[int, int]]]
+    pool_paddings: Optional[Sequence[Tuple[int, int]]] 
activations: Optional[Sequence[Union[nn.Module, Callable[[torch.Tensor], torch.Tensor]]]] out_features: int From 695c5c8f7061cad4c5860bc48d549b42bbe5f043 Mon Sep 17 00:00:00 2001 From: Judyxujj Date: Fri, 10 Nov 2023 15:22:20 +0100 Subject: [PATCH 09/16] Update i6_models/parts/frontend/generic_frontend.py Co-authored-by: SimBe195 <37951951+SimBe195@users.noreply.github.com> --- i6_models/parts/frontend/generic_frontend.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/i6_models/parts/frontend/generic_frontend.py b/i6_models/parts/frontend/generic_frontend.py index 669cf646..07575194 100644 --- a/i6_models/parts/frontend/generic_frontend.py +++ b/i6_models/parts/frontend/generic_frontend.py @@ -152,9 +152,9 @@ def __init__(self, model_cfg: GenericFrontendV1Config): last_channel_dim = conv_out_dim last_feat_dim = calculate_output_dim( in_dim=last_feat_dim, - filter_size=conv_kernel_size if isinstance(conv_kernel_size, int) else conv_kernel_size[1], - stride=conv_stride if isinstance(conv_stride, int) else conv_stride[1], - padding=conv_padding if isinstance(conv_padding, int) else conv_padding[1], + filter_size=conv_kernel_size[1], + stride=conv_stride[1], + padding=conv_padding[1], ) conv_layer_index += 1 From ec3356fac4a5d701627799f274266080868ff96d Mon Sep 17 00:00:00 2001 From: Judyxujj Date: Fri, 10 Nov 2023 15:23:06 +0100 Subject: [PATCH 10/16] Update i6_models/parts/frontend/generic_frontend.py Co-authored-by: SimBe195 <37951951+SimBe195@users.noreply.github.com> --- i6_models/parts/frontend/generic_frontend.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/i6_models/parts/frontend/generic_frontend.py b/i6_models/parts/frontend/generic_frontend.py index 07575194..4291e2ca 100644 --- a/i6_models/parts/frontend/generic_frontend.py +++ b/i6_models/parts/frontend/generic_frontend.py @@ -206,9 +206,9 @@ def forward(self, tensor: torch.Tensor, sequence_mask: torch.Tensor) -> Tuple[to if isinstance(layer, nn.Conv2d) or isinstance(layer, nn.MaxPool2d): sequence_mask = mask_pool( sequence_mask, - kernel_size=layer.kernel_size if isinstance(layer.kernel_size, int) else layer.kernel_size[0], - stride=layer.stride if isinstance(layer.stride, int) else layer.stride[0], - padding=layer.padding if isinstance(layer.padding, int) else layer.padding[0], + kernel_size=layer.kernel_size[0], + stride=layer.stride[0], + padding=layer.padding[0], ) tensor = torch.transpose(tensor, 1, 2) # transpose to [B,T",C,F"] From 052a4a03f980e457c536e4028d371db04a2028b2 Mon Sep 17 00:00:00 2001 From: Judyxujj Date: Fri, 10 Nov 2023 15:24:18 +0100 Subject: [PATCH 11/16] Update i6_models/parts/frontend/generic_frontend.py Co-authored-by: SimBe195 <37951951+SimBe195@users.noreply.github.com> --- i6_models/parts/frontend/generic_frontend.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/i6_models/parts/frontend/generic_frontend.py b/i6_models/parts/frontend/generic_frontend.py index 4291e2ca..c0d465e6 100644 --- a/i6_models/parts/frontend/generic_frontend.py +++ b/i6_models/parts/frontend/generic_frontend.py @@ -199,8 +199,7 @@ def forward(self, tensor: torch.Tensor, sequence_mask: torch.Tensor) -> Tuple[to # and add a dim tensor = tensor[:, None, :, :] # [B,C=1,T,F] - for i in range(len(self.cfg.layer_ordering)): - layer = self.frontend_layers[i] + for layer in self.frontend_layers: tensor = layer(tensor) if isinstance(layer, nn.Conv2d) or isinstance(layer, nn.MaxPool2d): From 570d6e3c3a6f8661a0bfbcfe560b7c77b19e0538 Mon Sep 17 00:00:00 2001 From: 
Judyxujj Date: Fri, 10 Nov 2023 15:35:37 +0100 Subject: [PATCH 12/16] update --- i6_models/parts/frontend/generic_frontend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/i6_models/parts/frontend/generic_frontend.py b/i6_models/parts/frontend/generic_frontend.py index c0d465e6..da2aee25 100644 --- a/i6_models/parts/frontend/generic_frontend.py +++ b/i6_models/parts/frontend/generic_frontend.py @@ -159,7 +159,7 @@ def __init__(self, model_cfg: GenericFrontendV1Config): conv_layer_index += 1 elif layer_type == FrontendLayerType.Pool2d: - pool_stride = 1 if model_cfg.pool_strides is None else model_cfg.pool_strides[pool_layer_index] + pool_stride = None if model_cfg.pool_strides is None else model_cfg.pool_strides[pool_layer_index] pool_kernel_size = model_cfg.pool_kernel_sizes[pool_layer_index] pool_padding = ( get_same_padding(pool_kernel_size) From c8adfda259053a3645e98e3d4f23c8041eed166b Mon Sep 17 00:00:00 2001 From: Judyxujj Date: Fri, 10 Nov 2023 15:40:04 +0100 Subject: [PATCH 13/16] update --- tests/test_generic_frontend.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/tests/test_generic_frontend.py b/tests/test_generic_frontend.py index e9cf537f..9282d5e0 100644 --- a/tests/test_generic_frontend.py +++ b/tests/test_generic_frontend.py @@ -15,13 +15,13 @@ class GenericFrontendV1TestParams: time: int in_features: int layer_ordering: Sequence[FrontendLayerType] - conv_kernel_sizes: Optional[Sequence[Union[int, Tuple[int, int]]]] - conv_strides: Optional[Sequence[Union[int, Tuple[int, int]]]] - conv_paddings: Optional[Sequence[Union[int, Tuple[int, int]]]] - conv_out_dims: Optional[Sequence[Union[int, Tuple[int, int]]]] - pool_kernel_sizes: Optional[Sequence[Union[int, Tuple[int, int]]]] - pool_strides: Optional[Sequence[Union[int, Tuple[int, int]]]] - pool_paddings: Optional[Sequence[Union[int, Tuple[int, int]]]] + conv_kernel_sizes: Optional[Sequence[Tuple[int, int]]] + conv_strides: Optional[Sequence[Tuple[int, int]]] + conv_paddings: Optional[Sequence[Tuple[int, int]]] + conv_out_dims: Optional[Sequence[int]] + pool_kernel_sizes: Optional[Sequence[Tuple[int, int]]] + pool_strides: Optional[Sequence[Tuple[int, int]]] + pool_paddings: Optional[Sequence[Tuple[int, int]]] activations: Optional[Sequence[Union[nn.Module, Callable[[torch.Tensor], torch.Tensor]]]] out_features: int output_shape: List[int] @@ -154,11 +154,8 @@ def get_output_shape(test_parameters: GenericFrontendV1TestParams): ), ] ): - print(idx) shape, seq_mask = get_output_shape(test_params) - print(shape) - print(test_params.output_shape) - assert list(shape) == test_params.output_shape, (type(shape), type(test_params.output_shape)) + assert list(shape) == test_params.output_shape, (shape, test_params.output_shape) assert torch.equal(seq_mask, test_params.out_sequence_mask), ( seq_mask.shape, test_params.out_sequence_mask.shape, From fc69b36607b046b82c83b7a99dc8d546d77223f9 Mon Sep 17 00:00:00 2001 From: Judyxujj Date: Fri, 10 Nov 2023 15:40:56 +0100 Subject: [PATCH 14/16] blank --- i6_models/parts/frontend/generic_frontend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/i6_models/parts/frontend/generic_frontend.py b/i6_models/parts/frontend/generic_frontend.py index da2aee25..31ed252a 100644 --- a/i6_models/parts/frontend/generic_frontend.py +++ b/i6_models/parts/frontend/generic_frontend.py @@ -87,7 +87,7 @@ def check_valid(self): for kernel_sizes in filter(None, [self.conv_kernel_sizes, self.pool_kernel_sizes]): for kernel_size in 
kernel_sizes: - assert all(k % 2 for k in kernel_size), "ConformerVGGFrontendV1 only supports odd kernel sizes" + assert all(k % 2 for k in kernel_size), "ConformerVGGFrontendV1 only supports odd kernel sizes" def __post__init__(self): super().__post_init__() From ad1774650e7d6d506de5ced711d0a78ff908ea0c Mon Sep 17 00:00:00 2001 From: Judyxujj Date: Fri, 10 Nov 2023 15:59:22 +0100 Subject: [PATCH 15/16] update --- tests/test_generic_frontend.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/test_generic_frontend.py b/tests/test_generic_frontend.py index 9282d5e0..916efa45 100644 --- a/tests/test_generic_frontend.py +++ b/tests/test_generic_frontend.py @@ -67,8 +67,8 @@ def get_output_shape(test_parameters: GenericFrontendV1TestParams): time=50, in_features=50, layer_ordering=[FrontendLayerType.Conv2d, FrontendLayerType.Conv2d], - conv_kernel_sizes=[3, 5], - conv_strides=[1, 1], + conv_kernel_sizes=[(3, 3), (5, 5)], + conv_strides=[(1, 1), (1, 1)], conv_paddings=None, conv_out_dims=[32, 32], pool_kernel_sizes=None, @@ -90,12 +90,12 @@ def get_output_shape(test_parameters: GenericFrontendV1TestParams): FrontendLayerType.Conv2d, FrontendLayerType.Activation, ], - conv_kernel_sizes=[3, 5], - conv_strides=[1, 1], + conv_kernel_sizes=[(3, 3), (5, 5)], + conv_strides=[(1, 1), (1, 1)], conv_paddings=None, conv_out_dims=[32, 32], - pool_kernel_sizes=[3], - pool_strides=[2], + pool_kernel_sizes=[(3, 3)], + pool_strides=[(2, 2)], pool_paddings=None, activations=[nn.SiLU()], out_features=384, @@ -114,12 +114,12 @@ def get_output_shape(test_parameters: GenericFrontendV1TestParams): FrontendLayerType.Conv2d, FrontendLayerType.Activation, ], - conv_kernel_sizes=[3, (3, 5)], - conv_strides=[1, (2, 1)], + conv_kernel_sizes=[(3, 3), (3, 5)], + conv_strides=[(1, 1), (2, 1)], conv_paddings=None, conv_out_dims=[32, 32], - pool_kernel_sizes=[3], - pool_strides=[2], + pool_kernel_sizes=[(3, 3)], + pool_strides=[(2, 2)], pool_paddings=None, activations=[nn.SiLU(), nn.SiLU()], out_features=384, @@ -139,11 +139,11 @@ def get_output_shape(test_parameters: GenericFrontendV1TestParams): FrontendLayerType.Pool2d, FrontendLayerType.Activation, ], - conv_kernel_sizes=[3, (3, 5)], - conv_strides=[1, (2, 2)], + conv_kernel_sizes=[(3, 3), (3, 5)], + conv_strides=[(1, 1), (2, 2)], conv_paddings=None, conv_out_dims=[32, 32], - pool_kernel_sizes=[3, 5], + pool_kernel_sizes=[(3, 3), (5, 5)], pool_strides=[(1, 2), (2, 3)], pool_paddings=None, activations=[nn.SiLU(), nn.SiLU()], From 261b4f6573283e1b6172c12a77fc83c9ebaf06ab Mon Sep 17 00:00:00 2001 From: Judyxujj Date: Fri, 10 Nov 2023 16:07:59 +0100 Subject: [PATCH 16/16] Update i6_models/parts/frontend/generic_frontend.py Co-authored-by: SimBe195 <37951951+SimBe195@users.noreply.github.com> --- i6_models/parts/frontend/generic_frontend.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/i6_models/parts/frontend/generic_frontend.py b/i6_models/parts/frontend/generic_frontend.py index 31ed252a..3df213d9 100644 --- a/i6_models/parts/frontend/generic_frontend.py +++ b/i6_models/parts/frontend/generic_frontend.py @@ -176,9 +176,9 @@ def __init__(self, model_cfg: GenericFrontendV1Config): ) last_feat_dim = calculate_output_dim( in_dim=last_feat_dim, - filter_size=pool_kernel_size if isinstance(pool_kernel_size, int) else pool_kernel_size[1], - stride=pool_stride if isinstance(pool_stride, int) else pool_stride[1], - padding=pool_padding if isinstance(pool_padding, int) else pool_padding[1], + 
filter_size=pool_kernel_size[1],
+                    stride=pool_kernel_size[1] if pool_stride is None else pool_stride[1],
+                    padding=pool_padding[1],
                 )
                 pool_layer_index += 1
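
For reference, a minimal usage sketch of the frontend as it stands after PATCH 16/16. This is an illustration only: the 80-dim input and the particular layer stack below are made up for the example and are not taken from the PR (the tests above use in_features=50); it assumes torch and i6_models are importable.

import torch
from torch import nn

from i6_models.parts.frontend.generic_frontend import (
    FrontendLayerType,
    GenericFrontendV1,
    GenericFrontendV1Config,
)

# Conv2d -> Pool2d -> Conv2d -> Activation stack, analogous to the second test case.
# All kernel sizes must be odd (enforced by check_valid); paddings left as None fall
# back to "same" padding via get_same_padding.
cfg = GenericFrontendV1Config(
    in_features=80,  # e.g. 80-dim log-mel features (illustrative)
    layer_ordering=[
        FrontendLayerType.Conv2d,
        FrontendLayerType.Pool2d,
        FrontendLayerType.Conv2d,
        FrontendLayerType.Activation,
    ],
    conv_kernel_sizes=[(3, 3), (5, 5)],
    conv_strides=[(1, 1), (1, 1)],
    conv_paddings=None,
    conv_out_dims=[32, 32],
    pool_kernel_sizes=[(3, 3)],
    pool_strides=[(2, 2)],
    pool_paddings=None,
    activations=[nn.SiLU()],
    out_features=384,
)
frontend = GenericFrontendV1(cfg)

features = torch.randn(10, 50, 80)  # [B, T, F]
sequence_mask = torch.ones(10, 50, dtype=torch.bool)  # all 50 frames valid
out, out_mask = frontend(features, sequence_mask)
# The (2, 2) pool stride halves both the time and the feature axis:
# out has shape [10, 25, 384], out_mask has shape [10, 25].

The docstring's Conv2dSubsampling6 analogue would instead use two Conv2d layers with conv_kernel_sizes=[(3, 3), (5, 5)] and conv_strides=[(2, 2), (3, 3)], giving a total time subsampling factor of 6.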