From 88f370295b6e1f54f2cb16de19ddb08a49b6fa2c Mon Sep 17 00:00:00 2001
From: Ping Zheng
Date: Thu, 12 Sep 2024 05:30:03 -0400
Subject: [PATCH] expand docstring

---
 i6_models/parts/conformer/mhsa_rel_pos.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/i6_models/parts/conformer/mhsa_rel_pos.py b/i6_models/parts/conformer/mhsa_rel_pos.py
index 3118cacf..12e174c1 100644
--- a/i6_models/parts/conformer/mhsa_rel_pos.py
+++ b/i6_models/parts/conformer/mhsa_rel_pos.py
@@ -61,9 +61,20 @@ def __post_init__(self) -> None:
 class ConformerMHSARelPosV1(nn.Module):
     """
     Conformer multi-headed self-attention module supporting
-    - relative positional encoding proposed by Shaw et al. (cf. https://arxiv.org/abs/1803.02155) by setting `learnable_pos_emb` to True and `with_pos_bias` to False
-    - and Transformer-XL style relative PE by Dai et al. (cf. https://arxiv.org/abs/1901.02860) by setting `learnable_pos_emb` to False and `with_pos_bias` to True
-
+    - self-attention with relative positional encoding proposed by Shaw et al. (cf. https://arxiv.org/abs/1803.02155)
+        * learnable_pos_emb = True
+        * with_pos_bias = False
+        * with_linear_pos = False
+        * separate_pos_emb_per_head = False (RETURNN default)
+        * with_bias = False (RETURNN default)
+    - and self-attention with Transformer-XL style relative PE by Dai et al.
+      (cf. https://arxiv.org/abs/1901.02860, https://github.com/kimiyoung/transformer-xl/blob/master/pytorch/mem_transformer.py,
+      https://github.com/espnet/espnet/blob/master/espnet2/asr_transducer/encoder/modules/attention.py#L9)
+        * learnable_pos_emb = False
+        * with_pos_bias = True
+        * with_linear_pos = False (paper implementation) / with_linear_pos = True (ESPnet default)
+        * separate_pos_emb_per_head = False (paper implementation) / separate_pos_emb_per_head = True (ESPnet default)
+        * with_bias = False (paper implementation) / with_bias = True (ESPnet default)
     """
 
     def __init__(self, cfg: ConformerMHSARelPosV1Config):
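
Below is a minimal usage sketch (not part of the patch) showing the two flag combinations the expanded docstring describes. It assumes a config dataclass ConformerMHSARelPosV1Config and a forward signature taking an input tensor plus a sequence mask; all field names other than learnable_pos_emb, with_pos_bias, with_linear_pos, separate_pos_emb_per_head, and with_bias are assumptions and may not match the actual module.

# Sketch only; field names not mentioned in the docstring above are assumptions.
import torch
from i6_models.parts.conformer.mhsa_rel_pos import ConformerMHSARelPosV1, ConformerMHSARelPosV1Config

# Shaw et al. style (https://arxiv.org/abs/1803.02155): learnable relative
# positional embeddings, no positional bias terms (RETURNN defaults otherwise).
shaw_cfg = ConformerMHSARelPosV1Config(
    input_dim=256,                    # assumed field name
    num_att_heads=4,                  # assumed field name
    att_weights_dropout=0.1,          # assumed field name
    dropout=0.1,                      # assumed field name
    learnable_pos_emb=True,
    rel_pos_clip=16,                  # assumed field name
    with_pos_bias=False,
    with_linear_pos=False,
    separate_pos_emb_per_head=False,
    pos_emb_dropout=0.0,              # assumed field name
    with_bias=False,
)

# Transformer-XL style (https://arxiv.org/abs/1901.02860), paper variant:
# non-learnable relative PE with content/positional bias vectors.
xl_cfg = ConformerMHSARelPosV1Config(
    input_dim=256,
    num_att_heads=4,
    att_weights_dropout=0.1,
    dropout=0.1,
    learnable_pos_emb=False,
    rel_pos_clip=None,                # assumed to be optional
    with_pos_bias=True,
    with_linear_pos=False,            # True for the ESPnet default
    separate_pos_emb_per_head=False,  # True for the ESPnet default
    pos_emb_dropout=0.0,
    with_bias=False,                  # True for the ESPnet default
)

mhsa = ConformerMHSARelPosV1(cfg=xl_cfg)
x = torch.randn(3, 50, 256)                 # [batch, time, feature]
mask = torch.ones(3, 50, dtype=torch.bool)  # assumed mask convention: True = valid frame
out = mhsa(x, sequence_mask=mask)           # assumed forward signature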