From 88f370295b6e1f54f2cb16de19ddb08a49b6fa2c Mon Sep 17 00:00:00 2001
From: Ping Zheng
Date: Thu, 12 Sep 2024 05:30:03 -0400
Subject: [PATCH] expand docstring

---
 i6_models/parts/conformer/mhsa_rel_pos.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/i6_models/parts/conformer/mhsa_rel_pos.py b/i6_models/parts/conformer/mhsa_rel_pos.py
index 3118cacf..12e174c1 100644
--- a/i6_models/parts/conformer/mhsa_rel_pos.py
+++ b/i6_models/parts/conformer/mhsa_rel_pos.py
@@ -61,9 +61,20 @@ def __post_init__(self) -> None:
 class ConformerMHSARelPosV1(nn.Module):
     """
     Conformer multi-headed self-attention module supporting
-    - relative positional encoding proposed by Shaw et al. (cf. https://arxiv.org/abs/1803.02155) by setting `learnable_pos_emb` to True and `with_pos_bias` to False
-    - and Transformer-XL style relative PE by Dai et al. (cf. https://arxiv.org/abs/1901.02860) by setting `learnable_pos_emb` to False and `with_pos_bias` to True
-
+        - self-attention with relative positional encoding proposed by Shaw et al. (cf. https://arxiv.org/abs/1803.02155)
+            * learnable_pos_emb = True
+            * with_pos_bias = False
+            * with_linear_pos = False
+            * separate_pos_emb_per_head = False (RETURNN default)
+            * with_bias = False (RETURNN default)
+        - and self-attention with Transformer-XL style relative PE by Dai et al.
+          (cf. https://arxiv.org/abs/1901.02860, https://github.com/kimiyoung/transformer-xl/blob/master/pytorch/mem_transformer.py,
+          https://github.com/espnet/espnet/blob/master/espnet2/asr_transducer/encoder/modules/attention.py#L9)
+            * learnable_pos_emb = False
+            * with_pos_bias = True
+            * with_linear_pos = False (paper implementation) / with_linear_pos = True (ESPnet default)
+            * separate_pos_emb_per_head = False (paper implementation) / separate_pos_emb_per_head = True (ESPnet default)
+            * with_bias = False (paper implementation) / with_bias = True (ESPnet default)
     """
 
     def __init__(self, cfg: ConformerMHSARelPosV1Config):
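
The five flags documented in the new docstring fully select between the two relative-PE variants. As a rough illustration (not part of the patch itself), the documented combinations can be written out as plain keyword-argument dicts; only the five flag names come from the docstring above, while any remaining required fields of ConformerMHSARelPosV1Config (e.g. input_dim, num_att_heads, dropout values) are assumptions and only hinted at in the comments.

# Illustrative sketch of the documented flag combinations (assumed usage, not part of the patch).

# Shaw et al. (2018): learnable relative positional embeddings.
shaw_rel_pos = dict(
    learnable_pos_emb=True,
    with_pos_bias=False,
    with_linear_pos=False,
    separate_pos_emb_per_head=False,  # RETURNN default
    with_bias=False,  # RETURNN default
)

# Transformer-XL style (Dai et al., 2019), paper implementation.
xl_rel_pos_paper = dict(
    learnable_pos_emb=False,
    with_pos_bias=True,
    with_linear_pos=False,
    separate_pos_emb_per_head=False,
    with_bias=False,
)

# Transformer-XL style, ESPnet defaults.
xl_rel_pos_espnet = dict(
    learnable_pos_emb=False,
    with_pos_bias=True,
    with_linear_pos=True,
    separate_pos_emb_per_head=True,
    with_bias=True,
)

# Each dict would be combined with the remaining required config fields, e.g.:
#   cfg = ConformerMHSARelPosV1Config(input_dim=..., num_att_heads=..., **shaw_rel_pos)
#   mhsa = ConformerMHSARelPosV1(cfg)
# (input_dim / num_att_heads are assumed field names; they are not shown in this hunk.)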