From 5dd74a1e64bc7bcec0b950e92464073709b5ffdf Mon Sep 17 00:00:00 2001 From: Gabriel Moreira Date: Fri, 19 May 2023 11:32:56 -0300 Subject: [PATCH 1/4] Fixed docstrings --- merlin/models/tf/blocks/mlp.py | 31 ++++++++ merlin/models/tf/core/combinators.py | 5 ++ merlin/models/tf/core/encoder.py | 87 +++++++++++++++++++--- merlin/models/tf/inputs/embedding.py | 50 +++++++++++++ merlin/models/tf/models/base.py | 60 ++++++++++++++- merlin/models/tf/outputs/base.py | 14 +++- merlin/models/tf/outputs/block.py | 27 +++++++ merlin/models/tf/outputs/classification.py | 19 ++++- 8 files changed, 273 insertions(+), 20 deletions(-) diff --git a/merlin/models/tf/blocks/mlp.py b/merlin/models/tf/blocks/mlp.py index a95fc0a3a5..d5269d5511 100644 --- a/merlin/models/tf/blocks/mlp.py +++ b/merlin/models/tf/blocks/mlp.py @@ -224,6 +224,37 @@ def __init__( dense=None, **kwargs, ): + """A Dense layer that aggregates features before projection + Parameters + ---------- + units : _type_ + _description_ + activation : Optional[Union[str,tf.keras.layers.Layer]], optional + The activation function to use. By default None + use_bias : bool, optional + Whether to use a bias in the MLP, by default True + kernel_initializer: InitializerType + Initializer for the kernel weights matrix. Defaults to "glorot_uniform". + bias_initializer: InitializerType + Initializer for the bias vector. Default to "zeros". + kernel_regularizer: Optional[RegularizerType] + Regularizer function applied to the kernel weights matrix. Default to None. + bias_regularizer: Optional[RegularizerType] + Regularizer function applied to the bias vector. Default to None. + activity_regularizer : optional + Regularizer function applied to the output of the layer (its "activation"), + by default None + kernel_constraint : optional + Constraint function applied to the kernel weights matrix, by default None + bias_constraint : optional + Constraint function applied to the bias vector, by default None + pre_aggregation : str, optional + If provided, aggregates inputs before the dense projection, by default "concat" + dense : _type_, optional + A tf.keras.layers.Layer that can be used to project the inputs. + Typically used when deserializing the layer. By default None + """ + super(_Dense, self).__init__(**kwargs) self.dense = dense or tf.keras.layers.Dense( units, diff --git a/merlin/models/tf/core/combinators.py b/merlin/models/tf/core/combinators.py index a327c741b7..423c01b0de 100644 --- a/merlin/models/tf/core/combinators.py +++ b/merlin/models/tf/core/combinators.py @@ -702,6 +702,11 @@ def from_config(cls, config, **kwargs): @tf.keras.utils.register_keras_serializable(package="merlin.models") class ResidualBlock(WithShortcut): + """ + Creates a shortcut connection where the residuals are + summed to the output of the block + """ + def __init__( self, block: Union[tf.keras.layers.Layer, Block], diff --git a/merlin/models/tf/core/encoder.py b/merlin/models/tf/core/encoder.py index 19ebaae02f..1fcab3c504 100644 --- a/merlin/models/tf/core/encoder.py +++ b/merlin/models/tf/core/encoder.py @@ -526,23 +526,90 @@ def fit(self, *args, **kwargs): @tf.keras.utils.register_keras_serializable(package="merlin.models") class EmbeddingEncoder(Encoder): + """Creates an Encoder from an EmbeddingTable. + Typically used with RetrievalModelV2. + Parameters + ---------- + schema : Union[ColumnSchema, Schema] + The ColumnSchema of the column for which the + embedding table needs to be created. 
+ If a Schema is passed, only the first column + is considered + dim : int + Dimension of the embeddings + embeddings_initializer : Union[str, tf.keras.layers.Layer], optional + Initializer for the `embeddings` + matrix (see `keras.initializers`). By default "uniform" + embeddings_regularizer : Union[str, tf.keras.layers.Layer], optional + Regularizer function applied to + the `embeddings` matrix (see `keras.regularizers`)., by default None + activity_regularizer : Union[str, tf.keras.layers.Layer], optional + Sets a layer that applies an update to the cost function based + input activity, by default None + embeddings_constraint : Union[str, tf.keras.layers.Layer], optional + Constraint function applied to + the `embeddings` matrix (see `keras.constraints`), by default None + mask_zero : bool, optional + Whether or not the input value 0 is a special "padding" + value that should be masked out. + This is useful when using recurrent layers + which may take variable length input. + If this is `True`, then all subsequent layers + in the model need to support masking or an exception will be raised. + If mask_zero is set to True, as a consequence, index 0 cannot be + used in the vocabulary (input_dim should equal size of + vocabulary + 1), by default False + input_length : int, optional + This argument is required if you are going to connect + `Flatten` then `Dense` layers upstream + (without it, the shape of the dense outputs cannot be computed), + by default None + sequence_combiner : Optional[CombinerType], optional + A string specifying how to combine embedding results for each + entry ("mean", "sqrtn" and "sum" are supported) or a layer. + Default is None (no combiner used), by default None + trainable : bool, optional + Whether the layer's variables should be trainable, by default True + name : str, optional + String name of the layer, by default None + dtype : optional + The dtype of the layer's computations and weights. Can also be a + `tf.keras.mixed_precision.Policy`, which allows the computation and weight + dtype to differ. Default of `None` means to use + `tf.keras.mixed_precision.global_policy()`, which is a float32 policy + unless set to different value., by default None + dynamic : bool, optional + Set this to `True` if your layer should only be run eagerly, and + should not be used to generate a static computation graph. + This would be the case for a Tree-RNN or a recursive network, + for example, or generally for any layer that manipulates tensors + using Python control flow. 
If `False`, we assume that the layer can + safely be used to generate a static computation graph., by default False + embeddings_l2_batch_regularization : Optional[Union[float, Dict[str, float]]], optional + Factor for L2 regularization of the embeddings vectors (from the current batch only) + by default 0.0, by default 0.0 + post : Optional[tf.keras.layers.Layer], optional + _description_, by default None + **kwargs: Forwarded Encoder parameters + """ + def __init__( self, schema: Union[ColumnSchema, Schema], dim: int, - embeddings_initializer="uniform", - embeddings_regularizer=None, - activity_regularizer=None, - embeddings_constraint=None, - mask_zero=False, - input_length=None, + embeddings_initializer: Optional[Union[str, tf.keras.layers.Layer]] = "uniform", + embeddings_regularizer: Optional[Union[str, tf.keras.layers.Layer]] = None, + activity_regularizer: Optional[Union[str, tf.keras.layers.Layer]] = None, + embeddings_constraint: Optional[Union[str, tf.keras.layers.Layer]] = None, + mask_zero: bool = False, + input_length: int = None, sequence_combiner: Optional[CombinerType] = None, - trainable=True, - name=None, + trainable: bool = True, + name: str = None, dtype=None, - dynamic=False, - post: Optional[tf.keras.layers.Layer] = None, + dynamic: bool = False, embeddings_l2_batch_regularization: Optional[Union[float, Dict[str, float]]] = 0.0, + post: Optional[tf.keras.layers.Layer] = None, **kwargs, ): if isinstance(schema, ColumnSchema): diff --git a/merlin/models/tf/inputs/embedding.py b/merlin/models/tf/inputs/embedding.py index 3d461f9569..302e446dc1 100644 --- a/merlin/models/tf/inputs/embedding.py +++ b/merlin/models/tf/inputs/embedding.py @@ -1259,6 +1259,25 @@ def ContinuousEmbedding( name: str = "continuous", **kwargs, ) -> SequentialBlock: + """Concatenates all numerical features and project then using the + specified Block. 
+ Parameters + ---------- + inputs : Block + Expects a ParallelBlock with a number of features + embedding_block : Block + Block to project the continuous features + aggregation : optional + Aggregation combining continuous feature switch the other features, by default None + continuous_aggregation : str, optional + Aggregation for continuous features, by default "concat" + name : str, optional + Name of the projected continuous block, by default "continuous" + Returns + ------- + SequentialBlock + A block that contains the projected continuous features along with the other features + """ continuous_embedding = Filter(Tags.CONTINUOUS, aggregation=continuous_aggregation).connect( embedding_block ) @@ -1271,6 +1290,16 @@ def ContinuousEmbedding( def serialize_table_config(table_config: TableConfig) -> Dict[str, Any]: + """Serializes a TableConfig instance + Parameters + ---------- + table_config : TableConfig + Configuration data for one embedding table + Returns + ------- + Dict[str, Any] + A dict with the serialized embedding "initializer" and "optimizer" + """ table = deepcopy(table_config.__dict__) if "initializer" in table: table["initializer"] = tf.keras.initializers.serialize(table["initializer"]) @@ -1281,6 +1310,17 @@ def serialize_table_config(table_config: TableConfig) -> Dict[str, Any]: def deserialize_table_config(table_params: Dict[str, Any]) -> TableConfig: + """Deserializes a TableConfig from the serialized + embedding "initializer" and "optimizer" + Parameters + ---------- + table_params : Dict[str, Any] + Dict with the serialized values + Returns + ------- + TableConfig + An instance of the TableConfig + """ if "initializer" in table_params and table_params["initializer"]: table_params["initializer"] = tf.keras.initializers.deserialize(table_params["initializer"]) if "optimizer" in table_params and table_params["optimizer"]: @@ -1291,6 +1331,16 @@ def deserialize_table_config(table_params: Dict[str, Any]) -> TableConfig: def serialize_feature_config(feature_config: FeatureConfig) -> Dict[str, Any]: + """Serializes a FeatureConfig instance + Parameters + ---------- + feature_config : FeatureConfig + Configuration data for one embedding feature + Returns + ------- + Dict[str, Any] + Dict with FeatureConfig properties set + """ outputs = {} for key, val in feature_config.items(): diff --git a/merlin/models/tf/models/base.py b/merlin/models/tf/models/base.py index 5f1499757b..c2003c68dd 100644 --- a/merlin/models/tf/models/base.py +++ b/merlin/models/tf/models/base.py @@ -22,7 +22,17 @@ import warnings from collections.abc import Sequence as SequenceCollection from functools import partial -from typing import TYPE_CHECKING, Dict, List, Optional, Protocol, Sequence, Union, runtime_checkable +from typing import ( + TYPE_CHECKING, + Any, + Dict, + List, + Optional, + Protocol, + Sequence, + Union, + runtime_checkable, +) import six import tensorflow as tf @@ -105,7 +115,12 @@ class MetricsComputeCallback(tf.keras.callbacks.Callback): - """Callback that handles when to compute metrics.""" + """Callback that handles when to compute metrics." 
+ Parameters + ---------- + train_metrics_steps : int, optional + Frequency (number of steps) to compute train metrics, by default 1 + """ def __init__(self, train_metrics_steps=1, **kwargs): self.train_metrics_steps = train_metrics_steps @@ -166,7 +181,14 @@ def get_output_schema(export_path: str) -> Schema: @tf.keras.utils.register_keras_serializable(package="merlin_models") class ModelBlock(Block, tf.keras.Model): - """Block that extends `tf.keras.Model` to make it saveable.""" + """Block that extends `tf.keras.Model` to make it saveable. + Parameters + ---------- + block : Block + Block to be turned into a model + prep_features : Optional[bool], optional + Whether features need to be prepared or not, by default True + """ def __init__(self, block: Block, prep_features: Optional[bool] = True, **kwargs): super().__init__(**kwargs) @@ -284,6 +306,10 @@ def get_config(self): class BaseModel(tf.keras.Model): + """Base model, that overrides Keras model methods + to compile, compute metrics and loss and also + to compute the train, eval, predict steps""" + def __init__(self, **kwargs): super(BaseModel, self).__init__(**kwargs) @@ -660,6 +686,10 @@ def _create_loss( @property def prediction_tasks(self) -> List[PredictionTask]: + """Returns the Prediction tasks in the model. + Going to be deprecated in favor of model_outputs() + """ + from merlin.models.tf.prediction_tasks.base import PredictionTask results = find_all_instances_in_layers(self, PredictionTask) @@ -693,6 +723,7 @@ def prediction_tasks_by_target(self) -> Dict[str, List[PredictionTask]]: @property def model_outputs(self) -> List[ModelOutput]: + """Returns a list with the ModelOutput in the model""" results = find_all_instances_in_layers(self, ModelOutput) # Ensures tasks are sorted by name, so that they match the metrics # which are sorted the same way by Keras @@ -701,6 +732,7 @@ def model_outputs(self) -> List[ModelOutput]: return results def outputs_by_name(self) -> Dict[str, ModelOutput]: + """Returns the task names from the model outputs""" return {task.full_name: task for task in self.model_outputs} def outputs_by_target(self) -> Dict[str, List[ModelOutput]]: @@ -1175,6 +1207,7 @@ def test_step(self, data): return metrics def predict_step(self, data): + """Custom predict step to obtain the outputs""" x, _, _ = unpack_x_y_sample_weight(data) if getattr(self, "predict_pre", None): @@ -2084,6 +2117,8 @@ def get_blocks_by_name(self, block_names: Sequence[str]) -> List[Block]: @runtime_checkable class RetrievalBlock(Protocol): + """Protocol class for a RetrievalBlock""" + def query_block(self) -> Block: ... 
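For context, the APIs documented above (the `model_outputs` discovery and the metrics cadence handled by `MetricsComputeCallback`) typically come together as in the sketch below. This is an illustrative example only, not part of the patch; `train_ds` and the "click" target are assumed placeholders for a merlin.io.Dataset and a binary target column in its schema.

    import merlin.models.tf as mm

    # train_ds: a merlin.io.Dataset whose schema contains a binary "click" target (assumed)
    model = mm.Model(
        mm.InputBlockV2(train_ds.schema),
        mm.MLPBlock([64, 32]),
        mm.BinaryOutput("click"),
    )
    model.compile(optimizer="adam")
    # train_metrics_steps limits how often train metrics are computed
    # (handled internally by MetricsComputeCallback)
    model.fit(train_ds, batch_size=1024, epochs=1, train_metrics_steps=10)
    print(model.model_outputs)  # ModelOutput blocks, sorted by name to match Keras metrics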
@@ -2487,7 +2522,24 @@ def to_top_k_encoder( return topk_model -def _maybe_convert_merlin_dataset(data, batch_size, shuffle=True, **kwargs): +def _maybe_convert_merlin_dataset( + data: Any, batch_size: int, shuffle: bool = True, **kwargs +) -> Any: + """Converts the Dataset to a Loader with the given + batch_size and shuffle options + Parameters + ---------- + data + Dataset instance + batch_size : int + Batch size + shuffle : bool, optional + Enables data shuffling during loading, by default True + Returns + ------- + Any + Returns a Loader instance if a Dataset, otherwise returns the data + """ # Check if merlin-dataset is passed if hasattr(data, "to_ddf"): if not batch_size: diff --git a/merlin/models/tf/outputs/base.py b/merlin/models/tf/outputs/base.py index 2ff8d6ddbc..6957fdeda2 100644 --- a/merlin/models/tf/outputs/base.py +++ b/merlin/models/tf/outputs/base.py @@ -44,7 +44,7 @@ class ModelOutput(Layer): The layer to call in the forward-pass of the model default_loss: Union[str, tf.keras.losses.Loss] Default loss to set if the user does not specify one - get_default_metrics: Callable + default_metrics_fn: Callable A function returning the list of default metrics to set if the user does not specify any name: Optional[Text], optional @@ -253,7 +253,17 @@ def get_config(self): return config @classmethod - def get_task_name(cls, target_name): + def get_task_name(cls, target_name: str) -> str: + """Returns the name of the task + Parameters + ---------- + target_name : str + Name of the target + Returns + ------- + str + Returns the task name, which includes the target name + """ base_name = to_snake_case(cls.__name__) return name_fn(target_name, base_name) if target_name else base_name diff --git a/merlin/models/tf/outputs/block.py b/merlin/models/tf/outputs/block.py index 8e854251da..a328df2450 100644 --- a/merlin/models/tf/outputs/block.py +++ b/merlin/models/tf/outputs/block.py @@ -131,6 +131,18 @@ def OutputBlock( def _get_col_set_by_tags(schema: Schema, tags) -> Set[str]: + """Returns a set with the schema column names + Parameters + ---------- + schema : Schema + Schema + tags : + Tags to filter + Returns + ------- + Set[str] + A set with the schema column names + """ return set(schema.select_by_tag(tags).column_names) @@ -139,6 +151,21 @@ def _set_task_block( col_name: str, task_blocks: Optional[Union[Layer, Dict[str, Layer]]] = None, ): + """Creates a tower (task_block) for each task (output). + Parameters + ---------- + output_block : OutputBlock + The output block with the tasks + col_name : str + Specify the task name + task_blocks : Optional[Union[Layer, Dict[str, Layer]]], optional + Task blocks to be used as task towers. If a single Layer, it is copied to all + tasks. If a dict, the keys must match the task names + (e.g. "click/binary_output", rating/regression_output", "item_id/categorical_output"). + You might want to use the task_blocks to create a task-specific tower + (e.g. MLPBLock([32])) or to customize inputs, targets or sample_weights for a + given task. By default None + """ task_block = None if task_blocks is not None: if isinstance(task_blocks, dict): diff --git a/merlin/models/tf/outputs/classification.py b/merlin/models/tf/outputs/classification.py index 08ed99e9f6..f06275979e 100644 --- a/merlin/models/tf/outputs/classification.py +++ b/merlin/models/tf/outputs/classification.py @@ -14,7 +14,7 @@ # limitations under the License. 
# import logging -from typing import Optional, Union +from typing import List, Optional, Union import tensorflow as tf from tensorflow.keras.layers import Layer @@ -34,7 +34,13 @@ LOG = logging.getLogger("merlin_models") -def default_binary_metrics(): +def default_binary_metrics() -> List[tf.keras.metrics.Metric]: + """Returns the default binary metrics + Returns + ------- + List[tf.keras.metrics.Metric] + List with metrics for binary classification + """ return ( tf.keras.metrics.Precision(name="precision"), tf.keras.metrics.Recall(name="recall"), @@ -44,6 +50,13 @@ def default_binary_metrics(): def default_categorical_prediction_metrics(k=10): + """Returns the default top-k metrics for + categorical classification + Returns + ------- + List[tf.keras.metrics.Metric] + List with top-k metrics for categorical classification + """ return ( RecallAt(k), MRRAt(k), @@ -70,8 +83,6 @@ class BinaryOutput(ModelOutput): by default None name: str, optional The name of the task. - task_block: Block, optional - The block to use for the task. logits_temperature: float, optional Parameter used to reduce model overconfidence, so that logits / T. by default 1. From 7ee8e84f8124d903ddd7a86d70ac18a27bbdbb09 Mon Sep 17 00:00:00 2001 From: rnyak <16246900+rnyak@users.noreply.github.com> Date: Mon, 22 May 2023 10:46:16 -0400 Subject: [PATCH 2/4] [WIP] add docstrings in mms classes and functions (#1101) * add docstings * add docstrings --- merlin/models/tf/blocks/dlrm.py | 87 ++++++++++--------- merlin/models/tf/blocks/interaction.py | 12 +++ .../tf/prediction_tasks/classification.py | 27 ++++++ .../models/tf/prediction_tasks/regression.py | 13 +++ .../models/tf/prediction_tasks/retrieval.py | 15 +++- 5 files changed, 108 insertions(+), 46 deletions(-) diff --git a/merlin/models/tf/blocks/dlrm.py b/merlin/models/tf/blocks/dlrm.py index f51af7a426..30e59f9444 100644 --- a/merlin/models/tf/blocks/dlrm.py +++ b/merlin/models/tf/blocks/dlrm.py @@ -34,52 +34,55 @@ def DLRMBlock( *, embedding_dim: int = None, embedding_options: EmbeddingOptions = None, + embeddings: Optional[Block] = None, bottom_block: Optional[Block] = None, top_block: Optional[Block] = None, - embeddings: Optional[Block] = None, ) -> SequentialBlock: """Builds the DLRM architecture, as proposed in the following - `paper https://arxiv.org/pdf/1906.00091.pdf`_ [1]_. - - References - ---------- - .. [1] Naumov, Maxim, et al. "Deep learning recommendation model for - personalization and recommendation systems." arXiv preprint arXiv:1906.00091 (2019). - - Parameters - ---------- - schema : Schema - The `Schema` with the input features - bottom_block : Block - The `Block` that combines the continuous features (typically a `MLPBlock`) - top_block : Optional[Block], optional - The optional `Block` that combines the outputs of bottom layer and of - the factorization machine layer, by default None - embedding_dim : Optional[int], optional - Dimension of the embeddings, by default None - embedding_options : EmbeddingOptions - Options for the input embeddings. - - embedding_dim_default: int - Default dimension of the embedding - table, when the feature is not found in ``embedding_dims``, by default 64 - - infer_embedding_sizes : bool, Automatically defines the embedding - dimension from the feature cardinality in the schema, by default False, - which needs to be kept False for the DLRM architecture. 
- - Returns - ------- - SequentialBlock - The DLRM block - - Raises - ------ - ValueError - The schema is required by DLRM - ValueError - The bottom_block is required by DLRM - ValueError - The embedding_dim (X) needs to match the last layer of bottom MLP (Y). - ValueError - Only one-of `embeddings` or `embedding_options` can be used. + `paper https://arxiv.org/pdf/1906.00091.pdf`_ [1]_. + + References + ---------- + .. [1] Naumov, Maxim, et al. "Deep learning recommendation model for + personalization and recommendation systems." arXiv preprint arXiv:1906.00091 (2019). + + Parameters + ---------- + schema : Schema + The `Schema` with the input features + embedding_dim : Optional[int], optional + Dimension of the embeddings, by default None + embedding_options : EmbeddingOptions + Options for the input embeddings. + - embedding_dim_default: int - Default dimension of the embedding + table, when the feature is not found in ``embedding_dims``, by default 64 + - infer_embedding_sizes : bool, Automatically defines the embedding + dimension from the feature cardinality in the schema, by default False, + which needs to be kept False for the DLRM architecture. + embeddings: Optional[Block] + If provided creates a ParallelBlock with an EmbeddingTable for each + categorical feature in the schema. + bottom_block : Block + The `Block` that combines the continuous features (typically a `MLPBlock`) + top_block : Optional[Block], optional + The optional `Block` that combines the outputs of bottom layer and of + the factorization machine layer, by default None + + Returns + ------- + SequentialBlock + The DLRM block + + Raises + ------ + ValueError + The schema is required by DLRM + ValueError + The bottom_block is required by DLRM + ValueError + The embedding_dim (X) needs to match the last layer of bottom MLP (Y). + ValueError + Only one-of `embeddings` or `embedding_options` can be used. 
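+
+    Example
+    -------
+    An illustrative usage sketch (``schema``, the layer sizes and the
+    ``"click"`` target below are placeholders)::
+
+        import merlin.models.tf as mm
+
+        dlrm = mm.DLRMBlock(
+            schema,
+            embedding_dim=64,
+            bottom_block=mm.MLPBlock([128, 64]),
+            top_block=mm.MLPBlock([64, 32]),
+        )
+        model = mm.Model(dlrm, mm.BinaryOutput("click"))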
""" if schema is None: raise ValueError("The schema is required by DLRM") diff --git a/merlin/models/tf/blocks/interaction.py b/merlin/models/tf/blocks/interaction.py index d1450cb0ef..745172c7e8 100644 --- a/merlin/models/tf/blocks/interaction.py +++ b/merlin/models/tf/blocks/interaction.py @@ -236,6 +236,18 @@ def call(self, inputs: tf.Tensor, **kwargs) -> tf.Tensor: return 0.5 * tf.subtract(summed_square, squared_sum) def compute_output_shape(self, input_shapes): + """Computes the output shape based on the input shapes + + Parameters + ---------- + input_shapes : tf.TensorShape + The input shapes + + Returns + ------- + tf.TensorShape + The output shape + """ if len(input_shapes) != 3: raise ValueError("Found shape {} without 3 dimensions".format(input_shapes)) return (input_shapes[0], input_shapes[2]) diff --git a/merlin/models/tf/prediction_tasks/classification.py b/merlin/models/tf/prediction_tasks/classification.py index 6d0668914e..5ad072ee94 100644 --- a/merlin/models/tf/prediction_tasks/classification.py +++ b/merlin/models/tf/prediction_tasks/classification.py @@ -99,12 +99,39 @@ def __init__( ) def call(self, inputs, training=False, **kwargs): + """Projects the input with the output layer to a single logit + + Parameters + ---------- + inputs : tf.Tensor + Input tensor + training : bool, optional + Flag that indicates whether it is training or not, by default False + + Returns + ------- + tf.Tensor + Tensor with the classification probabilities + """ return self.output_activation(self.output_layer(inputs)) def compute_output_shape(self, input_shape): + """Computes the output shape based on the input shape + + Parameters + ---------- + input_shape : tf.TensorShape + The input shape + + Returns + ------- + tf.TensorShape + The output shape + """ return self.output_layer.compute_output_shape(input_shape) def get_config(self): + """Return a Python dict containing the configuration of the model.""" config = super().get_config() config = maybe_serialize_keras_objects( self, diff --git a/merlin/models/tf/prediction_tasks/regression.py b/merlin/models/tf/prediction_tasks/regression.py index dedc4447ae..4869a9a9ff 100644 --- a/merlin/models/tf/prediction_tasks/regression.py +++ b/merlin/models/tf/prediction_tasks/regression.py @@ -105,9 +105,22 @@ def call(self, inputs: tf.Tensor, training=False, **kwargs) -> tf.Tensor: return self.output_activation(self.output_layer(inputs)) def compute_output_shape(self, input_shape): + """Computes the output shape based on the input shape + + Parameters + ---------- + input_shape : tf.TensorShape + The input shape + + Returns + ------- + tf.TensorShape + The output shape + """ return self.output_layer.compute_output_shape(input_shape) def get_config(self): + """Return a Python dict containing the configuration of the model.""" config = super().get_config() config = maybe_serialize_keras_objects( self, config, {"output_layer": tf.keras.layers.serialize} diff --git a/merlin/models/tf/prediction_tasks/retrieval.py b/merlin/models/tf/prediction_tasks/retrieval.py index c6733018ca..aa8ee8fd6c 100644 --- a/merlin/models/tf/prediction_tasks/retrieval.py +++ b/merlin/models/tf/prediction_tasks/retrieval.py @@ -39,10 +39,6 @@ class ItemRetrievalTask(MultiClassClassificationTask): The schema object including features to use and their properties. 
samplers: List[ItemSampler] List of samplers for negative sampling, by default `[InBatchSampler()]` - post_logits: Optional[PredictionBlock] - Optional extra pre-call block for post-processing the logits, by default None. - You can for example use `post_logits = mm.PopularitySamplingBlock(item_fequency)` - for populariy sampling correction. target_name: Optional[str] If specified, name of the target tensor to retrieve from dataloader. Defaults to None. @@ -52,9 +48,17 @@ class ItemRetrievalTask(MultiClassClassificationTask): task_block: Block The `Block` that applies additional layers op to inputs. Defaults to None. + post_logits: Optional[PredictionBlock] + Optional extra pre-call block for post-processing the logits, by default None. + You can for example use `post_logits = mm.PopularitySamplingBlock(item_fequency)` + for populariy sampling correction. logits_temperature: float Parameter used to reduce the model overconfidence, so that logits / T. Defaults to 1. + cache_query: bool + Add query embeddings to the context block, by default False + store_negative_ids: bool + Returns negative items ids as part of the output, by default False Returns ------- PredictionTask @@ -112,6 +116,7 @@ def _build_prediction_call( store_negative_ids: bool = False, **kwargs, ): + """Returns a SequentialBlock of ItemRetrievalScorer() and LogitsTemperatureScaler()""" if samplers is None or len(samplers) == 0: samplers = (InBatchSampler(),) @@ -134,6 +139,7 @@ def _build_prediction_call( @property def retrieval_scorer(self): def find_retrieval_scorer_block(block): + """Returns the ItemRetrievalScorer layer""" if isinstance(block, ItemRetrievalScorer): return block @@ -156,6 +162,7 @@ def set_retrieval_cache_query(self, value: bool): self.retrieval_scorer.cache_query = value def get_config(self): + """Return a Python dict containing the configuration of the model.""" config = super(ItemRetrievalTask, self).get_config() del config["pre"] if self.samplers: From db4483cc961966297ea385e8e65cf2ead754cbe7 Mon Sep 17 00:00:00 2001 From: edknv <109497216+edknv@users.noreply.github.com> Date: Tue, 23 May 2023 09:43:49 -0700 Subject: [PATCH 3/4] Add docstrings (#1106) * Add docstrings to Encoder * Add docstrings to ItemRetrievalScorer * Add docstrings to Model * Fix docstring for TwoTowerModel * Add docstring to YoutubeDNNRetrievalModelV2 * Add docstrings to L2Norm * Add docstrings to ContinuousFeatures * Add docstrings to AverageEmbeddingsByWeightFeature * Add docstrings to ReplaceMaskedEmbeddings * Add docstrings to SequenceEmbeddingFeatures * Add docstrings to EmbeddingTable * lint * lint * lint --- merlin/models/tf/blocks/retrieval/base.py | 45 ++- merlin/models/tf/core/encoder.py | 107 ++++++- merlin/models/tf/inputs/base.py | 4 +- merlin/models/tf/inputs/continuous.py | 72 +++++ merlin/models/tf/inputs/embedding.py | 274 +++++++++++++++--- merlin/models/tf/models/base.py | 96 ++++++ merlin/models/tf/models/retrieval.py | 10 +- merlin/models/tf/transforms/regularization.py | 28 ++ merlin/models/tf/transforms/sequence.py | 19 ++ 9 files changed, 600 insertions(+), 55 deletions(-) diff --git a/merlin/models/tf/blocks/retrieval/base.py b/merlin/models/tf/blocks/retrieval/base.py index e40ac648e2..f76ffb1d8a 100644 --- a/merlin/models/tf/blocks/retrieval/base.py +++ b/merlin/models/tf/blocks/retrieval/base.py @@ -135,14 +135,15 @@ class ItemRetrievalScorer(Block): """Block for ItemRetrieval, which expects query/user and item embeddings as input and uses dot product to score the positive item (inputs["item"]) and also 
sampled negative items (during training). + Parameters ---------- - samplers : List[ItemSampler], optional + samplers: List[ItemSampler], optional List of item samplers that provide negative samples when `training=True` - sampling_downscore_false_negatives : bool, optional + sampling_downscore_false_negatives: bool, optional Identify false negatives (sampled item ids equal to the positive item and downscore them to the `sampling_downscore_false_negatives_value`), by default True - sampling_downscore_false_negatives_value : int, optional + sampling_downscore_false_negatives_value: int, optional Value to be used to downscore false negatives when `sampling_downscore_false_negatives=True`, by default `np.finfo(np.float32).min / 100.0` item_id_feature_name: str @@ -174,6 +175,7 @@ def __init__( store_negative_ids: bool = False, **kwargs, ): + """Initializes the `ItemRetrievalScorer` class.""" super().__init__(**kwargs) self.downscore_false_negatives = sampling_downscore_false_negatives @@ -193,6 +195,13 @@ def __init__( self.set_required_features() def build(self, input_shapes): + """Builds the block. + + Parameters + ---------- + input_shapes: tuple or dict + Shape of the input tensor. + """ if isinstance(input_shapes, dict): query_shape = input_shapes[self.query_name] self.context.add_weight( @@ -206,6 +215,13 @@ def build(self, input_shapes): super().build(input_shapes) def _check_input_from_two_tower(self, inputs): + """Checks if the inputs from the two towers (query and item) are correctly provided. + + Parameters + ---------- + inputs: dict + Dictionary of inputs. + """ if set(inputs.keys()) != set([self.query_name, self.item_name]): raise ValueError( f"Wrong input-names, expected: {[self.query_name, self.item_name]} " @@ -223,6 +239,7 @@ def call( the positive item (inputs["item"]). For the sampled-softmax mode, logits are computed by multiplying the query vector and the item embeddings matrix (self.context.get_embedding(self.item_domain)) + Parameters ---------- inputs : Union[tf.Tensor, TabularData] @@ -230,6 +247,7 @@ def call( where embeddings are 2D tensors (batch size, embedding size) training : bool, optional Flag that indicates whether in training mode, by default True + Returns ------- tf.Tensor @@ -273,6 +291,7 @@ def call_outputs( ) -> "PredictionOutput": """Based on the user/query embedding (inputs[self.query_name]), uses dot product to score the positive item and also sampled negative items (during training). + Parameters ---------- inputs : TabularData @@ -280,6 +299,7 @@ def call_outputs( where embeddings are 2D tensors (batch size, embedding size) training : bool, optional Flag that indicates whether in training mode, by default True + Returns ------- [tf.Tensor,tf.Tensor] @@ -431,6 +451,7 @@ def _prepare_query_item_vectors_for_sampled_softmax( return predictions def set_required_features(self): + """Sets the required features for the samplers.""" required_features = set() if self.downscore_false_negatives: required_features.add(self.item_id_feature_name) @@ -442,6 +463,13 @@ def set_required_features(self): self._required_features = list(required_features) def get_config(self): + """Returns the configuration of the model as a dictionary. + + Returns + ------- + dict + The configuration of the model. 
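+
+        Example
+        -------
+        An illustrative round trip (``scorer`` is assumed to be an already
+        built ``ItemRetrievalScorer``); the serialized ``samplers`` are part
+        of the returned config::
+
+            config = scorer.get_config()
+            restored = ItemRetrievalScorer.from_config(config)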
+ """ config = super().get_config() config = maybe_serialize_keras_objects(self, config, ["samplers"]) config["sampling_downscore_false_negatives"] = self.downscore_false_negatives @@ -458,6 +486,17 @@ def get_config(self): @classmethod def from_config(cls, config): + """Creates a new instance of the class from its config. + + Parameters + ---------- + config: dict + A dictionary, typically the output of get_config. + + Returns + ------- + A new instance of the `ItemRetrievalScorer` class. + """ config = maybe_deserialize_keras_objects(config, ["samplers"]) return super().from_config(config) diff --git a/merlin/models/tf/core/encoder.py b/merlin/models/tf/core/encoder.py index 1fcab3c504..ddc936abbb 100644 --- a/merlin/models/tf/core/encoder.py +++ b/merlin/models/tf/core/encoder.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -88,6 +88,22 @@ def encode( batch_size: int, **kwargs, ) -> merlin.io.Dataset: + """Encodes the given dataset and index. + + Parameters + ---------- + dataset: merlin.io.Dataset + The dataset to encode. + index: Union[str, ColumnSchema, Schema, Tags] + The index to use for encoding. + batch_size: int + The batch size for encoding. + + Returns + ------- + merlin.io.Dataset + The encoded dataset. + """ if isinstance(index, Schema): output_schema = index elif isinstance(index, ColumnSchema): @@ -117,12 +133,14 @@ def batch_predict( **kwargs, ) -> merlin.io.Dataset: """Batched prediction using Dask. + Parameters ---------- dataset: merlin.io.Dataset Dataset to predict on. batch_size: int Batch size to use for prediction. + Returns ------- merlin.io.Dataset @@ -166,6 +184,23 @@ def batch_predict( return merlin.io.Dataset(predictions) def call(self, inputs, *, targets=None, training=False, testing=False, **kwargs): + """Calls the model on new inputs and returns the outputs as tensors. + + Parameters + ---------- + inputs : tensor-like or dict/tuple of tensors. + Tensors or dict/tuple of tensors representing the input batch. + targets : tensor-like, optional + Tensors representing the target data. + training : bool, optional + Whether the model is in training mode. + testing : bool, optional + Whether the model is in testing mode. + + Returns + ------- + A tensor or dict of tensors corresponding to the result of calling the layer. + """ inputs = self._prepare_features(inputs, targets=targets) if isinstance(inputs, tuple): inputs, targets = inputs @@ -180,6 +215,17 @@ def call(self, inputs, *, targets=None, training=False, testing=False, **kwargs) ) def __call__(self, inputs, **kwargs): + """Overrides the default __call__ method to remove "features" from inputs. + + Parameters + ---------- + inputs : tensor-like or dict/tuple of tensors. + Tensors or dict/tuple of tensors representing the input batch. + + Returns + ------- + A tensor or dict of tensors corresponding to the result of calling the layer. + """ # We remove features here since we don't expect them at inference time # Inside the `call` method, we will add them back by assuming inputs=features if "features" in kwargs: @@ -188,6 +234,13 @@ def __call__(self, inputs, **kwargs): return super().__call__(inputs, **kwargs) def build(self, input_shape): + """Creates the variables of the layer. + + Parameters + ---------- + input_shape: Tuple[int] + The shape of the input data. 
+ """ self._prepare_features.build(input_shape) input_shape = self._prepare_features.compute_output_shape(input_shape) @@ -196,18 +249,40 @@ def build(self, input_shape): self._build_input_shape = input_shape def compute_output_shape(self, input_shape): + """Computes the output shape of the layer. + + Parameters + ---------- + input_shape: Tuple[int] + The shape of the input data. + + Returns + ------- + Tuple[int] + The output shape of the layer. + """ input_shape = self._prepare_features.compute_output_shape(input_shape) return combinators.compute_output_shape_sequentially(list(self.to_call), input_shape) def train_step(self, data): - """Train step""" + """Performs a training step. + + Train step method is not implemented and Raises an error as the + Encoder block is not meant to be trained by itself and can only be + trained as part of a model. + """ raise NotImplementedError( "This block is not meant to be trained by itself. ", "It can only be trained as part of a model.", ) def fit(self, *args, **kwargs): - """Fit model""" + """Fits the model. + + Fit method is not implemented and Raises an error as the Encoder block + is not meant to be trained by itself and can only be trained as part + of a model. + """ raise NotImplementedError( "This block is not meant to be trained by itself. ", "It can only be trained as part of a model.", @@ -245,6 +320,7 @@ def save( @property def to_call(self): + """Provides the list of blocks to be called during the execution of the model.""" if self.pre: yield self.pre @@ -256,22 +332,40 @@ def to_call(self): @property def has_schema(self) -> bool: + """Returns True as this class does contain a schema.""" return True @property def schema(self) -> Schema: + """Returns the schema of the model.""" return self._schema @property def first(self): + """Returns the first block of the model.""" return self.blocks[0] @property def last(self): + """Returns the last block of the model.""" return self.blocks[-1] @classmethod def from_config(cls, config, custom_objects=None): + """Creates a new instance of the class by deserializing. + + Parameters + ---------- + config: dict + A dictionary, typically the output of get_config. + custom_objects: dict, optional + A dictionary mapping the names of layers to the corresponding + functions and classes. + + Returns + ------- + A new instance of Encoder. + """ pre = config.pop("pre", None) post = config.pop("post", None) layers = [ @@ -291,6 +385,13 @@ def from_config(cls, config, custom_objects=None): return output def get_config(self): + """Returns the configuration of the model as a dictionary. + + Returns + ------- + dict + The configuration of the model. + """ config = tf_utils.maybe_serialize_keras_objects(self, {}, ["pre", "post"]) for i, layer in enumerate(self.blocks): config[i] = tf.keras.utils.serialize_keras_object(layer) diff --git a/merlin/models/tf/inputs/base.py b/merlin/models/tf/inputs/base.py index 7c5c7d6a78..b3924c6741 100644 --- a/merlin/models/tf/inputs/base.py +++ b/merlin/models/tf/inputs/base.py @@ -44,7 +44,6 @@ def InputBlock( post: Optional[BlockType] = None, aggregation: Optional[TabularAggregationType] = None, seq: bool = False, - max_seq_length: Optional[int] = None, add_continuous_branch: bool = True, continuous_tags: Optional[Union[TagsType, Tuple[Tags]]] = (Tags.CONTINUOUS,), continuous_projection: Optional[Block] = None, @@ -75,6 +74,9 @@ def InputBlock( Next to this, it's also possible to construct it manually. 
branches: Dict[str, Block], optional Dictionary of branches to use inside the InputBlock. + pre: Optional[BlockType] + Transformations to apply on the inputs before the module is + called (before 'forward'). Default is None. post: Optional[BlockType] Transformations to apply on the inputs after the module is called (so **after** `forward`). diff --git a/merlin/models/tf/inputs/continuous.py b/merlin/models/tf/inputs/continuous.py index 328e84e56e..1360f67f3a 100644 --- a/merlin/models/tf/inputs/continuous.py +++ b/merlin/models/tf/inputs/continuous.py @@ -98,9 +98,39 @@ def __init__( @classmethod def from_features(cls, features, **kwargs): + """Class method for creating an instance of ContinuousFeatures. + + Parameters + ---------- + features: list + List of continuous features to include in this module. + kwargs : dict + Additional keyword arguments. + + Returns + ------- + ContinuousFeatures + An instance of ContinuousFeatures class. + """ return cls(features, **kwargs) def call(self, inputs, *args, **kwargs): + """Processes the specified continuous features from the inputs. + + Parameters + ---------- + inputs: dict + The input tensors, as a dictionary of string feature name to tensor. + args: tuple + Additional arguments, not used. + kwargs: dict + Additional keyword arguments, not used. + + Returns + ------- + dict + A dictionary of the processed continuous features. + """ cont_features = self.filter_features(inputs) cont_features = { k: tf.expand_dims(v, -1) if len(v.shape) == 1 else v for k, v in cont_features.items() @@ -108,6 +138,18 @@ def call(self, inputs, *args, **kwargs): return cont_features def compute_call_output_shape(self, input_shapes): + """Calculates the output shapes of the processed continuous features. + + Parameters + ---------- + input_shapes: dict + The shapes of the input tensors. + + Returns + ------- + dict + A dictionary of the output shapes of the processed continuous features. + """ cont_features_sizes = self.filter_features.compute_output_shape(input_shapes) cont_features_sizes = { k: tf.TensorShape(list(v) + [1]) if len(v) == 1 else v @@ -116,6 +158,13 @@ def compute_call_output_shape(self, input_shapes): return cont_features_sizes def get_config(self): + """Returns a dictionary containing the configuration of the ContinuousFeatures block. + + Returns + ------- + dict + A dictionary containing the configuration of the ContinuousFeatures block. + """ config = super().get_config() config["features"] = self.filter_features.feature_names @@ -123,10 +172,33 @@ def get_config(self): return config def _get_name(self): + """Returns the name of the module. + + Returns + ------- + str + The name of the module. + """ return "ContinuousFeatures" def repr_ignore(self) -> List[str]: + """Returns a list of module properties to ignore when creating a string representation + of the module. + + Returns + ------- + List[str] + A list of module properties to ignore in the string representation. + """ return ["filter_features"] def repr_extra(self): + """Returns a string of additional details to display in the string representation + of the module. + + Returns + ------- + str + Additional details for the string representation of the module. 
+ """ return ", ".join(sorted(self.filter_features.feature_names)) diff --git a/merlin/models/tf/inputs/embedding.py b/merlin/models/tf/inputs/embedding.py index 302e446dc1..32af3bc828 100644 --- a/merlin/models/tf/inputs/embedding.py +++ b/merlin/models/tf/inputs/embedding.py @@ -156,50 +156,46 @@ class EmbeddingTable(EmbeddingTableBase): and tf.SparseTensor which might be 2D (batch_size, 1) for scalars or 3d (batch_size, seq_length, 1) for sequential features - Parameters - ---------- - dim: Dimension of the dense embedding. - col_schema: ColumnSchema - Schema of the column. This is used to infer the cardinality. - embeddings_initializer: Initializer for the `embeddings` - matrix (see `keras.initializers`). - embeddings_regularizer: Regularizer function applied to - the `embeddings` matrix (see `keras.regularizers`). - embeddings_constraint: Constraint function applied to - the `embeddings` matrix (see `keras.constraints`). - mask_zero: Boolean, whether or not the input value 0 is a special "padding" - value that should be masked out. - This is useful when using recurrent layers - which may take variable length input. - If this is `True`, then all subsequent layers - in the model need to support masking or an exception will be raised. - If mask_zero is set to True, as a consequence, index 0 cannot be - used in the vocabulary (input_dim should equal size of - vocabulary + 1). - input_length: Length of input sequences, when it is constant. - This argument is required if you are going to connect - `Flatten` then `Dense` layers upstream - (without it, the shape of the dense outputs cannot be computed). - combiner: A string specifying how to combine embedding results for each - entry ("mean", "sqrtn" and "sum" are supported) or a layer. - Default is None (no combiner used) - trainable: Boolean, whether the layer's variables should be trainable. - name: String name of the layer. - dtype: The dtype of the layer's computations and weights. Can also be a - `tf.keras.mixed_precision.Policy`, which allows the computation and weight - dtype to differ. Default of `None` means to use - `tf.keras.mixed_precision.global_policy()`, which is a float32 policy - unless set to different value. - dynamic: Set this to `True` if your layer should only be run eagerly, and - should not be used to generate a static computation graph. - This would be the case for a Tree-RNN or a recursive network, - for example, or generally for any layer that manipulates tensors - using Python control flow. If `False`, we assume that the layer can - safely be used to generate a static computation graph. - l2_batch_regularization_factor: float, optional - Factor for L2 regularization of the embeddings vectors (from the current batch only) - by default 0.0 - **kwargs: Forwarded Keras Layer parameters + Parameters + ---------- + dim : int + The dimension of the dense embedding. + col_schemas : ColumnSchema + The schema of the column(s) used to infer the cardinality. + embeddings_initializer : str, optional + The initializer for the `embeddings` matrix (see `keras.initializers`), + by default "uniform". + embeddings_regularizer : str, optional + The regularizer function applied to the `embeddings` matrix (see `keras.regularizers`), + by default None. + embeddings_constraint : str, optional + The constraint function applied to the `embeddings` matrix (see `keras.constraints`), + by default None. + mask_zero : bool, optional + Whether or not the input value 0 is a special "padding" value that should be masked out. 
+ This is useful when using recurrent layers which may take variable length input, + by default False. + input_length : int, optional + The length of input sequences when it is constant, by default None. + sequence_combiner : CombinerType, optional + A string specifying how to combine embedding results for each entry ("mean", "sqrtn" + and "sum" are supported) or a layer. Default is None (no combiner used). + trainable : bool, optional + Whether the layer's variables should be trainable, by default True. + name : str, optional + The name of the layer, by default None. + dtype : str, optional + The data type of the layer's computations and weights. It can also be a + `tf.keras.mixed_precision.Policy`, which allows the computation and weight + dtype to differ, by default None. + dynamic : bool, optional + Set this to `True` if the layer should only be run eagerly and should not be used + to generate a static computation graph, by default False. + l2_batch_regularization_factor : float, optional + The factor for L2 regularization of the embeddings vectors (from the current batch only), + by default 0.0. + **kwargs: + Other keyword arguments forwarded to the Keras Layer. """ def __init__( @@ -353,9 +349,33 @@ def from_dataset( ) def to_dataset(self, gpu=None) -> merlin.io.Dataset: + """Converts the EmbeddingTable to a merlin.io.Dataset. + + Parameters + ---------- + gpu: bool + Whether to use gpu. + + Returns + ------- + merlin.io.Dataset + The dataset representation of the EmbeddingTable. + """ return merlin.io.Dataset(self.to_df(gpu=gpu)) def to_df(self, gpu=None): + """Converts the EmbeddingTable to a DataFrame. + + Parameters + ---------- + gpu: bool + Whether to use gpu. + + Returns + ------- + cudf or pandas DataFrame + The DataFrame representation of the EmbeddingTable. + """ return tensor_to_df(self.table.embeddings, gpu=gpu) def _maybe_build(self, inputs): @@ -367,6 +387,13 @@ def _maybe_build(self, inputs): return super(EmbeddingTable, self)._maybe_build(inputs) def build(self, input_shapes): + """Builds the EmbeddingTable based on the input shapes. + + Parameters + ---------- + input_shapes: tf.TensorShape or dictionary of shapes. + The shapes of the input tensors. + """ if not self.table.built: self.table.build(input_shapes) return super(EmbeddingTable, self).build(input_shapes) @@ -395,6 +422,13 @@ def call( return out def _call_table(self, inputs, **kwargs): + """Performs the lookup operation for the inputs in the embedding table. + + Parameters + ---------- + inputs : tf.Tensor, tf.RaggedTensor, or tf.SparseTensor + The input tensors for the lookup operation. + """ if isinstance(inputs, (tf.RaggedTensor, tf.SparseTensor)): if self.sequence_combiner and isinstance(self.sequence_combiner, str): if isinstance(inputs, tf.RaggedTensor): @@ -439,6 +473,18 @@ def _call_table(self, inputs, **kwargs): def compute_output_shape( self, input_shape: Union[tf.TensorShape, Dict[str, tf.TensorShape]] ) -> Union[tf.TensorShape, Dict[str, tf.TensorShape]]: + """Computes the shape of the output tensors. + + Parameters + ---------- + input_shape : Union[tf.TensorShape, Dict[str, tf.TensorShape]] + The shape of the input tensors. + + Returns + ------- + Union[tf.TensorShape, Dict[str, tf.TensorShape]] + The shape of the output tensors. 
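+
+        For example, under the input conventions described in the class
+        docstring, a scalar categorical input shaped ``(batch_size, 1)``
+        typically maps to ``(batch_size, dim)``, while a sequential input
+        shaped ``(batch_size, seq_length)`` maps to
+        ``(batch_size, seq_length, dim)``, unless a ``sequence_combiner``
+        collapses the sequence dimension to produce ``(batch_size, dim)``.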
+ """ if isinstance(input_shape, dict): output_shapes = {} for feature_name in self.schema.column_names: @@ -454,6 +500,18 @@ def compute_output_shape( def _compute_output_shape_table( self, input_shape: Union[tf.TensorShape, tuple] ) -> tf.TensorShape: + """Helper method to compute the output shape of a single input tensor. + + Parameters + ---------- + input_shape : tf.TensorShape + The shape of the input tensor. + + Returns + ------- + tf.TensorShape + The shape of the output tensor. + """ first_dims = input_shape if input_shape.rank > 1: @@ -468,10 +526,36 @@ def _compute_output_shape_table( return output_shapes def compute_call_output_shape(self, input_shapes): + """Computes the shape of the output of a call to this layer. + + Parameters + ---------- + input_shapes: tf.TensorShape or dictionary of shapes. + The shapes of the input tensors. + + Returns + ------- + Union[tf.TensorShape, Dict[str, tf.TensorShape]] + The shape of the output of a call to this layer. + """ return self.compute_output_shape(input_shapes) @classmethod def from_config(cls, config, table=None): + """Creates an EmbeddingTable from its configuration. + + Parameters + ---------- + config : dict + Configuration dictionary. + table : tf.keras.layers.Embedding, optional + An optional embedding layer. + + Returns + ------- + EmbeddingTable + A newly created EmbeddingTable. + """ if table: config["table"] = table else: @@ -482,6 +566,13 @@ def from_config(cls, config, table=None): return super().from_config(config) def get_config(self): + """Returns the configuration of this EmbeddingTable. + + Returns + ------- + dict + Configuration dictionary. + """ config = super().get_config() config["table"] = tf.keras.layers.serialize(self.table) if isinstance(self.sequence_combiner, tf.keras.layers.Layer): @@ -732,6 +823,25 @@ def __init__(self, weight_feature_name: str, axis=1, **kwargs): self.weight_feature_name = weight_feature_name def call(self, inputs, features): + """Performs the weighted average calculation. + + Parameters + ---------- + inputs: tf.Tensor + Input tensor. + features: dict + Dictionary of features, must include the weight feature. + + Returns + ------- + Tensor + Output tensor after applying the weighted average calculation. + + Raises + ------ + ValueError + If the inputs is a tf.RaggedTensor, the weight feature should also be a tf.RaggedTensor. + """ weight_feature = features[self.weight_feature_name] if isinstance(inputs, tf.RaggedTensor) and not isinstance(weight_feature, tf.RaggedTensor): raise ValueError( @@ -751,6 +861,18 @@ def call(self, inputs, features): return output def compute_output_shape(self, input_shape): + """Computes the output shape. + + Parameters + ---------- + input_shape : tf.TensorShape + Shape of the input. + + Returns + ------- + tf.TensorShape + Shape of the output, which is the same as the input shape in this case. + """ return input_shape @staticmethod @@ -791,6 +913,13 @@ def from_schema_convention(schema: Schema, weight_features_name_suffix: str = "_ return seq_combiners def get_config(self): + """Returns the configuration of the layer. + + Returns + ------- + dict + A dictionary containing the configuration of the layer. + """ config = super().get_config() config["axis"] = self.axis config["weight_feature_name"] = self.weight_feature_name @@ -1179,12 +1308,19 @@ def from_config(cls, config): class SequenceEmbeddingFeatures(EmbeddingFeatures): """Input block for embedding-lookups for categorical features. 
This module produces 3-D tensors, this is useful for sequential models like transformers. + Parameters ---------- - {embedding_features_parameters} + feature_config: Dict[str, FeatureConfig] + This specifies what TableConfig to use for each feature. For shared embeddings, the same + TableConfig can be used for multiple features. + mask_zero: bool + Whether or not the input value 0 is a special "padding" value that should be masked out. padding_idx: int The symbol to use for padding. {tabular_module_parameters} + add_default_pre: bool, default True + Whether or not to add a default preprocessing block. """ def __init__( @@ -1200,6 +1336,7 @@ def __init__( add_default_pre=True, **kwargs, ): + """Initializes the block.""" if add_default_pre: embedding_pre = [Filter(list(feature_config.keys()))] pre = [embedding_pre, pre] if pre else embedding_pre # type: ignore @@ -1218,11 +1355,37 @@ def __init__( self.mask_zero = mask_zero def lookup_feature(self, name, val, **kwargs): + """Looks up the embedding for a specific feature from the pre-trained embedding tables. + + Parameters + ---------- + name: str + The name of the feature to lookup. + val: tf.Tensor + The tensor of feature values to look up in the embedding tables. + + Returns + ------- + tf.Tensor + The corresponding embedding tensor. + """ return super(SequenceEmbeddingFeatures, self).lookup_feature( name, val, output_sequence=True ) def compute_call_output_shape(self, input_shapes): + """Computes the output shapes given the input shapes. + + Parameters + ---------- + input_shapes: dict + Dictionary mapping input names to their shapes. + + Returns + ------- + dict + Dictionary mapping output names to their shapes. + """ batch_size = self.calculate_batch_size_from_input_shapes(input_shapes) sequence_length = input_shapes[list(self.feature_config.keys())[0]][1] @@ -1235,6 +1398,20 @@ def compute_call_output_shape(self, input_shapes): return output_shapes def compute_mask(self, inputs, mask=None): + """Computes a mask tensor from the inputs. + + Parameters + ---------- + inputs: dict + Dictionary mapping input names to their values. + mask: tf.Tensor, optional + An optional mask to apply to the inputs. + + Returns + ------- + dict or None + A mask tensor, or None if `mask_zero` is False. + """ if not self.mask_zero: return None outputs = {} @@ -1244,6 +1421,13 @@ def compute_mask(self, inputs, mask=None): return outputs def get_config(self): + """Gets the configuration dictionary for this block. + + Returns + ------- + dict + The configuration dictionary. + """ config = super().get_config() config["mask_zero"] = self.mask_zero config["padding_idx"] = self.padding_idx diff --git a/merlin/models/tf/models/base.py b/merlin/models/tf/models/base.py index c2003c68dd..2b3a1c29f3 100644 --- a/merlin/models/tf/models/base.py +++ b/merlin/models/tf/models/base.py @@ -1591,8 +1591,13 @@ def save(self, *args, **kwargs): class Model(BaseModel): """Merlin Model class + `Model` is the main base class that represents a model in Merlin Models. + It can be configured with a number of pre and post processing blocks and can manage a context. + Parameters ---------- + blocks : list + List of `Block` instances in the model context : Optional[ModelContext], optional ModelContext is used to store/retrieve public variables across blocks, by default None. 
@@ -1618,6 +1623,7 @@ def __init__( prep_features: Optional[bool] = True, **kwargs, ): + """Creates a new `Model` instance.""" super(Model, self).__init__(**kwargs) context = context or ModelContext() @@ -1767,6 +1773,27 @@ def build(self, input_shape=None): self.built = True def call(self, inputs, targets=None, training=False, testing=False, output_context=False): + """ + Method for forward pass of the model. + + Parameters + ---------- + inputs : Tensor or dict of Tensor + Input Tensor(s) for the model + targets : Tensor or dict of Tensor, optional + Target Tensor(s) for the model + training : bool, optional + Flag to indicate whether the model is in training phase + testing : bool, optional + Flag to indicate whether the model is in testing phase + output_context : bool, optional + Flag to indicate whether to return the context along with the output + + Returns + ------- + Tensor or tuple of Tensor and ModelContext + Output of the model, and optionally the context + """ outputs = inputs features = self._prepare_features(inputs, targets=targets) if isinstance(features, tuple): @@ -1827,10 +1854,32 @@ def _call_child( @property def first(self): + """ + The first `Block` in the model. + + This property provides a simple way to quickly access the first `Block` in the model's + sequence of blocks. + + Returns + ------- + Block + The first `Block` in the model. + """ return self.blocks[0] @property def last(self): + """ + The last `Block` in the model. + + This property provides a simple way to quickly access the last `Block` in the model's + sequence of blocks. + + Returns + ------- + Block + The last `Block` in the model. + """ return self.blocks[-1] @classmethod @@ -1879,6 +1928,25 @@ def from_block( @classmethod def from_config(cls, config, custom_objects=None): + """ + Creates a model from its config. + + This method recreates a model instance from a configuration dictionary and + optional custom objects. + + Parameters + ---------- + config : dict + The configuration dictionary representing the model. + custom_objects : dict, optional + Dictionary mapping names to custom classes or functions to be considered + during deserialization. + + Returns + ------- + Model + The created `Model` instance. + """ pre = config.pop("pre", None) post = config.pop("post", None) schema = config.pop("schema", None) @@ -1915,6 +1983,22 @@ def from_config(cls, config, custom_objects=None): return model def get_sample_inputs(self, batch_size=None): + """ + Generates sample inputs for the model. + + This method creates a dictionary of sample inputs for each input feature, useful for + testing or initializing the model. + + Parameters + ---------- + batch_size : int, optional + The batch size for the sample inputs. If not specified, defaults to 2. + + Returns + ------- + dict + A dictionary mapping feature names to sample input tensors. + """ batch_size = batch_size or 2 if self.input_schema is not None: inputs = {} @@ -1960,6 +2044,18 @@ def get_sample_inputs(self, batch_size=None): return inputs def get_config(self): + """ + Returns the model configuration as a dictionary. + + This method returns a dictionary containing the configuration of the model. + The dictionary includes the configuration of each block in the model, + as well as additional properties such as `pre` and `post` layers, and the `schema`. + + Returns + ------- + dict + The configuration of the model. 
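+
+        Example
+        -------
+        A minimal, illustrative round trip (``model`` is assumed to be an
+        already-built ``Model``); the config carries the serialized schema
+        and every block::
+
+            config = model.get_config()
+            restored = Model.from_config(config)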
+ """ config = maybe_serialize_keras_objects(self, {}, ["pre", "post"]) config["schema"] = schema_utils.schema_to_tensorflow_metadata_json(self.schema) for i, layer in enumerate(self.blocks): diff --git a/merlin/models/tf/models/retrieval.py b/merlin/models/tf/models/retrieval.py index 751c57862c..1b05ed3972 100644 --- a/merlin/models/tf/models/retrieval.py +++ b/merlin/models/tf/models/retrieval.py @@ -70,9 +70,6 @@ def MatrixFactorizationModel( logits_temperature: float Parameter used to reduce model overconfidence, so that logits / T. Defaults to 1. - loss: Optional[LossType] - Loss function. - Defaults to `bpr`. samplers: List[ItemSampler] List of samplers for negative sampling, by default `[InBatchSampler()]` @@ -543,6 +540,8 @@ def YoutubeDNNRetrievalModelV2( By default None post: Optional[tf.keras.layers.Layer], optional The optional layer to apply on top of the query encoder. + outputs : Union[ModelOutput, List[ModelOutput]], optional + Specifies the model's outputs. If not specified, the outputs will be inferred. logits_temperature: float, optional Parameter used to reduce model overconfidence, so that logits / T. Defaults to 1. @@ -556,6 +555,11 @@ def YoutubeDNNRetrievalModelV2( encoded ids, which are usually reserved for , out-of-vocabulary or padding. By default 0. + + Returns + -------- + RetrievalModelV2 + The constructed Youtube-DNN based retrieval model """ if not inputs: inputs = schema diff --git a/merlin/models/tf/transforms/regularization.py b/merlin/models/tf/transforms/regularization.py index e155b21d72..1c01a59693 100644 --- a/merlin/models/tf/transforms/regularization.py +++ b/merlin/models/tf/transforms/regularization.py @@ -31,6 +31,21 @@ def __init__(self, **kwargs): super(L2Norm, self).__init__(**kwargs) def call(self, inputs: Union[tf.Tensor, TabularData], axis: int = -1, **kwargs): + """ + Invokes the L2 normalization on the input tensor or dictionary of tensors. + + Parameters + ---------- + inputs: Union[tf.Tensor, TabularData] + A Tensor or TabularData input to normalize. + axis: int, optional + The axis on which to normalize, by default -1. + + Returns + ------- + Union[tf.Tensor, TabularData] + The L2-normalized tensor or dictionary of tensors. + """ if isinstance(inputs, dict): inputs = {key: self._l2_norm(inp, axis=axis) for key, inp in inputs.items()} else: @@ -67,4 +82,17 @@ def _l2_norm( ) def compute_output_shape(self, input_shape): + """ + Compute the output shape of the tensor after normalization. + + Parameters + ---------- + input_shape : tuple + A tuple indicating the shape of the input tensor. + + Returns + ------- + tuple + The shape of the tensor after L2 normalization. + """ return input_shape diff --git a/merlin/models/tf/transforms/sequence.py b/merlin/models/tf/transforms/sequence.py index 51affcf5fd..9dd1ee6f85 100644 --- a/merlin/models/tf/transforms/sequence.py +++ b/merlin/models/tf/transforms/sequence.py @@ -973,10 +973,29 @@ class ReplaceMaskedEmbeddings(Block): """ def __init__(self, **kwargs): + """Initializes the block.""" super().__init__(**kwargs) self.supports_masking = True def build(self, input_shape): + """Builds the block's internal variables. + + This method creates a trainable embedding to replace masked interactions in the input. + + Parameters + ---------- + input_shape : tf.TensorShape + Shape of the input tensor. + + Returns + ------- + None + + Raises + ------ + ValueError + If the last dimension of the input shape is None. 
+ """ self.hidden_size = input_shape[-1] if self.hidden_size is None: raise ValueError("The last dim of inputs cannot be None") From 55b46aed25c4a05fe935f01ad3291edbf0a983e3 Mon Sep 17 00:00:00 2001 From: Adam Laiacano Date: Fri, 2 Jun 2023 14:51:45 -0400 Subject: [PATCH 4/4] update docstring syntax --- merlin/datasets/ecommerce/dressipi/dataset.py | 1 + merlin/models/tf/blocks/interaction.py | 1 + merlin/models/tf/core/encoder.py | 1 + merlin/models/tf/inputs/embedding.py | 7 +++++++ merlin/models/tf/loader.py | 3 ++- merlin/models/tf/metrics/topk.py | 2 ++ merlin/models/tf/models/base.py | 3 +++ merlin/models/tf/models/ranking.py | 1 + merlin/models/tf/outputs/base.py | 1 + merlin/models/tf/outputs/block.py | 3 +++ merlin/models/tf/outputs/classification.py | 2 ++ merlin/models/tf/outputs/sampling/in_batch.py | 1 + merlin/models/tf/outputs/topk.py | 1 + merlin/models/tf/prediction_tasks/next_item.py | 8 +++++++- merlin/models/tf/prediction_tasks/retrieval.py | 1 + merlin/models/tf/transforms/sequence.py | 1 + merlin/models/tf/utils/testing_utils.py | 1 + merlin/models/tf/utils/tf_utils.py | 3 +++ merlin/models/torch/batch.py | 4 ++-- 19 files changed, 41 insertions(+), 4 deletions(-) diff --git a/merlin/datasets/ecommerce/dressipi/dataset.py b/merlin/datasets/ecommerce/dressipi/dataset.py index 81a7aa5359..3646a28c23 100644 --- a/merlin/datasets/ecommerce/dressipi/dataset.py +++ b/merlin/datasets/ecommerce/dressipi/dataset.py @@ -70,6 +70,7 @@ def get_dressipi2022(input_path): ---------- path: Union[str, Path] Directory to load the raw data from. + Returns ------- train: merlin.io.Dataset diff --git a/merlin/models/tf/blocks/interaction.py b/merlin/models/tf/blocks/interaction.py index 745172c7e8..a83c3cec7f 100644 --- a/merlin/models/tf/blocks/interaction.py +++ b/merlin/models/tf/blocks/interaction.py @@ -293,6 +293,7 @@ def FMBlock( factors_dim : Optional[int], optional If fm_input_block is not provided, the factors_dim is used to define the embeddings dim to instantiate InputBlockV2, by default None + Returns ------- tf.Tensor diff --git a/merlin/models/tf/core/encoder.py b/merlin/models/tf/core/encoder.py index ddc936abbb..0dd47b5187 100644 --- a/merlin/models/tf/core/encoder.py +++ b/merlin/models/tf/core/encoder.py @@ -629,6 +629,7 @@ def fit(self, *args, **kwargs): class EmbeddingEncoder(Encoder): """Creates an Encoder from an EmbeddingTable. Typically used with RetrievalModelV2. + Parameters ---------- schema : Union[ColumnSchema, Schema] diff --git a/merlin/models/tf/inputs/embedding.py b/merlin/models/tf/inputs/embedding.py index 32af3bc828..62192e0f45 100644 --- a/merlin/models/tf/inputs/embedding.py +++ b/merlin/models/tf/inputs/embedding.py @@ -644,6 +644,7 @@ def Embeddings( Factor for L2 regularization of the embeddings vectors (from the current batch only) If a dictionary is provided, the keys are feature names and the values are regularization factors + Returns ------- ParallelBlock @@ -1445,6 +1446,7 @@ def ContinuousEmbedding( ) -> SequentialBlock: """Concatenates all numerical features and project then using the specified Block. 
+ Parameters ---------- inputs : Block @@ -1475,10 +1477,12 @@ def ContinuousEmbedding( def serialize_table_config(table_config: TableConfig) -> Dict[str, Any]: """Serializes a TableConfig instance + Parameters ---------- table_config : TableConfig Configuration data for one embedding table + Returns ------- Dict[str, Any] @@ -1500,6 +1504,7 @@ def deserialize_table_config(table_params: Dict[str, Any]) -> TableConfig: ---------- table_params : Dict[str, Any] Dict with the serialized values + Returns ------- TableConfig @@ -1516,10 +1521,12 @@ def deserialize_table_config(table_params: Dict[str, Any]) -> TableConfig: def serialize_feature_config(feature_config: FeatureConfig) -> Dict[str, Any]: """Serializes a FeatureConfig instance + Parameters ---------- feature_config : FeatureConfig Configuration data for one embedding feature + Returns ------- Dict[str, Any] diff --git a/merlin/models/tf/loader.py b/merlin/models/tf/loader.py index 2a3095086e..3bc57c4548 100644 --- a/merlin/models/tf/loader.py +++ b/merlin/models/tf/loader.py @@ -388,7 +388,8 @@ def sample_batch( If enabled, it converts multi-hot/list features to dense or ragged based on the schema. It also ensures that scalar features are converted to 2D (batch size, 1). P.s. The features are automatically prepared by InputBlockV2 if it is used - Returns: + + Returns ------- batch: Dict[tf.tensor] dictionary of input tensors. diff --git a/merlin/models/tf/metrics/topk.py b/merlin/models/tf/metrics/topk.py index ff470aece1..c76977c772 100644 --- a/merlin/models/tf/metrics/topk.py +++ b/merlin/models/tf/metrics/topk.py @@ -74,6 +74,7 @@ def precision_at( ) -> tf.Tensor: """ Computes Precision@K metric + Parameters ---------- {METRIC_PARAMETERS_DOCSTRING} @@ -90,6 +91,7 @@ def average_precision_at( ) -> tf.Tensor: """ Computes Mean Average Precision (MAP) @K + Parameters ---------- {METRIC_PARAMETERS_DOCSTRING} diff --git a/merlin/models/tf/models/base.py b/merlin/models/tf/models/base.py index 2b3a1c29f3..2d5d975676 100644 --- a/merlin/models/tf/models/base.py +++ b/merlin/models/tf/models/base.py @@ -182,6 +182,7 @@ def get_output_schema(export_path: str) -> Schema: @tf.keras.utils.register_keras_serializable(package="merlin_models") class ModelBlock(Block, tf.keras.Model): """Block that extends `tf.keras.Model` to make it saveable. + Parameters ---------- block : Block @@ -2623,6 +2624,7 @@ def _maybe_convert_merlin_dataset( ) -> Any: """Converts the Dataset to a Loader with the given batch_size and shuffle options + Parameters ---------- data @@ -2631,6 +2633,7 @@ def _maybe_convert_merlin_dataset( Batch size shuffle : bool, optional Enables data shuffling during loading, by default True + Returns ------- Any diff --git a/merlin/models/tf/models/ranking.py b/merlin/models/tf/models/ranking.py index 021672735a..a0b46ba2da 100644 --- a/merlin/models/tf/models/ranking.py +++ b/merlin/models/tf/models/ranking.py @@ -238,6 +238,7 @@ def DeepFMModel( The prediction tasks to be used, by default this will be inferred from the Schema. For custom prediction tasks we recommending using OutputBlock and blocks based on ModelOutput than the ones based in PredictionTask (that will be deprecated). 
+ Returns ------- Model diff --git a/merlin/models/tf/outputs/base.py b/merlin/models/tf/outputs/base.py index 6957fdeda2..1b16f38d1a 100644 --- a/merlin/models/tf/outputs/base.py +++ b/merlin/models/tf/outputs/base.py @@ -290,6 +290,7 @@ def from_config(cls, config): @tf.keras.utils.register_keras_serializable(package="merlin_models") class DotProduct(Layer): """Dot-product between queries & items. + Parameters: ----------- query_name : str, optional diff --git a/merlin/models/tf/outputs/block.py b/merlin/models/tf/outputs/block.py index a328df2450..3114846487 100644 --- a/merlin/models/tf/outputs/block.py +++ b/merlin/models/tf/outputs/block.py @@ -132,12 +132,14 @@ def OutputBlock( def _get_col_set_by_tags(schema: Schema, tags) -> Set[str]: """Returns a set with the schema column names + Parameters ---------- schema : Schema Schema tags : Tags to filter + Returns ------- Set[str] @@ -152,6 +154,7 @@ def _set_task_block( task_blocks: Optional[Union[Layer, Dict[str, Layer]]] = None, ): """Creates a tower (task_block) for each task (output). + Parameters ---------- output_block : OutputBlock diff --git a/merlin/models/tf/outputs/classification.py b/merlin/models/tf/outputs/classification.py index f06275979e..633926220d 100644 --- a/merlin/models/tf/outputs/classification.py +++ b/merlin/models/tf/outputs/classification.py @@ -36,6 +36,7 @@ def default_binary_metrics() -> List[tf.keras.metrics.Metric]: """Returns the default binary metrics + Returns ------- List[tf.keras.metrics.Metric] @@ -52,6 +53,7 @@ def default_binary_metrics() -> List[tf.keras.metrics.Metric]: def default_categorical_prediction_metrics(k=10): """Returns the default top-k metrics for categorical classification + Returns ------- List[tf.keras.metrics.Metric] diff --git a/merlin/models/tf/outputs/sampling/in_batch.py b/merlin/models/tf/outputs/sampling/in_batch.py index b6a40d8284..2f99434c2d 100644 --- a/merlin/models/tf/outputs/sampling/in_batch.py +++ b/merlin/models/tf/outputs/sampling/in_batch.py @@ -39,6 +39,7 @@ class InBatchSamplerV2(CandidateSampler): ---------- .. [1] Yi, Xinyang, et al. "Sampling-bias-corrected neural modeling for large corpus item recommendations." Proceedings of the 13th ACM Conference on Recommender Systems. 2019. + Parameters ---------- batch_size : int, optional diff --git a/merlin/models/tf/outputs/topk.py b/merlin/models/tf/outputs/topk.py index 9399130d3f..80d7025dd3 100644 --- a/merlin/models/tf/outputs/topk.py +++ b/merlin/models/tf/outputs/topk.py @@ -236,6 +236,7 @@ def compute_output_shape(self, input_shape): @tf.keras.utils.register_keras_serializable(package="merlin.models") class TopKOutput(ModelOutput): """Prediction block for top-k evaluation + Parameters ---------- to_call: Union[str, TopKLayer] diff --git a/merlin/models/tf/prediction_tasks/next_item.py b/merlin/models/tf/prediction_tasks/next_item.py index 4286877881..73d6e22539 100644 --- a/merlin/models/tf/prediction_tasks/next_item.py +++ b/merlin/models/tf/prediction_tasks/next_item.py @@ -41,6 +41,7 @@ class ItemsPredictionWeightTying(Block): """Tying the item embedding weights with the output projection layer matrix [1] The output logits are obtained by multiplying the output vector by the item-ids embeddings. + Parameters ---------- schema : Schema @@ -101,6 +102,7 @@ def ItemsPredictionPopSampled( That implementation does not require the actual item frequencies/probabilities if the item ids are sorted by frequency. 
The PopularityBasedSampler approximates the item probabilities using the log_uniform (zipfian) distribution. + Parameters: ----------- schema: Schema @@ -114,10 +116,12 @@ def ItemsPredictionPopSampled( ignore_false_negatives: bool Ignore sampled items that are equal to the target classes Defaults to True - Returns: + + Returns ------- A SequenceBlock that performs popularity-based sampling of negatives, scores the items and applies the logQ correction for sampled softmax + References ---------- .. [1] Yoshua Bengio and Jean-Sébastien Sénécal. 2003. Quick Training of Probabilistic @@ -169,6 +173,7 @@ def NextItemPredictionTask( ) -> MultiClassClassificationTask: """ Function to create the NextItemPrediction task with the right parameters. + Parameters ---------- schema: Schema @@ -209,6 +214,7 @@ def NextItemPredictionTask( Optional extra pre-call block for post-processing the logits, by default None. You can for example use `post_logits = mm.PopularitySamplingBlock(item_fequency)` for populariy sampling correction. + Returns ------- PredictionTask diff --git a/merlin/models/tf/prediction_tasks/retrieval.py b/merlin/models/tf/prediction_tasks/retrieval.py index aa8ee8fd6c..f1ae7c8e5d 100644 --- a/merlin/models/tf/prediction_tasks/retrieval.py +++ b/merlin/models/tf/prediction_tasks/retrieval.py @@ -59,6 +59,7 @@ class ItemRetrievalTask(MultiClassClassificationTask): Add query embeddings to the context block, by default False store_negative_ids: bool Returns negative items ids as part of the output, by default False + Returns ------- PredictionTask diff --git a/merlin/models/tf/transforms/sequence.py b/merlin/models/tf/transforms/sequence.py index 9dd1ee6f85..312019cd92 100644 --- a/merlin/models/tf/transforms/sequence.py +++ b/merlin/models/tf/transforms/sequence.py @@ -40,6 +40,7 @@ class RemovePad3D(Block): padding_idx: int The padding index value. Defaults to 0. + Returns ------- targets: tf.Tensor diff --git a/merlin/models/tf/utils/testing_utils.py b/merlin/models/tf/utils/testing_utils.py index 80927af5c0..6b586dcacb 100644 --- a/merlin/models/tf/utils/testing_utils.py +++ b/merlin/models/tf/utils/testing_utils.py @@ -213,6 +213,7 @@ def layer_test( called in. supports_masking: Optional boolean to check the `supports_masking` property of the layer. If None, the check will not be performed. + Returns: The output data (Numpy array) returned by the layer, for additional checks to be done by the calling code. diff --git a/merlin/models/tf/utils/tf_utils.py b/merlin/models/tf/utils/tf_utils.py index b7fc8b10e6..6a458a702e 100644 --- a/merlin/models/tf/utils/tf_utils.py +++ b/merlin/models/tf/utils/tf_utils.py @@ -180,6 +180,7 @@ def extract_topk( by default 1e-6 seed : int, optional Random seed to use for tie breaking + Returns ------- Tuple(tf.Tensor,tf.Tensor,tf.Tensor) @@ -360,6 +361,7 @@ def get_candidate_probs( is_prob_distribution: bool, optional If True, the item_freq_probs should be a probability distribution of the items. If False, the item frequencies is converted to probabilities + Returns ------- A tensor with the item probability distributon @@ -483,6 +485,7 @@ def check_inputs_mask_compatible_shape( inputs: Union[tf.Tensor, tf.RaggedTensor], mask: Union[tf.Tensor, tf.RaggedTensor] ): """Check if the shape and the type of the input and mask tensors are compatible. 
+ Parameters ---------- inputs : Union[tf.Tensor, tf.RaggedTensor] diff --git a/merlin/models/torch/batch.py b/merlin/models/torch/batch.py index dd5f8eca9e..df1390e65d 100644 --- a/merlin/models/torch/batch.py +++ b/merlin/models/torch/batch.py @@ -312,7 +312,7 @@ def sample_batch( shuffle: bool Whether to sample a random batch or not, by default False. - Returns: + Returns ------- features: Dict[torch.Tensor] dictionary of feature tensors. @@ -352,7 +352,7 @@ def sample_features( shuffle: bool Whether to sample a random batch or not, by default False. - Returns: + Returns ------- features: Dict[torch.Tensor] dictionary of feature tensors.
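For reference, a minimal sketch of the batch-sampling helper whose docstrings are updated above, assuming the TensorFlow `sample_batch` exported by `merlin.models.tf`; the dataset and batch size are illustrative only.

import merlin.models.tf as mm
from merlin.datasets.synthetic import generate_data

# Illustrative sketch: draw one batch of input tensors from a merlin.io.Dataset,
# matching the "dictionary of input tensors" return value documented above.
train, _ = generate_data("music_streaming", num_rows=100, set_sizes=(0.9, 0.1))

batch = mm.sample_batch(train, batch_size=8, include_targets=False)
for name, tensor in batch.items():
    print(name, tensor.shape)

With `include_targets` left at its default, the helper also returns the target tensors, which is the behaviour the PyTorch `sample_batch`/`sample_features` docstrings above describe for `merlin.models.torch.batch`.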