diff --git a/merlin/models/tf/blocks/retrieval/base.py b/merlin/models/tf/blocks/retrieval/base.py
index e40ac648e2..f76ffb1d8a 100644
--- a/merlin/models/tf/blocks/retrieval/base.py
+++ b/merlin/models/tf/blocks/retrieval/base.py
@@ -135,14 +135,15 @@ class ItemRetrievalScorer(Block):
     """Block for ItemRetrieval, which expects query/user and item embeddings as input and
     uses dot product to score the positive item (inputs["item"]) and also sampled negative
     items (during training).
+
     Parameters
     ----------
-    samplers : List[ItemSampler], optional
+    samplers: List[ItemSampler], optional
         List of item samplers that provide negative samples when `training=True`
-    sampling_downscore_false_negatives : bool, optional
+    sampling_downscore_false_negatives: bool, optional
         Identify false negatives (sampled item ids equal to the positive item and downscore them
         to the `sampling_downscore_false_negatives_value`), by default True
-    sampling_downscore_false_negatives_value : int, optional
+    sampling_downscore_false_negatives_value: int, optional
         Value to be used to downscore false negatives when
         `sampling_downscore_false_negatives=True`, by default `np.finfo(np.float32).min / 100.0`
     item_id_feature_name: str
@@ -174,6 +175,7 @@ def __init__(
         store_negative_ids: bool = False,
         **kwargs,
     ):
+        """Initializes the `ItemRetrievalScorer` class."""
         super().__init__(**kwargs)
 
         self.downscore_false_negatives = sampling_downscore_false_negatives
@@ -193,6 +195,13 @@ def __init__(
         self.set_required_features()
 
     def build(self, input_shapes):
+        """Builds the block.
+
+        Parameters
+        ----------
+        input_shapes: tuple or dict
+            Shape of the input tensor.
+        """
         if isinstance(input_shapes, dict):
             query_shape = input_shapes[self.query_name]
             self.context.add_weight(
@@ -206,6 +215,13 @@ def build(self, input_shapes):
         super().build(input_shapes)
 
     def _check_input_from_two_tower(self, inputs):
+        """Checks if the inputs from the two towers (query and item) are correctly provided.
+
+        Parameters
+        ----------
+        inputs: dict
+            Dictionary of inputs.
+        """
         if set(inputs.keys()) != set([self.query_name, self.item_name]):
             raise ValueError(
                 f"Wrong input-names, expected: {[self.query_name, self.item_name]} "
@@ -223,6 +239,7 @@ def call(
             the positive item (inputs["item"]).
             For the sampled-softmax mode, logits are computed by multiplying the query vector
             and the item embeddings matrix (self.context.get_embedding(self.item_domain))
+
         Parameters
         ----------
         inputs : Union[tf.Tensor, TabularData]
@@ -230,6 +247,7 @@ def call(
             where embeddings are 2D tensors (batch size, embedding size)
         training : bool, optional
             Flag that indicates whether in training mode, by default True
+
         Returns
         -------
         tf.Tensor
@@ -273,6 +291,7 @@ def call_outputs(
     ) -> "PredictionOutput":
         """Based on the user/query embedding (inputs[self.query_name]), uses dot product to score
             the positive item and also sampled negative items (during training).
+
         Parameters
         ----------
         inputs : TabularData
@@ -280,6 +299,7 @@ def call_outputs(
             where embeddings are 2D tensors (batch size, embedding size)
         training : bool, optional
             Flag that indicates whether in training mode, by default True
+
         Returns
         -------
         [tf.Tensor,tf.Tensor]
@@ -431,6 +451,7 @@ def _prepare_query_item_vectors_for_sampled_softmax(
         return predictions
 
     def set_required_features(self):
+        """Sets the required features for the samplers."""
         required_features = set()
         if self.downscore_false_negatives:
             required_features.add(self.item_id_feature_name)
@@ -442,6 +463,13 @@ def set_required_features(self):
         self._required_features = list(required_features)
 
     def get_config(self):
+        """Returns the configuration of the block as a dictionary.
+
+        Returns
+        -------
+        dict
+            The configuration of the block.
+        """
         config = super().get_config()
         config = maybe_serialize_keras_objects(self, config, ["samplers"])
         config["sampling_downscore_false_negatives"] = self.downscore_false_negatives
@@ -458,6 +486,17 @@ def get_config(self):
 
     @classmethod
     def from_config(cls, config):
+        """Creates a new instance of the class from its config.
+
+        Parameters
+        ----------
+        config: dict
+            A dictionary, typically the output of get_config.
+
+        Returns
+        -------
+        A new instance of the `ItemRetrievalScorer` class.
+        """
         config = maybe_deserialize_keras_objects(config, ["samplers"])
 
         return super().from_config(config)
diff --git a/merlin/models/tf/core/encoder.py b/merlin/models/tf/core/encoder.py
index 19ebaae02f..865a8ed845 100644
--- a/merlin/models/tf/core/encoder.py
+++ b/merlin/models/tf/core/encoder.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -88,6 +88,22 @@ def encode(
         batch_size: int,
         **kwargs,
     ) -> merlin.io.Dataset:
+        """Encodes the given dataset and index.
+
+        Parameters
+        ----------
+        dataset: merlin.io.Dataset
+            The dataset to encode.
+        index: Union[str, ColumnSchema, Schema, Tags]
+            The index to use for encoding.
+        batch_size: int
+            The batch size for encoding.
+
+        Returns
+        -------
+        merlin.io.Dataset
+            The encoded dataset.
+        """
         if isinstance(index, Schema):
             output_schema = index
         elif isinstance(index, ColumnSchema):
@@ -117,12 +133,14 @@ def batch_predict(
         **kwargs,
     ) -> merlin.io.Dataset:
         """Batched prediction using Dask.
+
         Parameters
         ----------
         dataset: merlin.io.Dataset
             Dataset to predict on.
         batch_size: int
             Batch size to use for prediction.
+
         Returns
         -------
         merlin.io.Dataset
@@ -166,6 +184,23 @@ def batch_predict(
         return merlin.io.Dataset(predictions)
 
     def call(self, inputs, *, targets=None, training=False, testing=False, **kwargs):
+        """Calls the model on new inputs and returns the outputs as tensors.
+
+        Parameters
+        ----------
+        inputs : tensor-like or dict/tuple of tensors.
+            Tensors or dict/tuple of tensors representing the input batch.
+        targets : tensor-like, optional
+            Tensors representing the target data.
+        training : bool, optional
+            Whether the model is in training mode.
+        testing : bool, optional
+            Whether the model is in testing mode.
+
+        Returns
+        -------
+        A tensor or dict of tensors corresponding to the result of calling the layer.
+        """
         inputs = self._prepare_features(inputs, targets=targets)
         if isinstance(inputs, tuple):
             inputs, targets = inputs
@@ -180,6 +215,17 @@ def call(self, inputs, *, targets=None, training=False, testing=False, **kwargs)
         )
 
     def __call__(self, inputs, **kwargs):
+        """Overrides the default __call__ method to remove "features" from kwargs.
+
+        Parameters
+        ----------
+        inputs : tensor-like or dict/tuple of tensors.
+            Tensors or dict/tuple of tensors representing the input batch.
+
+        Returns
+        -------
+        A tensor or dict of tensors corresponding to the result of calling the layer.
+        """
         # We remove features here since we don't expect them at inference time
         # Inside the `call` method, we will add them back by assuming inputs=features
         if "features" in kwargs:
@@ -188,6 +234,13 @@ def __call__(self, inputs, **kwargs):
         return super().__call__(inputs, **kwargs)
 
     def build(self, input_shape):
+        """Creates the variables of the layer.
+
+        Parameters
+        ----------
+        input_shape: Tuple[int]
+            The shape of the input data.
+        """
         self._prepare_features.build(input_shape)
         input_shape = self._prepare_features.compute_output_shape(input_shape)
 
@@ -196,18 +249,40 @@ def build(self, input_shape):
             self._build_input_shape = input_shape
 
     def compute_output_shape(self, input_shape):
+        """Computes the output shape of the layer.
+
+        Parameters
+        ----------
+        input_shape: Tuple[int]
+            The shape of the input data.
+
+        Returns
+        -------
+        Tuple[int]
+            The output shape of the layer.
+        """
         input_shape = self._prepare_features.compute_output_shape(input_shape)
         return combinators.compute_output_shape_sequentially(list(self.to_call), input_shape)
 
     def train_step(self, data):
-        """Train step"""
+        """Performs a training step.
+
+        The train step is not implemented and raises an error, as the
+        Encoder block is not meant to be trained by itself and can only be
+        trained as part of a model.
+        """
         raise NotImplementedError(
             "This block is not meant to be trained by itself. ",
             "It can only be trained as part of a model.",
         )
 
     def fit(self, *args, **kwargs):
-        """Fit model"""
+        """Fits the model.
+
+        The fit method is not implemented and raises an error, as the Encoder block
+        is not meant to be trained by itself and can only be trained as part
+        of a model.
+        """
         raise NotImplementedError(
             "This block is not meant to be trained by itself. ",
             "It can only be trained as part of a model.",
@@ -245,6 +320,7 @@ def save(
 
     @property
     def to_call(self):
+        """Provides the list of blocks to be called during the execution of the model."""
         if self.pre:
             yield self.pre
 
@@ -256,22 +332,40 @@ def to_call(self):
 
     @property
     def has_schema(self) -> bool:
+        """Returns True as this class does contain a schema."""
         return True
 
     @property
     def schema(self) -> Schema:
+        """Returns the schema of the model."""
         return self._schema
 
     @property
     def first(self):
+        """Returns the first block of the model."""
         return self.blocks[0]
 
     @property
     def last(self):
+        """Returns the last block of the model."""
         return self.blocks[-1]
 
     @classmethod
     def from_config(cls, config, custom_objects=None):
+        """Creates a new instance of the class by deserializing its config.
+
+        Parameters
+        ----------
+        config: dict
+            A dictionary, typically the output of get_config.
+        custom_objects: dict, optional
+            A dictionary mapping the names of layers to the corresponding
+            functions and classes.
+
+        Returns
+        -------
+        A new instance of Encoder.
+        """
         pre = config.pop("pre", None)
         post = config.pop("post", None)
         layers = [
@@ -291,6 +385,13 @@ def from_config(cls, config, custom_objects=None):
         return output
 
     def get_config(self):
+        """Returns the configuration of the model as a dictionary.
+
+        Returns
+        -------
+        dict
+            The configuration of the model.
+        """
         config = tf_utils.maybe_serialize_keras_objects(self, {}, ["pre", "post"])
         for i, layer in enumerate(self.blocks):
             config[i] = tf.keras.utils.serialize_keras_object(layer)
diff --git a/merlin/models/tf/inputs/base.py b/merlin/models/tf/inputs/base.py
index 7c5c7d6a78..b3924c6741 100644
--- a/merlin/models/tf/inputs/base.py
+++ b/merlin/models/tf/inputs/base.py
@@ -44,7 +44,6 @@ def InputBlock(
     post: Optional[BlockType] = None,
     aggregation: Optional[TabularAggregationType] = None,
     seq: bool = False,
-    max_seq_length: Optional[int] = None,
     add_continuous_branch: bool = True,
     continuous_tags: Optional[Union[TagsType, Tuple[Tags]]] = (Tags.CONTINUOUS,),
     continuous_projection: Optional[Block] = None,
@@ -75,6 +74,9 @@ def InputBlock(
         Next to this, it's also possible to construct it manually.
     branches: Dict[str, Block], optional
         Dictionary of branches to use inside the InputBlock.
+    pre: Optional[BlockType]
+        Transformations to apply on the inputs before the module is
+        called (so **before** `forward`). Default is None.
     post: Optional[BlockType]
         Transformations to apply on the inputs after the module is
         called (so **after** `forward`).
diff --git a/merlin/models/tf/inputs/continuous.py b/merlin/models/tf/inputs/continuous.py
index 328e84e56e..1360f67f3a 100644
--- a/merlin/models/tf/inputs/continuous.py
+++ b/merlin/models/tf/inputs/continuous.py
@@ -98,9 +98,39 @@ def __init__(
 
     @classmethod
     def from_features(cls, features, **kwargs):
+        """Class method for creating an instance of ContinuousFeatures.
+
+        Parameters
+        ----------
+        features: list
+            List of continuous features to include in this module.
+        kwargs : dict
+            Additional keyword arguments.
+
+        Returns
+        -------
+        ContinuousFeatures
+            An instance of ContinuousFeatures class.
+        """
         return cls(features, **kwargs)
 
     def call(self, inputs, *args, **kwargs):
+        """Processes the specified continuous features from the inputs.
+
+        Parameters
+        ----------
+        inputs: dict
+            The input tensors, as a dictionary of string feature name to tensor.
+        args: tuple
+            Additional arguments, not used.
+        kwargs: dict
+            Additional keyword arguments, not used.
+
+        Returns
+        -------
+        dict
+            A dictionary of the processed continuous features.
+        """
         cont_features = self.filter_features(inputs)
         cont_features = {
             k: tf.expand_dims(v, -1) if len(v.shape) == 1 else v for k, v in cont_features.items()
@@ -108,6 +138,18 @@ def call(self, inputs, *args, **kwargs):
         return cont_features
 
     def compute_call_output_shape(self, input_shapes):
+        """Calculates the output shapes of the processed continuous features.
+
+        Parameters
+        ----------
+        input_shapes: dict
+            The shapes of the input tensors.
+
+        Returns
+        -------
+        dict
+            A dictionary of the output shapes of the processed continuous features.
+        """
         cont_features_sizes = self.filter_features.compute_output_shape(input_shapes)
         cont_features_sizes = {
             k: tf.TensorShape(list(v) + [1]) if len(v) == 1 else v
@@ -116,6 +158,13 @@ def compute_call_output_shape(self, input_shapes):
         return cont_features_sizes
 
     def get_config(self):
+        """Returns a dictionary containing the configuration of the ContinuousFeatures block.
+
+        Returns
+        -------
+        dict
+            A dictionary containing the configuration of the ContinuousFeatures block.
+        """
         config = super().get_config()
 
         config["features"] = self.filter_features.feature_names
@@ -123,10 +172,33 @@ def get_config(self):
         return config
 
     def _get_name(self):
+        """Returns the name of the module.
+
+        Returns
+        -------
+        str
+            The name of the module.
+        """
         return "ContinuousFeatures"
 
     def repr_ignore(self) -> List[str]:
+        """Returns a list of module properties to ignore when creating a string representation
+        of the module.
+
+        Returns
+        -------
+        List[str]
+            A list of module properties to ignore in the string representation.
+        """
         return ["filter_features"]
 
     def repr_extra(self):
+        """Returns a string of additional details to display in the string representation
+        of the module.
+
+        Returns
+        -------
+        str
+            Additional details for the string representation of the module.
+        """
         return ", ".join(sorted(self.filter_features.feature_names))
diff --git a/merlin/models/tf/inputs/embedding.py b/merlin/models/tf/inputs/embedding.py
index 3d461f9569..8bcd49d857 100644
--- a/merlin/models/tf/inputs/embedding.py
+++ b/merlin/models/tf/inputs/embedding.py
@@ -156,50 +156,46 @@ class EmbeddingTable(EmbeddingTableBase):
     and tf.SparseTensor which might be 2D (batch_size, 1) for scalars
     or 3d (batch_size, seq_length, 1) for sequential features
 
-     Parameters
-     ----------
-     dim: Dimension of the dense embedding.
-     col_schema: ColumnSchema
-         Schema of the column. This is used to infer the cardinality.
-     embeddings_initializer: Initializer for the `embeddings`
-       matrix (see `keras.initializers`).
-     embeddings_regularizer: Regularizer function applied to
-       the `embeddings` matrix (see `keras.regularizers`).
-     embeddings_constraint: Constraint function applied to
-       the `embeddings` matrix (see `keras.constraints`).
-     mask_zero: Boolean, whether or not the input value 0 is a special "padding"
-       value that should be masked out.
-       This is useful when using recurrent layers
-       which may take variable length input.
-       If this is `True`, then all subsequent layers
-       in the model need to support masking or an exception will be raised.
-       If mask_zero is set to True, as a consequence, index 0 cannot be
-       used in the vocabulary (input_dim should equal size of
-       vocabulary + 1).
-     input_length: Length of input sequences, when it is constant.
-       This argument is required if you are going to connect
-       `Flatten` then `Dense` layers upstream
-       (without it, the shape of the dense outputs cannot be computed).
-    combiner: A string specifying how to combine embedding results for each
-       entry ("mean", "sqrtn" and "sum" are supported) or a layer.
-       Default is None (no combiner used)
-    trainable: Boolean, whether the layer's variables should be trainable.
-    name: String name of the layer.
-    dtype: The dtype of the layer's computations and weights. Can also be a
-       `tf.keras.mixed_precision.Policy`, which allows the computation and weight
-       dtype to differ. Default of `None` means to use
-       `tf.keras.mixed_precision.global_policy()`, which is a float32 policy
-       unless set to different value.
-    dynamic: Set this to `True` if your layer should only be run eagerly, and
-       should not be used to generate a static computation graph.
-       This would be the case for a Tree-RNN or a recursive network,
-       for example, or generally for any layer that manipulates tensors
-       using Python control flow. If `False`, we assume that the layer can
-       safely be used to generate a static computation graph.
-    l2_batch_regularization_factor: float, optional
-        Factor for L2 regularization of the embeddings vectors (from the current batch only)
-        by default 0.0
-    **kwargs: Forwarded Keras Layer parameters
+    Parameters
+    ----------
+    dim : int
+        The dimension of the dense embedding.
+    col_schemas : ColumnSchema
+        The schema of the column(s) used to infer the cardinality.
+    embeddings_initializer : str, optional
+        The initializer for the `embeddings` matrix (see `keras.initializers`),
+        by default "uniform".
+    embeddings_regularizer : str, optional
+        The regularizer function applied to the `embeddings` matrix (see `keras.regularizers`),
+        by default None.
+    embeddings_constraint : str, optional
+        The constraint function applied to the `embeddings` matrix (see `keras.constraints`),
+        by default None.
+    mask_zero : bool, optional
+        Whether or not the input value 0 is a special "padding" value that should be masked out.
+        This is useful when using recurrent layers which may take variable length input,
+        by default False.
+    input_length : int, optional
+        The length of input sequences when it is constant, by default None.
+    sequence_combiner : CombinerType, optional
+        A string specifying how to combine embedding results for each entry ("mean", "sqrtn"
+        and "sum" are supported) or a layer. Default is None (no combiner used).
+    trainable : bool, optional
+        Whether the layer's variables should be trainable, by default True.
+    name : str, optional
+        The name of the layer, by default None.
+    dtype : str, optional
+        The data type of the layer's computations and weights. It can also be a
+        `tf.keras.mixed_precision.Policy`, which allows the computation and weight
+        dtype to differ, by default None.
+    dynamic : bool, optional
+        Set this to `True` if the layer should only be run eagerly and should not be used
+        to generate a static computation graph, by default False.
+    l2_batch_regularization_factor : float, optional
+        The factor for L2 regularization of the embeddings vectors (from the current batch only),
+        by default 0.0.
+    **kwargs:
+        Other keyword arguments forwarded to the Keras Layer.
     """
 
     def __init__(
@@ -353,9 +349,33 @@ def from_dataset(
         )
 
     def to_dataset(self, gpu=None) -> merlin.io.Dataset:
+        """Converts the EmbeddingTable to a merlin.io.Dataset.
+
+        Parameters
+        ----------
+        gpu: bool, optional
+            Whether to use gpu.
+
+        Returns
+        -------
+        merlin.io.Dataset
+            The dataset representation of the EmbeddingTable.
+        """
         return merlin.io.Dataset(self.to_df(gpu=gpu))
 
     def to_df(self, gpu=None):
+        """Converts the EmbeddingTable to a DataFrame.
+
+        Parameters
+        ----------
+        gpu: bool, optional
+            Whether to use gpu.
+
+        Returns
+        -------
+        cudf or pandas DataFrame
+            The DataFrame representation of the EmbeddingTable.
+        """
         return tensor_to_df(self.table.embeddings, gpu=gpu)
 
     def _maybe_build(self, inputs):
@@ -367,6 +387,13 @@ def _maybe_build(self, inputs):
         return super(EmbeddingTable, self)._maybe_build(inputs)
 
     def build(self, input_shapes):
+        """Builds the EmbeddingTable based on the input shapes.
+
+        Parameters
+        ----------
+        input_shapes: tf.TensorShape or dictionary of shapes.
+            The shapes of the input tensors.
+        """
         if not self.table.built:
             self.table.build(input_shapes)
         return super(EmbeddingTable, self).build(input_shapes)
@@ -395,6 +422,13 @@ def call(
         return out
 
     def _call_table(self, inputs, **kwargs):
+        """Performs the lookup operation for the inputs in the embedding table.
+
+        Parameters
+        ----------
+        inputs : tf.Tensor, tf.RaggedTensor, or tf.SparseTensor
+            The input tensors for the lookup operation.
+        """
         if isinstance(inputs, (tf.RaggedTensor, tf.SparseTensor)):
             if self.sequence_combiner and isinstance(self.sequence_combiner, str):
                 if isinstance(inputs, tf.RaggedTensor):
@@ -439,6 +473,18 @@ def _call_table(self, inputs, **kwargs):
     def compute_output_shape(
         self, input_shape: Union[tf.TensorShape, Dict[str, tf.TensorShape]]
     ) -> Union[tf.TensorShape, Dict[str, tf.TensorShape]]:
+        """Computes the shape of the output tensors.
+
+        Parameters
+        ----------
+        input_shape : Union[tf.TensorShape, Dict[str, tf.TensorShape]]
+            The shape of the input tensors.
+
+        Returns
+        -------
+        Union[tf.TensorShape, Dict[str, tf.TensorShape]]
+            The shape of the output tensors.
+        """
         if isinstance(input_shape, dict):
             output_shapes = {}
             for feature_name in self.schema.column_names:
@@ -454,6 +500,18 @@ def compute_output_shape(
     def _compute_output_shape_table(
         self, input_shape: Union[tf.TensorShape, tuple]
     ) -> tf.TensorShape:
+        """Helper method to compute the output shape of a single input tensor.
+
+        Parameters
+        ----------
+        input_shape : tf.TensorShape
+            The shape of the input tensor.
+
+        Returns
+        -------
+        tf.TensorShape
+            The shape of the output tensor.
+        """
         first_dims = input_shape
 
         if input_shape.rank > 1:
@@ -468,10 +526,36 @@ def _compute_output_shape_table(
         return output_shapes
 
     def compute_call_output_shape(self, input_shapes):
+        """Computes the shape of the output of a call to this layer.
+
+        Parameters
+        ----------
+        input_shapes: tf.TensorShape or dictionary of shapes.
+            The shapes of the input tensors.
+
+        Returns
+        -------
+        Union[tf.TensorShape, Dict[str, tf.TensorShape]]
+            The shape of the output of a call to this layer.
+        """
         return self.compute_output_shape(input_shapes)
 
     @classmethod
     def from_config(cls, config, table=None):
+        """Creates an EmbeddingTable from its configuration.
+
+        Parameters
+        ----------
+        config : dict
+            Configuration dictionary.
+        table : tf.keras.layers.Embedding, optional
+            An optional embedding layer.
+
+        Returns
+        -------
+        EmbeddingTable
+            A newly created EmbeddingTable.
+        """
         if table:
             config["table"] = table
         else:
@@ -482,6 +566,13 @@ def from_config(cls, config, table=None):
         return super().from_config(config)
 
     def get_config(self):
+        """Returns the configuration of this EmbeddingTable.
+
+        Returns
+        -------
+        dict
+            Configuration dictionary.
+        """
         config = super().get_config()
         config["table"] = tf.keras.layers.serialize(self.table)
         if isinstance(self.sequence_combiner, tf.keras.layers.Layer):
@@ -732,6 +823,25 @@ def __init__(self, weight_feature_name: str, axis=1, **kwargs):
         self.weight_feature_name = weight_feature_name
 
     def call(self, inputs, features):
+        """Performs the weighted average calculation.
+
+        Parameters
+        ----------
+        inputs: tf.Tensor
+            Input tensor.
+        features: dict
+            Dictionary of features, must include the weight feature.
+
+        Returns
+        -------
+        Tensor
+            Output tensor after applying the weighted average calculation.
+
+        Raises
+        ------
+        ValueError
+            If `inputs` is a tf.RaggedTensor but the weight feature is not a tf.RaggedTensor.
+        """
         weight_feature = features[self.weight_feature_name]
         if isinstance(inputs, tf.RaggedTensor) and not isinstance(weight_feature, tf.RaggedTensor):
             raise ValueError(
@@ -751,6 +861,18 @@ def call(self, inputs, features):
         return output
 
     def compute_output_shape(self, input_shape):
+        """Computes the output shape.
+
+        Parameters
+        ----------
+        input_shape : tf.TensorShape
+            Shape of the input.
+
+        Returns
+        -------
+        tf.TensorShape
+            Shape of the output, which is the same as the input shape in this case.
+        """
         return input_shape
 
     @staticmethod
@@ -791,6 +913,13 @@ def from_schema_convention(schema: Schema, weight_features_name_suffix: str = "_
         return seq_combiners
 
     def get_config(self):
+        """Returns the configuration of the layer.
+
+        Returns
+        -------
+        dict
+            A dictionary containing the configuration of the layer.
+        """
         config = super().get_config()
         config["axis"] = self.axis
         config["weight_feature_name"] = self.weight_feature_name
@@ -1179,12 +1308,19 @@ def from_config(cls, config):
 class SequenceEmbeddingFeatures(EmbeddingFeatures):
     """Input block for embedding-lookups for categorical features. This module produces 3-D tensors,
     this is useful for sequential models like transformers.
+
     Parameters
     ----------
-    {embedding_features_parameters}
+    feature_config: Dict[str, FeatureConfig]
+        This specifies what TableConfig to use for each feature. For shared embeddings, the same
+        TableConfig can be used for multiple features.
+    mask_zero: bool
+        Whether or not the input value 0 is a special "padding" value that should be masked out.
     padding_idx: int
         The symbol to use for padding.
     {tabular_module_parameters}
+    add_default_pre: bool, default True
+        Whether or not to add a default preprocessing block.
     """
 
     def __init__(
@@ -1200,6 +1336,7 @@ def __init__(
         add_default_pre=True,
         **kwargs,
     ):
+        """Initializes the block."""
         if add_default_pre:
             embedding_pre = [Filter(list(feature_config.keys()))]
             pre = [embedding_pre, pre] if pre else embedding_pre  # type: ignore
@@ -1218,11 +1355,37 @@ def __init__(
         self.mask_zero = mask_zero
 
     def lookup_feature(self, name, val, **kwargs):
+        """Looks up the embedding for a specific feature from the embedding tables.
+
+        Parameters
+        ----------
+        name: str
+            The name of the feature to lookup.
+        val: tf.Tensor
+            The tensor of feature values to look up in the embedding tables.
+
+        Returns
+        -------
+        tf.Tensor
+            The corresponding embedding tensor.
+        """
         return super(SequenceEmbeddingFeatures, self).lookup_feature(
             name, val, output_sequence=True
         )
 
     def compute_call_output_shape(self, input_shapes):
+        """Computes the output shapes given the input shapes.
+
+        Parameters
+        ----------
+        input_shapes: dict
+            Dictionary mapping input names to their shapes.
+
+        Returns
+        -------
+        dict
+            Dictionary mapping output names to their shapes.
+        """
         batch_size = self.calculate_batch_size_from_input_shapes(input_shapes)
         sequence_length = input_shapes[list(self.feature_config.keys())[0]][1]
 
@@ -1235,6 +1398,20 @@ def compute_call_output_shape(self, input_shapes):
         return output_shapes
 
     def compute_mask(self, inputs, mask=None):
+        """Computes a mask tensor from the inputs.
+
+        Parameters
+        ----------
+        inputs: dict
+            Dictionary mapping input names to their values.
+        mask: tf.Tensor, optional
+            An optional mask to apply to the inputs.
+
+        Returns
+        -------
+        dict or None
+            A mask tensor, or None if `mask_zero` is False.
+        """
         if not self.mask_zero:
             return None
         outputs = {}
@@ -1244,6 +1421,13 @@ def compute_mask(self, inputs, mask=None):
         return outputs
 
     def get_config(self):
+        """Gets the configuration dictionary for this block.
+
+        Returns
+        -------
+        dict
+            The configuration dictionary.
+        """
         config = super().get_config()
         config["mask_zero"] = self.mask_zero
         config["padding_idx"] = self.padding_idx
diff --git a/merlin/models/tf/models/base.py b/merlin/models/tf/models/base.py
index 5f1499757b..1fdf2ec29f 100644
--- a/merlin/models/tf/models/base.py
+++ b/merlin/models/tf/models/base.py
@@ -1558,8 +1558,13 @@ def save(self, *args, **kwargs):
 class Model(BaseModel):
     """Merlin Model class
 
+    `Model` is the main base class that represents a model in Merlin Models.
+    It can be configured with a number of pre and post processing blocks and can manage a context.
+
     Parameters
     ----------
+    blocks : list
+        List of `Block` instances in the model
     context : Optional[ModelContext], optional
         ModelContext is used to store/retrieve public variables across blocks,
         by default None.
@@ -1585,6 +1590,7 @@ def __init__(
         prep_features: Optional[bool] = True,
         **kwargs,
     ):
+        """Creates a new `Model` instance."""
         super(Model, self).__init__(**kwargs)
 
         context = context or ModelContext()
@@ -1734,6 +1740,27 @@ def build(self, input_shape=None):
         self.built = True
 
     def call(self, inputs, targets=None, training=False, testing=False, output_context=False):
+        """
+        Method for forward pass of the model.
+
+        Parameters
+        ----------
+        inputs : Tensor or dict of Tensor
+            Input Tensor(s) for the model
+        targets : Tensor or dict of Tensor, optional
+            Target Tensor(s) for the model
+        training : bool, optional
+            Flag to indicate whether the model is in training phase
+        testing : bool, optional
+            Flag to indicate whether the model is in testing phase
+        output_context : bool, optional
+            Flag to indicate whether to return the context along with the output
+
+        Returns
+        -------
+        Tensor or tuple of Tensor and ModelContext
+            Output of the model, and optionally the context
+        """
         outputs = inputs
         features = self._prepare_features(inputs, targets=targets)
         if isinstance(features, tuple):
@@ -1794,10 +1821,32 @@ def _call_child(
 
     @property
     def first(self):
+        """
+        The first `Block` in the model.
+
+        This property provides a simple way to quickly access the first `Block` in the model's
+        sequence of blocks.
+
+        Returns
+        -------
+        Block
+            The first `Block` in the model.
+        """
         return self.blocks[0]
 
     @property
     def last(self):
+        """
+        The last `Block` in the model.
+
+        This property provides a simple way to quickly access the last `Block` in the model's
+        sequence of blocks.
+
+        Returns
+        -------
+        Block
+            The last `Block` in the model.
+        """
         return self.blocks[-1]
 
     @classmethod
@@ -1846,6 +1895,25 @@ def from_block(
 
     @classmethod
     def from_config(cls, config, custom_objects=None):
+        """
+        Creates a model from its config.
+
+        This method recreates a model instance from a configuration dictionary and
+        optional custom objects.
+
+        Parameters
+        ----------
+        config : dict
+            The configuration dictionary representing the model.
+        custom_objects : dict, optional
+            Dictionary mapping names to custom classes or functions to be considered
+            during deserialization.
+
+        Returns
+        -------
+        Model
+            The created `Model` instance.
+        """
         pre = config.pop("pre", None)
         post = config.pop("post", None)
         schema = config.pop("schema", None)
@@ -1882,6 +1950,22 @@ def from_config(cls, config, custom_objects=None):
         return model
 
     def get_sample_inputs(self, batch_size=None):
+        """
+        Generates sample inputs for the model.
+
+        This method creates a dictionary of sample inputs for each input feature, useful for
+        testing or initializing the model.
+
+        Parameters
+        ----------
+        batch_size : int, optional
+            The batch size for the sample inputs. If not specified, defaults to 2.
+
+        Returns
+        -------
+        dict
+            A dictionary mapping feature names to sample input tensors.
+        """
         batch_size = batch_size or 2
         if self.input_schema is not None:
             inputs = {}
@@ -1927,6 +2011,18 @@ def get_sample_inputs(self, batch_size=None):
             return inputs
 
     def get_config(self):
+        """
+        Returns the model configuration as a dictionary.
+
+        This method returns a dictionary containing the configuration of the model.
+        The dictionary includes the configuration of each block in the model,
+        as well as additional properties such as `pre` and `post` layers, and the `schema`.
+
+        Returns
+        -------
+        dict
+            The configuration of the model.
+        """
         config = maybe_serialize_keras_objects(self, {}, ["pre", "post"])
         config["schema"] = schema_utils.schema_to_tensorflow_metadata_json(self.schema)
         for i, layer in enumerate(self.blocks):
diff --git a/merlin/models/tf/models/retrieval.py b/merlin/models/tf/models/retrieval.py
index 751c57862c..1b05ed3972 100644
--- a/merlin/models/tf/models/retrieval.py
+++ b/merlin/models/tf/models/retrieval.py
@@ -70,9 +70,6 @@ def MatrixFactorizationModel(
     logits_temperature: float
         Parameter used to reduce model overconfidence, so that logits / T.
         Defaults to 1.
-    loss: Optional[LossType]
-        Loss function.
-        Defaults to `bpr`.
     samplers: List[ItemSampler]
         List of samplers for negative sampling, by default `[InBatchSampler()]`
 
@@ -543,6 +540,8 @@ def YoutubeDNNRetrievalModelV2(
         By default None
     post: Optional[tf.keras.layers.Layer], optional
         The optional layer to apply on top of the query encoder.
+    outputs : Union[ModelOutput, List[ModelOutput]], optional
+        Specifies the model's outputs. If not specified, the outputs will be inferred.
     logits_temperature: float, optional
         Parameter used to reduce model overconfidence, so that logits / T.
         Defaults to 1.
@@ -556,6 +555,11 @@ def YoutubeDNNRetrievalModelV2(
         encoded ids, which are usually reserved for <nulls>,
         out-of-vocabulary or padding.
         By default 0.
+
+    Returns
+    -------
+    RetrievalModelV2
+        The constructed Youtube-DNN based retrieval model
     """
     if not inputs:
         inputs = schema
diff --git a/merlin/models/tf/transforms/regularization.py b/merlin/models/tf/transforms/regularization.py
index e155b21d72..1c01a59693 100644
--- a/merlin/models/tf/transforms/regularization.py
+++ b/merlin/models/tf/transforms/regularization.py
@@ -31,6 +31,21 @@ def __init__(self, **kwargs):
         super(L2Norm, self).__init__(**kwargs)
 
     def call(self, inputs: Union[tf.Tensor, TabularData], axis: int = -1, **kwargs):
+        """
+        Invokes the L2 normalization on the input tensor or dictionary of tensors.
+
+        Parameters
+        ----------
+        inputs: Union[tf.Tensor, TabularData]
+            A Tensor or TabularData input to normalize.
+        axis: int, optional
+            The axis on which to normalize, by default -1.
+
+        Returns
+        -------
+        Union[tf.Tensor, TabularData]
+            The L2-normalized tensor or dictionary of tensors.
+        """
         if isinstance(inputs, dict):
             inputs = {key: self._l2_norm(inp, axis=axis) for key, inp in inputs.items()}
         else:
@@ -67,4 +82,17 @@ def _l2_norm(
         )
 
     def compute_output_shape(self, input_shape):
+        """
+        Compute the output shape of the tensor after normalization.
+
+        Parameters
+        ----------
+        input_shape : tuple
+            A tuple indicating the shape of the input tensor.
+
+        Returns
+        -------
+        tuple
+            The shape of the tensor after L2 normalization.
+        """
         return input_shape
diff --git a/merlin/models/tf/transforms/sequence.py b/merlin/models/tf/transforms/sequence.py
index 51affcf5fd..9dd1ee6f85 100644
--- a/merlin/models/tf/transforms/sequence.py
+++ b/merlin/models/tf/transforms/sequence.py
@@ -973,10 +973,29 @@ class ReplaceMaskedEmbeddings(Block):
     """
 
     def __init__(self, **kwargs):
+        """Initializes the block."""
         super().__init__(**kwargs)
         self.supports_masking = True
 
     def build(self, input_shape):
+        """Builds the block's internal variables.
+
+        This method creates a trainable embedding to replace masked interactions in the input.
+
+        Parameters
+        ----------
+        input_shape : tf.TensorShape
+            Shape of the input tensor.
+
+        Returns
+        -------
+        None
+
+        Raises
+        ------
+        ValueError
+            If the last dimension of the input shape is None.
+        """
         self.hidden_size = input_shape[-1]
         if self.hidden_size is None:
             raise ValueError("The last dim of inputs cannot be None")