Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add docstrings #1110

Merged
merged 15 commits into from
Jun 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions merlin/datasets/ecommerce/dressipi/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def get_dressipi2022(input_path):
----------
path: Union[str, Path]
Directory to load the raw data from.

Returns
-------
train: merlin.io.Dataset
Expand Down
87 changes: 45 additions & 42 deletions merlin/models/tf/blocks/dlrm.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,52 +34,55 @@ def DLRMBlock(
*,
embedding_dim: int = None,
embedding_options: EmbeddingOptions = None,
embeddings: Optional[Block] = None,
bottom_block: Optional[Block] = None,
top_block: Optional[Block] = None,
embeddings: Optional[Block] = None,
) -> SequentialBlock:
"""Builds the DLRM architecture, as proposed in the following
    `paper <https://arxiv.org/pdf/1906.00091.pdf>`_ [1]_.

References
----------
.. [1] Naumov, Maxim, et al. "Deep learning recommendation model for
personalization and recommendation systems." arXiv preprint arXiv:1906.00091 (2019).

Parameters
----------
schema : Schema
The `Schema` with the input features
bottom_block : Block
The `Block` that combines the continuous features (typically a `MLPBlock`)
top_block : Optional[Block], optional
The optional `Block` that combines the outputs of bottom layer and of
the factorization machine layer, by default None
embedding_dim : Optional[int], optional
Dimension of the embeddings, by default None
embedding_options : EmbeddingOptions
Options for the input embeddings.
- embedding_dim_default: int - Default dimension of the embedding
table, when the feature is not found in ``embedding_dims``, by default 64
- infer_embedding_sizes : bool, Automatically defines the embedding
dimension from the feature cardinality in the schema, by default False,
which needs to be kept False for the DLRM architecture.

Returns
-------
SequentialBlock
The DLRM block

Raises
------
ValueError
The schema is required by DLRM
ValueError
The bottom_block is required by DLRM
ValueError
The embedding_dim (X) needs to match the last layer of bottom MLP (Y).
ValueError
Only one-of `embeddings` or `embedding_options` can be used.
    `paper <https://arxiv.org/pdf/1906.00091.pdf>`_ [1]_.

References
----------
.. [1] Naumov, Maxim, et al. "Deep learning recommendation model for
personalization and recommendation systems." arXiv preprint arXiv:1906.00091 (2019).

Parameters
----------
schema : Schema
The `Schema` with the input features
embedding_dim : Optional[int], optional
Dimension of the embeddings, by default None
embedding_options : EmbeddingOptions
Options for the input embeddings.
- embedding_dim_default: int - Default dimension of the embedding
table, when the feature is not found in ``embedding_dims``, by default 64
- infer_embedding_sizes : bool, Automatically defines the embedding
dimension from the feature cardinality in the schema, by default False,
which needs to be kept False for the DLRM architecture.
    embeddings : Optional[Block]
        If provided, creates a ParallelBlock with an EmbeddingTable for each
        categorical feature in the schema.
bottom_block : Block
The `Block` that combines the continuous features (typically a `MLPBlock`)
top_block : Optional[Block], optional
The optional `Block` that combines the outputs of bottom layer and of
the factorization machine layer, by default None

Returns
-------
SequentialBlock
The DLRM block

Raises
------
ValueError
The schema is required by DLRM
ValueError
The bottom_block is required by DLRM
ValueError
The embedding_dim (X) needs to match the last layer of bottom MLP (Y).
ValueError
Only one-of `embeddings` or `embedding_options` can be used.
"""
if schema is None:
raise ValueError("The schema is required by DLRM")
Expand Down
13 changes: 13 additions & 0 deletions merlin/models/tf/blocks/interaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,18 @@ def call(self, inputs: tf.Tensor, **kwargs) -> tf.Tensor:
return 0.5 * tf.subtract(summed_square, squared_sum)

def compute_output_shape(self, input_shapes):
"""Computes the output shape based on the input shapes

Parameters
----------
input_shapes : tf.TensorShape
The input shapes

Returns
-------
tf.TensorShape
The output shape
"""
if len(input_shapes) != 3:
raise ValueError("Found shape {} without 3 dimensions".format(input_shapes))
return (input_shapes[0], input_shapes[2])
Expand Down Expand Up @@ -281,6 +293,7 @@ def FMBlock(
factors_dim : Optional[int], optional
If fm_input_block is not provided, the factors_dim is used to define the
embeddings dim to instantiate InputBlockV2, by default None

Returns
-------
tf.Tensor
Expand Down
31 changes: 31 additions & 0 deletions merlin/models/tf/blocks/mlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,37 @@ def __init__(
dense=None,
**kwargs,
):
        """A Dense layer that aggregates features before projection.

        Parameters
----------
        units : int
            Dimensionality of the output space of the dense projection.
activation : Optional[Union[str,tf.keras.layers.Layer]], optional
The activation function to use. By default None
use_bias : bool, optional
Whether to use a bias in the MLP, by default True
kernel_initializer: InitializerType
Initializer for the kernel weights matrix. Defaults to "glorot_uniform".
bias_initializer: InitializerType
            Initializer for the bias vector. Defaults to "zeros".
kernel_regularizer: Optional[RegularizerType]
            Regularizer function applied to the kernel weights matrix. Defaults to None.
bias_regularizer: Optional[RegularizerType]
            Regularizer function applied to the bias vector. Defaults to None.
activity_regularizer : optional
Regularizer function applied to the output of the layer (its "activation"),
by default None
kernel_constraint : optional
Constraint function applied to the kernel weights matrix, by default None
bias_constraint : optional
Constraint function applied to the bias vector, by default None
pre_aggregation : str, optional
If provided, aggregates inputs before the dense projection, by default "concat"
        dense : tf.keras.layers.Layer, optional
A tf.keras.layers.Layer that can be used to project the inputs.
Typically used when deserializing the layer. By default None
"""

super(_Dense, self).__init__(**kwargs)
self.dense = dense or tf.keras.layers.Dense(
units,
Expand Down
45 changes: 42 additions & 3 deletions merlin/models/tf/blocks/retrieval/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,14 +135,15 @@ class ItemRetrievalScorer(Block):
"""Block for ItemRetrieval, which expects query/user and item embeddings as input and
uses dot product to score the positive item (inputs["item"]) and also sampled negative
items (during training).

Parameters
----------
samplers : List[ItemSampler], optional
samplers: List[ItemSampler], optional
List of item samplers that provide negative samples when `training=True`
sampling_downscore_false_negatives : bool, optional
sampling_downscore_false_negatives: bool, optional
Identify false negatives (sampled item ids equal to the positive item and downscore them
to the `sampling_downscore_false_negatives_value`), by default True
sampling_downscore_false_negatives_value : int, optional
sampling_downscore_false_negatives_value: int, optional
Value to be used to downscore false negatives when
`sampling_downscore_false_negatives=True`, by default `np.finfo(np.float32).min / 100.0`
item_id_feature_name: str
Expand Down Expand Up @@ -174,6 +175,7 @@ def __init__(
store_negative_ids: bool = False,
**kwargs,
):
"""Initializes the `ItemRetrievalScorer` class."""
super().__init__(**kwargs)

self.downscore_false_negatives = sampling_downscore_false_negatives
Expand All @@ -193,6 +195,13 @@ def __init__(
self.set_required_features()

def build(self, input_shapes):
"""Builds the block.

Parameters
----------
input_shapes: tuple or dict
Shape of the input tensor.
"""
if isinstance(input_shapes, dict):
query_shape = input_shapes[self.query_name]
self.context.add_weight(
Expand All @@ -206,6 +215,13 @@ def build(self, input_shapes):
super().build(input_shapes)

def _check_input_from_two_tower(self, inputs):
"""Checks if the inputs from the two towers (query and item) are correctly provided.

Parameters
----------
inputs: dict
Dictionary of inputs.
"""
if set(inputs.keys()) != set([self.query_name, self.item_name]):
raise ValueError(
f"Wrong input-names, expected: {[self.query_name, self.item_name]} "
Expand All @@ -223,13 +239,15 @@ def call(
the positive item (inputs["item"]).
For the sampled-softmax mode, logits are computed by multiplying the query vector
and the item embeddings matrix (self.context.get_embedding(self.item_domain))

Parameters
----------
inputs : Union[tf.Tensor, TabularData]
Dict with the query and item embeddings (e.g. `{"query": <emb>}, "item": <emb>}`),
where embeddings are 2D tensors (batch size, embedding size)
training : bool, optional
Flag that indicates whether in training mode, by default True

Returns
-------
tf.Tensor
Expand Down Expand Up @@ -273,13 +291,15 @@ def call_outputs(
) -> "PredictionOutput":
"""Based on the user/query embedding (inputs[self.query_name]), uses dot product to score
the positive item and also sampled negative items (during training).

Parameters
----------
inputs : TabularData
Dict with the query and item embeddings (e.g. `{"query": <emb>}, "item": <emb>}`),
where embeddings are 2D tensors (batch size, embedding size)
training : bool, optional
Flag that indicates whether in training mode, by default True

Returns
-------
[tf.Tensor,tf.Tensor]
Expand Down Expand Up @@ -431,6 +451,7 @@ def _prepare_query_item_vectors_for_sampled_softmax(
return predictions

def set_required_features(self):
"""Sets the required features for the samplers."""
required_features = set()
if self.downscore_false_negatives:
required_features.add(self.item_id_feature_name)
Expand All @@ -442,6 +463,13 @@ def set_required_features(self):
self._required_features = list(required_features)

def get_config(self):
"""Returns the configuration of the model as a dictionary.

Returns
-------
dict
The configuration of the model.
"""
config = super().get_config()
config = maybe_serialize_keras_objects(self, config, ["samplers"])
config["sampling_downscore_false_negatives"] = self.downscore_false_negatives
Expand All @@ -458,6 +486,17 @@ def get_config(self):

@classmethod
def from_config(cls, config):
"""Creates a new instance of the class from its config.

Parameters
----------
config: dict
A dictionary, typically the output of get_config.

Returns
-------
A new instance of the `ItemRetrievalScorer` class.
"""
config = maybe_deserialize_keras_objects(config, ["samplers"])

return super().from_config(config)
5 changes: 5 additions & 0 deletions merlin/models/tf/core/combinators.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,11 @@ def from_config(cls, config, **kwargs):

@tf.keras.utils.register_keras_serializable(package="merlin.models")
class ResidualBlock(WithShortcut):
"""
Creates a shortcut connection where the residuals are
summed to the output of the block
"""

def __init__(
self,
block: Union[tf.keras.layers.Layer, Block],
Expand Down
Loading