Add docstrings (#1106)
* Add docstrings to Encoder

* Add docstrings to ItemRetrievalScorer

* Add docstrings to Model

* Fix docstring for TwoTowerModel

* Add docstring to YoutubeDNNRetrievalModelV2

* Add docstrings to L2Norm

* Add docstrings to ContinuousFeatures

* Add docstrings to AverageEmbeddingsByWeightFeature

* Add docstrings to ReplaceMaskedEmbeddings

* Add docstrings to SequenceEmbeddingFeatures

* Add docstrings to EmbeddingTable

* lint

* lint

* lint
edknv authored May 23, 2023
1 parent 7ee8e84 commit db4483c
Showing 9 changed files with 600 additions and 55 deletions.
45 changes: 42 additions & 3 deletions merlin/models/tf/blocks/retrieval/base.py
@@ -135,14 +135,15 @@ class ItemRetrievalScorer(Block):
"""Block for ItemRetrieval, which expects query/user and item embeddings as input and
uses dot product to score the positive item (inputs["item"]) and also sampled negative
items (during training).
Parameters
----------
samplers: List[ItemSampler], optional
List of item samplers that provide negative samples when `training=True`
sampling_downscore_false_negatives: bool, optional
Whether to identify false negatives (sampled item ids equal to the positive item)
and downscore them to the `sampling_downscore_false_negatives_value`, by default True
sampling_downscore_false_negatives_value: int, optional
Value to be used to downscore false negatives when
`sampling_downscore_false_negatives=True`, by default `np.finfo(np.float32).min / 100.0`
item_id_feature_name: str
@@ -174,6 +175,7 @@ def __init__(
store_negative_ids: bool = False,
**kwargs,
):
"""Initializes the `ItemRetrievalScorer` class."""
super().__init__(**kwargs)

self.downscore_false_negatives = sampling_downscore_false_negatives
@@ -193,6 +195,13 @@ def __init__(
self.set_required_features()

def build(self, input_shapes):
"""Builds the block.
Parameters
----------
input_shapes: tuple or dict
Shape of the input tensor.
"""
if isinstance(input_shapes, dict):
query_shape = input_shapes[self.query_name]
self.context.add_weight(
@@ -206,6 +215,13 @@ def build(self, input_shapes):
super().build(input_shapes)

def _check_input_from_two_tower(self, inputs):
"""Checks if the inputs from the two towers (query and item) are correctly provided.
Parameters
----------
inputs: dict
Dictionary of inputs.
"""
if set(inputs.keys()) != set([self.query_name, self.item_name]):
raise ValueError(
f"Wrong input-names, expected: {[self.query_name, self.item_name]} "
@@ -223,13 +239,15 @@ def call(
the positive item (inputs["item"]).
For the sampled-softmax mode, logits are computed by multiplying the query vector
and the item embeddings matrix (self.context.get_embedding(self.item_domain))
Parameters
----------
inputs : Union[tf.Tensor, TabularData]
Dict with the query and item embeddings (e.g. `{"query": <emb>, "item": <emb>}`),
where embeddings are 2D tensors (batch size, embedding size)
training : bool, optional
Flag that indicates whether in training mode, by default True
Returns
-------
tf.Tensor
@@ -273,13 +291,15 @@ def call_outputs(
) -> "PredictionOutput":
"""Based on the user/query embedding (inputs[self.query_name]), uses dot product to score
the positive item and also sampled negative items (during training).
Parameters
----------
inputs : TabularData
Dict with the query and item embeddings (e.g. `{"query": <emb>, "item": <emb>}`),
where embeddings are 2D tensors (batch size, embedding size)
training : bool, optional
Flag that indicates whether in training mode, by default True
Returns
-------
[tf.Tensor,tf.Tensor]
@@ -431,6 +451,7 @@ def _prepare_query_item_vectors_for_sampled_softmax(
return predictions

def set_required_features(self):
"""Sets the required features for the samplers."""
required_features = set()
if self.downscore_false_negatives:
required_features.add(self.item_id_feature_name)
@@ -442,6 +463,13 @@ def set_required_features(self):
self._required_features = list(required_features)

def get_config(self):
"""Returns the configuration of the model as a dictionary.
Returns
-------
dict
The configuration of the model.
"""
config = super().get_config()
config = maybe_serialize_keras_objects(self, config, ["samplers"])
config["sampling_downscore_false_negatives"] = self.downscore_false_negatives
@@ -458,6 +486,17 @@ def get_config(self):

@classmethod
def from_config(cls, config):
"""Creates a new instance of the class from its config.
Parameters
----------
config: dict
A dictionary, typically the output of get_config.
Returns
-------
A new instance of the `ItemRetrievalScorer` class.
"""
config = maybe_deserialize_keras_objects(config, ["samplers"])

return super().from_config(config)
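The call and call_outputs docstrings above describe the scoring scheme: a dot product between the query embedding and the positive item embedding, plus dot products against the sampled negative items, while the sampled-softmax mode multiplies the query by the full item embedding matrix. A minimal TensorFlow sketch of that arithmetic (the tensor names and shapes are illustrative assumptions, not the block's actual implementation):

import tensorflow as tf

batch_size, emb_dim, num_negatives = 4, 16, 10

# Illustrative embeddings; in the real block these come from the query and item towers.
query = tf.random.normal((batch_size, emb_dim))              # user/query embeddings
positive_item = tf.random.normal((batch_size, emb_dim))      # embeddings of the positive items
negative_items = tf.random.normal((num_negatives, emb_dim))  # sampled negative item embeddings

# Positive score: row-wise dot product between each query and its positive item.
positive_scores = tf.reduce_sum(query * positive_item, axis=-1, keepdims=True)  # (batch, 1)

# Negative scores: each query against every sampled negative.
negative_scores = tf.matmul(query, negative_items, transpose_b=True)  # (batch, num_negatives)

# Logits with the positive item in the first column, followed by the negatives.
logits = tf.concat([positive_scores, negative_scores], axis=-1)  # (batch, 1 + num_negatives)

# Sampled-softmax mode instead scores the query against the whole item embedding matrix:
# logits = tf.matmul(query, item_embedding_matrix, transpose_b=True)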
107 changes: 104 additions & 3 deletions merlin/models/tf/core/encoder.py
@@ -1,5 +1,5 @@
#
# Copyright (c) 2021, NVIDIA CORPORATION.
# Copyright (c) 2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -88,6 +88,22 @@ def encode(
batch_size: int,
**kwargs,
) -> merlin.io.Dataset:
"""Encodes the given dataset and index.
Parameters
----------
dataset: merlin.io.Dataset
The dataset to encode.
index: Union[str, ColumnSchema, Schema, Tags]
The index to use for encoding.
batch_size: int
The batch size for encoding.
Returns
-------
merlin.io.Dataset
The encoded dataset.
"""
if isinstance(index, Schema):
output_schema = index
elif isinstance(index, ColumnSchema):
@@ -117,12 +133,14 @@ def batch_predict(
**kwargs,
) -> merlin.io.Dataset:
"""Batched prediction using Dask.
Parameters
----------
dataset: merlin.io.Dataset
Dataset to predict on.
batch_size: int
Batch size to use for prediction.
Returns
-------
merlin.io.Dataset
Expand Down Expand Up @@ -166,6 +184,23 @@ def batch_predict(
return merlin.io.Dataset(predictions)

def call(self, inputs, *, targets=None, training=False, testing=False, **kwargs):
"""Calls the model on new inputs and returns the outputs as tensors.
Parameters
----------
inputs : tensor-like or dict/tuple of tensors.
Tensors or dict/tuple of tensors representing the input batch.
targets : tensor-like, optional
Tensors representing the target data.
training : bool, optional
Whether the model is in training mode.
testing : bool, optional
Whether the model is in testing mode.
Returns
-------
A tensor or dict of tensors corresponding to the result of calling the layer.
"""
inputs = self._prepare_features(inputs, targets=targets)
if isinstance(inputs, tuple):
inputs, targets = inputs
@@ -180,6 +215,17 @@ def __call__(self, inputs, **kwargs)
)

def __call__(self, inputs, **kwargs):
"""Overrides the default __call__ method to remove "features" from inputs.
Parameters
----------
inputs : tensor-like or dict/tuple of tensors.
Tensors or dict/tuple of tensors representing the input batch.
Returns
-------
A tensor or dict of tensors corresponding to the result of calling the layer.
"""
# We remove features here since we don't expect them at inference time
# Inside the `call` method, we will add them back by assuming inputs=features
if "features" in kwargs:
@@ -188,6 +234,13 @@ def __call__(self, inputs, **kwargs):
return super().__call__(inputs, **kwargs)

def build(self, input_shape):
"""Creates the variables of the layer.
Parameters
----------
input_shape: Tuple[int]
The shape of the input data.
"""
self._prepare_features.build(input_shape)
input_shape = self._prepare_features.compute_output_shape(input_shape)

@@ -196,18 +249,40 @@ def build(self, input_shape):
self._build_input_shape = input_shape

def compute_output_shape(self, input_shape):
"""Computes the output shape of the layer.
Parameters
----------
input_shape: Tuple[int]
The shape of the input data.
Returns
-------
Tuple[int]
The output shape of the layer.
"""
input_shape = self._prepare_features.compute_output_shape(input_shape)
return combinators.compute_output_shape_sequentially(list(self.to_call), input_shape)

def train_step(self, data):
"""Train step"""
"""Performs a training step.
Train step method is not implemented and Raises an error as the
Encoder block is not meant to be trained by itself and can only be
trained as part of a model.
"""
raise NotImplementedError(
"This block is not meant to be trained by itself. ",
"It can only be trained as part of a model.",
)

def fit(self, *args, **kwargs):
"""Fit model"""
"""Fits the model.
Fit method is not implemented and Raises an error as the Encoder block
is not meant to be trained by itself and can only be trained as part
of a model.
"""
raise NotImplementedError(
"This block is not meant to be trained by itself. ",
"It can only be trained as part of a model.",
@@ -245,6 +320,7 @@ def save(

@property
def to_call(self):
"""Provides the list of blocks to be called during the execution of the model."""
if self.pre:
yield self.pre

@@ -256,22 +332,40 @@ def to_call(self):

@property
def has_schema(self) -> bool:
"""Returns True as this class does contain a schema."""
return True

@property
def schema(self) -> Schema:
"""Returns the schema of the model."""
return self._schema

@property
def first(self):
"""Returns the first block of the model."""
return self.blocks[0]

@property
def last(self):
"""Returns the last block of the model."""
return self.blocks[-1]

@classmethod
def from_config(cls, config, custom_objects=None):
"""Creates a new instance of the class by deserializing.
Parameters
----------
config: dict
A dictionary, typically the output of get_config.
custom_objects: dict, optional
A dictionary mapping the names of layers to the corresponding
functions and classes.
Returns
-------
A new instance of Encoder.
"""
pre = config.pop("pre", None)
post = config.pop("post", None)
layers = [
Expand All @@ -291,6 +385,13 @@ def from_config(cls, config, custom_objects=None):
return output

def get_config(self):
"""Returns the configuration of the model as a dictionary.
Returns
-------
dict
The configuration of the model.
"""
config = tf_utils.maybe_serialize_keras_objects(self, {}, ["pre", "post"])
for i, layer in enumerate(self.blocks):
config[i] = tf.keras.utils.serialize_keras_object(layer)
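The encode and batch_predict methods documented above both run batched inference over a merlin.io.Dataset, and get_config/from_config follow the standard Keras serialization contract. A hedged usage sketch — the encoder and dataset variables, and the choice of Tags.ITEM_ID as the index, are assumptions rather than the only valid configuration:

from merlin.schema import Tags

# `item_encoder` is assumed to be a built Encoder (e.g. the item tower of a trained
# two-tower model) and `item_dataset` a merlin.io.Dataset of item features.
item_embeddings = item_encoder.encode(
    item_dataset,
    index=Tags.ITEM_ID,  # column(s) used to index the encoded output
    batch_size=1024,
)

# Raw batched predictions, without re-indexing the output.
predictions = item_encoder.batch_predict(item_dataset, batch_size=1024)

# Keras-style serialization round trip; the config captures the architecture, not the weights.
restored = type(item_encoder).from_config(item_encoder.get_config())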
4 changes: 3 additions & 1 deletion merlin/models/tf/inputs/base.py
@@ -44,7 +44,6 @@ def InputBlock(
post: Optional[BlockType] = None,
aggregation: Optional[TabularAggregationType] = None,
seq: bool = False,
max_seq_length: Optional[int] = None,
add_continuous_branch: bool = True,
continuous_tags: Optional[Union[TagsType, Tuple[Tags]]] = (Tags.CONTINUOUS,),
continuous_projection: Optional[Block] = None,
@@ -75,6 +74,9 @@ def InputBlock(
Next to this, it's also possible to construct it manually.
branches: Dict[str, Block], optional
Dictionary of branches to use inside the InputBlock.
pre: Optional[BlockType]
Transformations to apply on the inputs before the module is
called (before 'forward'). Default is None.
post: Optional[BlockType]
Transformations to apply on the inputs after the module is
called (so **after** `forward`).
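The newly documented pre parameter mirrors the existing post: a transformation applied to the inputs before the InputBlock's branches run. A hedged construction sketch (the schema variable and the chosen options are assumptions, not the only valid configuration):

import merlin.models.tf as mm

# `schema` is assumed to be a merlin Schema describing the input features.
# Continuous features get their own branch and are projected through an MLP;
# all branch outputs are concatenated into a single input representation.
# A `pre` block could additionally be passed to transform the raw inputs first.
inputs = mm.InputBlock(
    schema,
    continuous_projection=mm.MLPBlock([64]),
    aggregation="concat",
)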
