
Commit

Merge branch 'main' into gpu-tests-rapids-runner
marcromeyn authored Jul 4, 2023
2 parents 8cd5537 + 1782b44 commit d963dba
Showing 8 changed files with 433 additions and 15 deletions.
2 changes: 2 additions & 0 deletions merlin/models/torch/__init__.py
@@ -23,6 +23,7 @@
from merlin.models.torch.inputs.select import SelectFeatures, SelectKeys
from merlin.models.torch.inputs.tabular import TabularInputBlock
from merlin.models.torch.models.base import Model
from merlin.models.torch.models.ranking import DLRMModel
from merlin.models.torch.outputs.base import ModelOutput
from merlin.models.torch.outputs.classification import BinaryOutput
from merlin.models.torch.outputs.regression import RegressionOutput
@@ -55,4 +56,5 @@
"Stack",
"schema",
"DLRMBlock",
"DLRMModel",
]
189 changes: 189 additions & 0 deletions merlin/models/torch/blocks/cross.py
@@ -0,0 +1,189 @@
from copy import deepcopy
from typing import Dict, Optional, Union

import torch
from torch import nn
from torch.nn.modules.lazy import LazyModuleMixin

from merlin.models.torch.block import Block
from merlin.models.torch.transforms.agg import Concat
from merlin.models.utils.doc_utils import docstring_parameter

_DCNV2_REF = """
References
----------
.. [1]. Wang, Ruoxi, et al. "DCN V2: Improved deep & cross network and
practical lessons for web-scale learning to rank systems." Proceedings
of the Web Conference 2021. 2021. https://arxiv.org/pdf/2008.13535.pdf
"""


class LazyMirrorLinear(LazyModuleMixin, nn.Linear):
    """A :class:`torch.nn.Linear` module where both `in_features` and
    `out_features` are inferred, i.e. ``out_features = in_features``.

    Parameters
    ----------
    bias : bool
        If set to ``False``, the layer will not learn an additive bias.
        Default: ``True``
    """

    cls_to_become = nn.Linear  # type: ignore[assignment]
    weight: nn.parameter.UninitializedParameter
    bias: nn.parameter.UninitializedParameter  # type: ignore[assignment]

    def __init__(self, bias: bool = True, device=None, dtype=None) -> None:
        # This code is taken from torch.nn.LazyLinear.__init__
        factory_kwargs = {"device": device, "dtype": dtype}
        # bias is hardcoded to False to avoid creating a tensor
        # that will soon be overwritten.
        super().__init__(0, 0, False)
        self.weight = nn.parameter.UninitializedParameter(**factory_kwargs)
        if bias:
            self.bias = nn.parameter.UninitializedParameter(**factory_kwargs)

    def reset_parameters(self) -> None:
        if not self.has_uninitialized_params() and self.in_features != 0:
            super().reset_parameters()

    def initialize_parameters(self, input) -> None:  # type: ignore[override]
        if self.has_uninitialized_params():
            with torch.no_grad():
                self.in_features = input.shape[-1]
                if not hasattr(self, "out_features") or self.out_features == 0:
                    self.out_features = self.in_features
                self.weight.materialize((self.out_features, self.in_features))
                if self.bias is not None:
                    self.bias.materialize((self.out_features,))
                self.reset_parameters()
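
The mirror behaviour is easiest to see standalone. A minimal sketch, assuming
LazyMirrorLinear is importable from merlin.models.torch.blocks.cross as added in
this diff (shapes are illustrative):

import torch

from merlin.models.torch.blocks.cross import LazyMirrorLinear

layer = LazyMirrorLinear(bias=True)
x = torch.randn(32, 16)
y = layer(x)  # in_features is inferred as 16 on the first call
assert y.shape == (32, 16)  # out_features mirrors in_features
assert type(layer) is torch.nn.Linear  # cls_to_become swaps the class once initialized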


@docstring_parameter(dcn_reference=_DCNV2_REF)
class CrossBlock(Block):
    """
    This block creates high-order feature interactions through a stack of
    cross layers, from DCN V2: Improved Deep & Cross Network [1].
    See Eq. (1) for the full-rank version and Eq. (2) for the low-rank version.

    Parameters
    ----------
    *module : nn.Module
        Variable length argument list of PyTorch modules to be contained in the block.
    name : Optional[str], default = None
        The name of the block. If None, no name is assigned.

    {dcn_reference}
    """

    def __init__(self, *module, name: Optional[str] = None):
        super().__init__(*module, name=name)
        self.concat = Concat()
        self.init_hook_handle = self.register_forward_pre_hook(self.initialize)

    @classmethod
    def with_depth(cls, depth: int) -> "CrossBlock":
        """Creates a CrossBlock with a given depth.

        Parameters
        ----------
        depth : int
            Depth of the CrossBlock.

        Returns
        -------
        CrossBlock
            A CrossBlock of the given depth.

        Raises
        ------
        ValueError
            If depth is less than or equal to 0.
        """
        if not depth > 0:
            raise ValueError(f"`depth` must be greater than 0, got {depth}")

        return cls(*Block(LazyMirrorLinear()).repeat(depth))

    @classmethod
    def with_low_rank(cls, depth: int, low_rank: nn.Module) -> "CrossBlock":
        """Creates a CrossBlock with a given depth and low rank. See Eq. (2) in [1].

        Parameters
        ----------
        depth : int
            Depth of the CrossBlock.
        low_rank : nn.Module
            Low-rank module to include in each layer of the CrossBlock.

        Returns
        -------
        CrossBlock
            A CrossBlock of the given depth and low rank.
        """
        return cls(*(Block(deepcopy(low_rank), *block) for block in cls.with_depth(depth)))

    def forward(self, inputs: Union[torch.Tensor, Dict[str, torch.Tensor]]) -> torch.Tensor:
        """Forward pass of the cross-block.

        Parameters
        ----------
        inputs : Union[torch.Tensor, Dict[str, torch.Tensor]]
            The input data, either a tensor or a dictionary of tensors.

        Returns
        -------
        torch.Tensor
            The output tensor after the forward pass.

        Raises
        ------
        RuntimeError
            If the output from a module is not a Tensor.
        """
        if torch.jit.isinstance(inputs, Dict[str, torch.Tensor]):
            x = self.concat(inputs)
        else:
            x = inputs

        x0 = x
        current = x
        for module in self.values:
            module_out = module(current)
            if not isinstance(module_out, torch.Tensor):
                raise RuntimeError("CrossBlock expects a Tensor as output")

            current = x0 * module_out + current

        return current

    def initialize(self, module, inputs):
        """Initialize the block by setting the output features of all
        LazyMirrorLinear children. This is meant to be used as a forward pre-hook.

        Parameters
        ----------
        module : nn.Module
            The module to initialize.
        inputs : tuple
            The inputs to the forward method.
        """
        if torch.jit.isinstance(inputs[0], Dict[str, torch.Tensor]):
            _inputs = self.concat(inputs[0])
        else:
            _inputs = inputs[0]

        def set_out_features_lazy_mirror_linear(m):
            if isinstance(m, LazyMirrorLinear):
                m.out_features = _inputs.shape[-1]

        self.apply(set_out_features_lazy_mirror_linear)
        self.init_hook_handle.remove()  # Clear the hook once the block is initialized
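
Each stacked layer applies the DCN-v2 update x_{l+1} = x_0 * (W_l x_l + b_l) + x_l,
so the output always keeps the input dimensionality. A minimal usage sketch,
assuming the classes above; using nn.LazyLinear(4) as the low-rank projection is
an illustrative choice, not something this diff prescribes:

import torch
from torch import nn

from merlin.models.torch.blocks.cross import CrossBlock

x = torch.randn(8, 24)

block = CrossBlock.with_depth(2)  # full-rank cross layers, Eq. (1)
assert block(x).shape == (8, 24)  # dimensionality is preserved

# Low-rank variant, Eq. (2): each layer projects down to rank 4, then its
# LazyMirrorLinear maps back to the input width (set by the `initialize` pre-hook).
low_rank_block = CrossBlock.with_low_rank(depth=2, low_rank=nn.LazyLinear(4))
assert low_rank_block(x).shape == (8, 24)
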
5 changes: 5 additions & 0 deletions merlin/models/torch/models/base.py
@@ -196,6 +196,11 @@ def compute_loss(
        else:
            raise ValueError(f"Unknown 'predictions' type: {type(predictions)}")

        if _targets.size() != _predictions.size():
            _targets = _targets.view(_predictions.size())
        if _targets.type() != _predictions.type():
            _targets = _targets.type_as(_predictions)

        results["loss"] = results["loss"] + model_out.loss(_predictions, _targets) / len(
            model_outputs
        )
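
The added guard aligns targets with predictions before the loss runs: binary targets
often arrive as an integer column of shape (batch_size,) while the model emits float
predictions of shape (batch_size, 1), and torch losses require matching shape and
dtype. A standalone sketch of the same logic (illustrative shapes, not taken from
this diff):

import torch
import torch.nn.functional as F

_predictions = torch.rand(4, 1)        # float probabilities from the model
_targets = torch.tensor([0, 1, 1, 0])  # int64 labels of shape (4,)

if _targets.size() != _predictions.size():
    _targets = _targets.view(_predictions.size())  # reshape to (4, 1)
if _targets.type() != _predictions.type():
    _targets = _targets.type_as(_predictions)      # cast to float32

loss = F.binary_cross_entropy(_predictions, _targets)  # would raise without the alignment
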
76 changes: 76 additions & 0 deletions merlin/models/torch/models/ranking.py
@@ -0,0 +1,76 @@
from typing import Optional

from torch import nn

from merlin.models.torch.block import Block
from merlin.models.torch.blocks.dlrm import DLRMBlock
from merlin.models.torch.models.base import Model
from merlin.models.torch.outputs.tabular import TabularOutputBlock
from merlin.schema import Schema


class DLRMModel(Model):
    """
    The Deep Learning Recommendation Model (DLRM) as proposed in Naumov, et al. [1]

    Parameters
    ----------
    schema : Schema
        The schema to use for selection.
    dim : int
        The dimensionality of the output vectors.
    bottom_block : Block
        Block to pass the continuous features to.
        Note that the output dimensionality of this block must be equal to ``dim``.
    top_block : Block, optional
        An optional upper-level block of the model.
    interaction : nn.Module, optional
        Interaction module for DLRM.
        If not provided, DLRMInteraction will be used by default.
    output_block : Block, optional
        The output block of the model, by default None.
        If None, a TabularOutputBlock with schema and default initializations is used.

    Returns
    -------
    Model
        An instance of the Model class representing the fully formed DLRM.

    Example usage
    -------------
    >>> model = mm.DLRMModel(
    ...     schema,
    ...     dim=64,
    ...     bottom_block=mm.MLPBlock([256, 64]),
    ...     output_block=BinaryOutput(ColumnSchema("target")))
    >>> trainer = pl.Trainer()
    >>> model.initialize(dataloader)
    >>> trainer.fit(model, dataloader)

    References
    ----------
    [1] Naumov, Maxim, et al. "Deep learning recommendation model for
        personalization and recommendation systems." arXiv preprint arXiv:1906.00091 (2019).
    """

    def __init__(
        self,
        schema: Schema,
        dim: int,
        bottom_block: Block,
        top_block: Optional[Block] = None,
        interaction: Optional[nn.Module] = None,
        output_block: Optional[Block] = None,
    ) -> None:
        if output_block is None:
            output_block = TabularOutputBlock(schema, init="defaults")

        dlrm_body = DLRMBlock(
            schema,
            dim,
            bottom_block,
            top_block=top_block,
            interaction=interaction,
        )

        super().__init__(dlrm_body, output_block)
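
The docstring example above, made self-contained; `train_dataset` is a hypothetical
merlin.io.Dataset, and the Loader and batch-size choices are illustrative
assumptions rather than part of this diff:

import pytorch_lightning as pl

import merlin.models.torch as mm
from merlin.dataloader.torch import Loader
from merlin.schema import ColumnSchema

loader = Loader(train_dataset, batch_size=1024)  # train_dataset: merlin.io.Dataset
model = mm.DLRMModel(
    train_dataset.schema,
    dim=64,
    bottom_block=mm.MLPBlock([256, 64]),  # output dim must equal `dim`
    output_block=mm.BinaryOutput(ColumnSchema("target")),
)
model.initialize(loader)  # materializes the lazy modules from one batch
pl.Trainer(max_epochs=1).fit(model, loader)
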
2 changes: 1 addition & 1 deletion merlin/models/torch/utils/module_utils.py
@@ -236,7 +236,7 @@ def initialize(module, data: Union[Dataset, Loader, Batch], dtype=torch.float32)
    if hasattr(module, "model_outputs"):
        for model_out in module.model_outputs():
            for metric in model_out.metrics:
                metric.to(batch.device())
                metric.to(device=batch.device())

    from merlin.models.torch import schema

