From bb8b7bd87fac0984dee14814f475e19fea33a4f4 Mon Sep 17 00:00:00 2001 From: Gabriel Moreira Date: Tue, 4 Jul 2023 11:37:40 -0300 Subject: [PATCH 01/34] Fixes retrieval encoders when query / item features have dense list features (#1169) * Making retrieval encoders more trustworthy by setting the map_partitions(meta) with the expected output dataframe schema. This fixes the issue when multi-hot features were used in the user / item tower encoding * Changing back the schema of music_streaming_data fixture and doing the required changes within test_two_tower_v2_export_item_tower_embeddings_with_seq_item_features --- .../entertainment/music_streaming/schema.json | 3 +- merlin/models/tf/core/encoder.py | 19 ++++++++-- merlin/models/tf/core/index.py | 10 ++++- merlin/models/tf/models/base.py | 24 ++++++++++-- merlin/models/tf/utils/batch_utils.py | 12 +----- tests/unit/tf/models/test_retrieval.py | 37 +++++++++++++++++-- 6 files changed, 83 insertions(+), 22 deletions(-) diff --git a/merlin/datasets/entertainment/music_streaming/schema.json b/merlin/datasets/entertainment/music_streaming/schema.json index cdef74879a..bfe3a14530 100644 --- a/merlin/datasets/entertainment/music_streaming/schema.json +++ b/merlin/datasets/entertainment/music_streaming/schema.json @@ -98,7 +98,8 @@ "annotation": { "tag": [ "categorical", - "user_id" + "user_id", + "user" ] } }, diff --git a/merlin/models/tf/core/encoder.py b/merlin/models/tf/core/encoder.py index 0dd47b5187..3f32a73f67 100644 --- a/merlin/models/tf/core/encoder.py +++ b/merlin/models/tf/core/encoder.py @@ -31,6 +31,7 @@ from merlin.models.tf.outputs.topk import TopKOutput from merlin.models.tf.transforms.features import PrepareFeatures from merlin.models.tf.utils import tf_utils +from merlin.models.tf.utils.batch_utils import TFModelEncode from merlin.schema import ColumnSchema, Schema, Tags @@ -171,13 +172,18 @@ def batch_predict( if hasattr(dataset, "to_ddf"): dataset = dataset.to_ddf() - from merlin.models.tf.utils.batch_utils import TFModelEncode - model_encode = TFModelEncode(self, batch_size=batch_size, **kwargs) + encode_kwargs = {} if output_schema: encode_kwargs["filter_input_columns"] = output_schema.column_names - predictions = dataset.map_partitions(model_encode, **encode_kwargs) + + # Processing a sample of the dataset with the model encoder + # to get the output dataframe dtypes + sample_output = model_encode(dataset.head(), **encode_kwargs) + output_dtypes = sample_output.dtypes.to_dict() + + predictions = dataset.map_partitions(model_encode, meta=output_dtypes, **encode_kwargs) if index: predictions = predictions.set_index(index) @@ -613,7 +619,12 @@ def batch_predict( if output_schema: encode_kwargs["filter_input_columns"] = output_schema.column_names - predictions = dataset.map_partitions(model_encode, **encode_kwargs) + # Processing a sample of the dataset with the model encoder + # to get the output dataframe dtypes + sample_output = model_encode(dataset.head(), **encode_kwargs) + output_dtypes = sample_output.dtypes.to_dict() + + predictions = dataset.map_partitions(model_encode, meta=output_dtypes, **encode_kwargs) return merlin.io.Dataset(predictions) diff --git a/merlin/models/tf/core/index.py b/merlin/models/tf/core/index.py index fc36f1a114..ba2632f70e 100644 --- a/merlin/models/tf/core/index.py +++ b/merlin/models/tf/core/index.py @@ -113,7 +113,15 @@ def get_candidates_dataset( model_encode = TFModelEncode(model=block, output_concat_func=np.concatenate) data = data.to_ddf() - embedding_ddf = 
data.map_partitions(model_encode, filter_input_columns=[id_column]) + + # Processing a sample of the dataset with the model encoder + # to get the output dataframe dtypes + sample_output = model_encode(data.head(), filter_input_columns=[id_column]) + output_dtypes = sample_output.dtypes.to_dict() + + embedding_ddf = data.map_partitions( + model_encode, meta=output_dtypes, filter_input_columns=[id_column] + ) embedding_df = embedding_ddf.compute(scheduler="synchronous") embedding_df.set_index(id_column, inplace=True) diff --git a/merlin/models/tf/models/base.py b/merlin/models/tf/models/base.py index dd8e96a440..365c8fe9d9 100644 --- a/merlin/models/tf/models/base.py +++ b/merlin/models/tf/models/base.py @@ -1579,7 +1579,13 @@ def batch_predict( from merlin.models.tf.utils.batch_utils import TFModelEncode model_encode = TFModelEncode(self, batch_size=batch_size, **kwargs) - predictions = dataset.map_partitions(model_encode) + + # Processing a sample of the dataset with the model encoder + # to get the output dataframe dtypes + sample_output = model_encode(dataset.head()) + output_dtypes = sample_output.dtypes.to_dict() + + predictions = dataset.map_partitions(model_encode, meta=output_dtypes) return merlin.io.Dataset(predictions) @@ -2354,7 +2360,13 @@ def query_embeddings( get_user_emb = QueryEmbeddings(self, batch_size=batch_size) dataset = unique_rows_by_features(dataset, query_tag, query_id_tag).to_ddf() - embeddings = dataset.map_partitions(get_user_emb) + + # Processing a sample of the dataset with the model encoder + # to get the output dataframe dtypes + sample_output = get_user_emb(dataset.head()) + output_dtypes = sample_output.dtypes.to_dict() + + embeddings = dataset.map_partitions(get_user_emb, meta=output_dtypes) return merlin.io.Dataset(embeddings) @@ -2389,7 +2401,13 @@ def item_embeddings( get_item_emb = ItemEmbeddings(self, batch_size=batch_size) dataset = unique_rows_by_features(dataset, item_tag, item_id_tag).to_ddf() - embeddings = dataset.map_partitions(get_item_emb) + + # Processing a sample of the dataset with the model encoder + # to get the output dataframe dtypes + sample_output = get_item_emb(dataset.head()) + output_dtypes = sample_output.dtypes.to_dict() + + embeddings = dataset.map_partitions(get_item_emb, meta=output_dtypes) return merlin.io.Dataset(embeddings) diff --git a/merlin/models/tf/utils/batch_utils.py b/merlin/models/tf/utils/batch_utils.py index 9ffce6b649..bc48da9ddd 100644 --- a/merlin/models/tf/utils/batch_utils.py +++ b/merlin/models/tf/utils/batch_utils.py @@ -8,8 +8,7 @@ from merlin.models.tf.core.base import Block from merlin.models.tf.loader import Loader from merlin.models.tf.models.base import Model, RetrievalModel, get_task_names_from_outputs -from merlin.models.utils.schema_utils import select_targets -from merlin.schema import Schema, Tags +from merlin.schema import Schema class ModelEncode: @@ -176,17 +175,10 @@ def encode_output(output: tf.Tensor): def data_iterator_func(schema, batch_size: int = 512): import merlin.io.dataset - cat_cols = schema.select_by_tag(Tags.CATEGORICAL).excluding_by_tag(Tags.TARGET).column_names - cont_cols = schema.select_by_tag(Tags.CONTINUOUS).excluding_by_tag(Tags.TARGET).column_names - targets = select_targets(schema).column_names - def data_iterator(dataset): return Loader( - merlin.io.dataset.Dataset(dataset), + merlin.io.dataset.Dataset(dataset, schema=schema), batch_size=batch_size, - cat_names=cat_cols, - cont_names=cont_cols, - label_names=targets, shuffle=False, ) diff --git 
a/tests/unit/tf/models/test_retrieval.py b/tests/unit/tf/models/test_retrieval.py index 4fd51525bd..252a666fba 100644 --- a/tests/unit/tf/models/test_retrieval.py +++ b/tests/unit/tf/models/test_retrieval.py @@ -884,7 +884,7 @@ def test_youtube_dnn_retrieval_v2(sequence_testing_data: Dataset, run_eagerly, t assert losses is not None -def test_two_tower_v2_export_embeddings( +def test_two_tower_v2_export_item_tower_embeddings( ecommerce_data: Dataset, ): user_schema = ecommerce_data.schema.select_by_tag(Tags.USER_ID) @@ -907,7 +907,38 @@ def test_two_tower_v2_export_embeddings( _check_embeddings(candidates, 100, 8, "item_id") -def test_mf_v2_export_embeddings( +def test_two_tower_v2_export_item_tower_embeddings_with_seq_item_features( + music_streaming_data: Dataset, +): + # Changing the schema of the multi-hot "item_genres" feature to be + # dense (not ragged) + music_streaming_data.schema["item_genres"] = music_streaming_data.schema[ + "item_genres" + ].with_shape(((0, None), (4, 4))) + schema = music_streaming_data.schema + user_schema = schema.select_by_tag(Tags.USER) + candidate_schema = schema.select_by_tag(Tags.ITEM) + + query = mm.Encoder(user_schema, mm.MLPBlock([8])) + candidate = mm.Encoder(candidate_schema, mm.MLPBlock([8])) + model = mm.TwoTowerModelV2( + query_tower=query, candidate_tower=candidate, negative_samplers=["in-batch"] + ) + + model, _ = testing_utils.model_test(model, music_streaming_data, reload_model=False) + + queries = model.query_embeddings( + music_streaming_data, batch_size=16, index=Tags.USER_ID + ).compute() + _check_embeddings(queries, 100, 8, "user_id") + + candidates = model.candidate_embeddings( + music_streaming_data, batch_size=16, index=Tags.ITEM_ID + ).compute() + _check_embeddings(candidates, 100, 8, "item_id") + + +def test_mf_v2_export_item_tower_embeddings( ecommerce_data: Dataset, ): model = mm.MatrixFactorizationModelV2( @@ -939,7 +970,7 @@ def _check_embeddings(embeddings, extected_len, num_dim=8, index_name=None): assert embeddings.index.name == index_name -def test_youtube_dnn_v2_export_embeddings(sequence_testing_data: Dataset): +def test_youtube_dnn_v2_export_item_embeddings(sequence_testing_data: Dataset): to_remove = ["event_timestamp"] + ( sequence_testing_data.schema.select_by_tag(Tags.SEQUENCE) .select_by_tag(Tags.CONTINUOUS) From 4fbf82161118c0c7a36652046f4a903091c2de3f Mon Sep 17 00:00:00 2001 From: Marc Romeyn Date: Tue, 4 Jul 2023 18:19:14 +0200 Subject: [PATCH 02/34] Change default loss in BinaryOutput to BCELoss (#1175) * Change default loss in BinaryOutput to BCELoss * Some commit * Fixing failing tests --------- Co-authored-by: edknv <109497216+edknv@users.noreply.github.com> --- merlin/models/torch/outputs/classification.py | 2 +- tests/unit/torch/models/test_base.py | 33 ++++++++++--------- .../unit/torch/outputs/test_classification.py | 2 +- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/merlin/models/torch/outputs/classification.py b/merlin/models/torch/outputs/classification.py index 2ca36143f7..6d0d411ed3 100644 --- a/merlin/models/torch/outputs/classification.py +++ b/merlin/models/torch/outputs/classification.py @@ -36,7 +36,7 @@ class BinaryOutput(ModelOutput): The metrics used for evaluation. Default includes Accuracy, AUROC, Precision, and Recall. 
""" - DEFAULT_LOSS_CLS = nn.BCEWithLogitsLoss + DEFAULT_LOSS_CLS = nn.BCELoss DEFAULT_METRICS_CLS = (Accuracy, AUROC, Precision, Recall) def __init__( diff --git a/tests/unit/torch/models/test_base.py b/tests/unit/torch/models/test_base.py index d51589b8f1..9abfe2eef7 100644 --- a/tests/unit/torch/models/test_base.py +++ b/tests/unit/torch/models/test_base.py @@ -125,7 +125,7 @@ def test_training_step_values(self): loss = model.training_step((features, targets), 0) (weights, bias) = model.parameters() expected_outputs = nn.Sigmoid()(torch.matmul(features["feature"], weights.T) + bias) - expected_loss = nn.BCEWithLogitsLoss()(expected_outputs, targets["target"]) + expected_loss = nn.BCELoss()(expected_outputs, targets["target"]) assert torch.allclose(loss, expected_loss) def test_training_step_with_dataloader(self): @@ -228,11 +228,11 @@ def test_train_classification_with_lightning_trainer(self, music_streaming_data, class TestComputeLoss: def test_tensor_inputs(self): - predictions = torch.randn(2, 1) + predictions = torch.sigmoid(torch.randn(2, 1)) targets = torch.randint(2, (2, 1), dtype=torch.float32) model_outputs = [mm.BinaryOutput(ColumnSchema("a"))] results = compute_loss(predictions, targets, model_outputs) - expected_loss = nn.BCEWithLogitsLoss()(predictions, targets) + expected_loss = nn.BCELoss()(predictions, targets) expected_auroc = AUROC(task="binary")(predictions, targets) expected_acc = Accuracy(task="binary")(predictions, targets) expected_prec = Precision(task="binary")(predictions, targets) @@ -253,48 +253,49 @@ def test_tensor_inputs(self): assert torch.allclose(results["binary_recall"], expected_rec) def test_no_metrics(self): - predictions = torch.randn(2, 1) + predictions = torch.sigmoid(torch.randn(2, 1)) targets = torch.randint(2, (2, 1), dtype=torch.float32) model_outputs = [mm.BinaryOutput(ColumnSchema("a"))] results = compute_loss(predictions, targets, model_outputs, compute_metrics=False) assert sorted(results.keys()) == ["loss"] def test_dict_inputs(self): - predictions = {"a": torch.randn(2, 1)} + outputs = mm.ParallelBlock({"a": mm.BinaryOutput(ColumnSchema("a"))}) + predictions = outputs(torch.randn(2, 1)) targets = {"a": torch.randint(2, (2, 1), dtype=torch.float32)} - model_outputs = (mm.BinaryOutput(ColumnSchema("a")),) - results = compute_loss(predictions, targets, model_outputs) - expected_loss = nn.BCEWithLogitsLoss()(predictions["a"], targets["a"]) + + results = compute_loss(predictions, targets, outputs.find(mm.ModelOutput)) + expected_loss = nn.BCELoss()(predictions["a"], targets["a"]) assert torch.allclose(results["loss"], expected_loss) def test_mixed_inputs(self): predictions = {"a": torch.randn(2, 1)} targets = torch.randint(2, (2, 1), dtype=torch.float32) - model_outputs = (mm.BinaryOutput(ColumnSchema("a")),) + model_outputs = (mm.RegressionOutput(ColumnSchema("a")),) results = compute_loss(predictions, targets, model_outputs) - expected_loss = nn.BCEWithLogitsLoss()(predictions["a"], targets) + expected_loss = nn.MSELoss()(predictions["a"], targets) assert torch.allclose(results["loss"], expected_loss) def test_single_model_output(self): predictions = {"foo": torch.randn(2, 1)} targets = {"foo": torch.randint(2, (2, 1), dtype=torch.float32)} - model_outputs = [mm.BinaryOutput(ColumnSchema("foo"))] + model_outputs = [mm.RegressionOutput(ColumnSchema("foo"))] results = compute_loss(predictions, targets, model_outputs) - expected_loss = nn.BCEWithLogitsLoss()(predictions["foo"], targets["foo"]) + expected_loss = nn.MSELoss()(predictions["foo"], 
targets["foo"]) assert torch.allclose(results["loss"], expected_loss) def test_tensor_input_no_targets(self): predictions = torch.randn(2, 1) - binary_output = mm.BinaryOutput(ColumnSchema("foo")) + binary_output = mm.RegressionOutput(ColumnSchema("foo")) results = compute_loss(predictions, None, (binary_output,)) - expected_loss = nn.BCEWithLogitsLoss()(predictions, torch.zeros(2, 1)) + expected_loss = nn.MSELoss()(predictions, torch.zeros(2, 1)) assert torch.allclose(results["loss"], expected_loss) def test_dict_input_no_targets(self): predictions = {"foo": torch.randn(2, 1)} - binary_output = mm.BinaryOutput(ColumnSchema("foo")) + binary_output = mm.RegressionOutput(ColumnSchema("foo")) results = compute_loss(predictions, None, (binary_output,)) - expected_loss = nn.BCEWithLogitsLoss()(predictions["foo"], torch.zeros(2, 1)) + expected_loss = nn.MSELoss()(predictions["foo"], torch.zeros(2, 1)) assert torch.allclose(results["loss"], expected_loss) def test_no_target_raises_error(self): diff --git a/tests/unit/torch/outputs/test_classification.py b/tests/unit/torch/outputs/test_classification.py index 755d465350..038b2586d4 100644 --- a/tests/unit/torch/outputs/test_classification.py +++ b/tests/unit/torch/outputs/test_classification.py @@ -30,7 +30,7 @@ def test_init(self): binary_output = mm.BinaryOutput() assert isinstance(binary_output, mm.BinaryOutput) - assert isinstance(binary_output.loss, nn.BCEWithLogitsLoss) + assert isinstance(binary_output.loss, nn.BCELoss) assert binary_output.metrics == [ Accuracy(task="binary"), AUROC(task="binary"), From 0b1c198a8def67b2125b006a63de95c9391ac4d1 Mon Sep 17 00:00:00 2001 From: Marc Romeyn Date: Tue, 4 Jul 2023 19:13:22 +0200 Subject: [PATCH 03/34] Adding ToTuple (#1178) * Adding ToTuple * Add Schema to ToTuple-classes to ensure correct order --- merlin/models/torch/schema.py | 108 +++++++++++++ merlin/models/torch/transforms/tuple.py | 183 ++++++++++++++++++++++ tests/unit/torch/transforms/test_tuple.py | 25 +++ 3 files changed, 316 insertions(+) create mode 100644 merlin/models/torch/transforms/tuple.py create mode 100644 tests/unit/torch/transforms/test_tuple.py diff --git a/merlin/models/torch/schema.py b/merlin/models/torch/schema.py index 937a4a869b..d140eff670 100644 --- a/merlin/models/torch/schema.py +++ b/merlin/models/torch/schema.py @@ -517,6 +517,114 @@ def _(input): @output.register_tensor(Tuple[torch.Tensor, torch.Tensor, torch.Tensor]) @input.register_tensor(Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]) @output.register_tensor(Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]) +@input.register_tensor(Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]) +@output.register_tensor(Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]) +@input.register_tensor( + Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor] +) +@output.register_tensor( + Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor] +) +@input.register_tensor( + Tuple[ + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + ] +) +@output.register_tensor( + Tuple[ + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + ] +) +@input.register_tensor( + Tuple[ + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + ] +) 
+@output.register_tensor( + Tuple[ + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + ] +) +@input.register_tensor( + Tuple[ + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + ] +) +@output.register_tensor( + Tuple[ + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + ] +) +@input.register_tensor( + Tuple[ + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + ] +) +@output.register_tensor( + Tuple[ + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + ] +) def _(input): output = Schema() diff --git a/merlin/models/torch/transforms/tuple.py b/merlin/models/torch/transforms/tuple.py new file mode 100644 index 0000000000..d674bf16dd --- /dev/null +++ b/merlin/models/torch/transforms/tuple.py @@ -0,0 +1,183 @@ +from typing import Dict, List, Optional, Tuple + +import torch +from torch import nn + +from merlin.schema import Schema + + +class _ToTuple(nn.Module): + def __init__(self, input_schema: Optional[Schema] = None): + super().__init__() + if input_schema is not None: + self.setup_schema(input_schema) + + def setup_schema(self, input_schema: Schema): + self._input_schema = input_schema + self._column_names = input_schema.column_names + + def value_list(self, inputs: Dict[str, torch.Tensor]) -> List[torch.Tensor]: + outputs: List[torch.Tensor] = [] + + if not hasattr(self, "_column_names"): + raise RuntimeError("setup_schema() must be called before value_list()") + + for col in self._column_names: + outputs.append(inputs[col]) + + return outputs + + +class ToTuple1(_ToTuple): + """Converts a dictionary of tensors of length=1 to a tuple of tensors.""" + + def forward(self, inputs: Dict[str, torch.Tensor]) -> Tuple[torch.Tensor]: + _list = list(inputs.values()) + return (_list[0],) + + +class ToTuple2(_ToTuple): + """Converts a dictionary of tensors of length=2 to a tuple of tensors.""" + + def forward(self, inputs: Dict[str, torch.Tensor]) -> Tuple[torch.Tensor, torch.Tensor]: + _list = self.value_list(inputs) + return (_list[0], _list[1]) + + +class ToTuple3(_ToTuple): + """Converts a dictionary of tensors of length=3 to a tuple of tensors.""" + + def forward( + self, inputs: Dict[str, torch.Tensor] + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + _list = self.value_list(inputs) + return (_list[0], _list[1], _list[2]) + + +class ToTuple4(_ToTuple): + """Converts a dictionary of tensors of length=4 to a tuple of tensors.""" + + def forward( + self, inputs: Dict[str, torch.Tensor] + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + _list = self.value_list(inputs) + return (_list[0], _list[1], _list[2], _list[3]) + + +class ToTuple5(_ToTuple): + """Converts a dictionary of tensors of length=5 to a tuple of tensors.""" + + def forward( + self, inputs: Dict[str, torch.Tensor] + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + _list = self.value_list(inputs) + return (_list[0], _list[1], _list[2], _list[3], _list[4]) + + +class ToTuple6(_ToTuple): + """Converts a dictionary of tensors of length=6 to a tuple of tensors.""" + + def forward( + self, inputs: 
Dict[str, torch.Tensor] + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + _list = self.value_list(inputs) + return (_list[0], _list[1], _list[2], _list[3], _list[4], _list[5]) + + +class ToTuple7(_ToTuple): + """Converts a dictionary of tensors of length=7 to a tuple of tensors.""" + + def forward( + self, inputs: Dict[str, torch.Tensor] + ) -> Tuple[ + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + ]: + _list = self.value_list(inputs) + return (_list[0], _list[1], _list[2], _list[3], _list[4], _list[5], _list[6]) + + +class ToTuple8(_ToTuple): + """Converts a dictionary of tensors of length=8 to a tuple of tensors.""" + + def forward( + self, inputs: Dict[str, torch.Tensor] + ) -> Tuple[ + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + ]: + _list = self.value_list(inputs) + return (_list[0], _list[1], _list[2], _list[3], _list[4], _list[5], _list[6], _list[7]) + + +class ToTuple9(_ToTuple): + """Converts a dictionary of tensors of length=9 to a tuple of tensors.""" + + def forward( + self, inputs: Dict[str, torch.Tensor] + ) -> Tuple[ + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + ]: + _list = list(inputs.values()) + return ( + _list[0], + _list[1], + _list[2], + _list[3], + _list[4], + _list[5], + _list[6], + _list[7], + _list[8], + ) + + +class ToTuple10(_ToTuple): + """Converts a dictionary of tensors of length=10 to a tuple of tensors.""" + + def forward( + self, inputs: Dict[str, torch.Tensor] + ) -> Tuple[ + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + ]: + _list = list(inputs.values()) + return ( + _list[0], + _list[1], + _list[2], + _list[3], + _list[4], + _list[5], + _list[6], + _list[7], + _list[8], + _list[9], + ) diff --git a/tests/unit/torch/transforms/test_tuple.py b/tests/unit/torch/transforms/test_tuple.py new file mode 100644 index 0000000000..c5157741c6 --- /dev/null +++ b/tests/unit/torch/transforms/test_tuple.py @@ -0,0 +1,25 @@ +import pytest +import torch + +from merlin.models.torch.transforms import tuple +from merlin.models.torch.utils import module_utils +from merlin.schema import Schema + + +class TestToTuple: + @pytest.mark.parametrize("length", [i + 1 for i in range(10)]) + def test_with_length(self, length): + schema = Schema([str(i) for i in range(length)]) + to_tuple = getattr(tuple, f"ToTuple{length}")(schema) + + inputs = {str(i): torch.randn(2, 3) for i in range(length)} + outputs = module_utils.module_test(to_tuple, inputs) + + assert len(outputs) == length + + def test_exception(self): + to_tuple = tuple.ToTuple2() + + inputs = {"0": torch.randn(2, 3), "1": torch.randn(2, 3)} + with pytest.raises(RuntimeError): + module_utils.module_test(to_tuple, inputs) From e2930f8396920656eebf61f7f612782e978c6cf4 Mon Sep 17 00:00:00 2001 From: Sara Rabhi Date: Tue, 4 Jul 2023 13:58:32 -0400 Subject: [PATCH 04/34] Add padding operator to the PyTorch API (#1177) * add padding op * remove unused mask prefix * Apply suggestions from code review Co-authored-by: Marc Romeyn * Add test for tracing the model with torchscript * fix linting * add module_test to test_padded_targets --------- Co-authored-by: Marc Romeyn Co-authored-by: edknv 
<109497216+edknv@users.noreply.github.com>
---
 merlin/models/torch/transforms/sequences.py   | 175 ++++++++++++++++++
 tests/unit/torch/transforms/test_sequences.py |  97 ++++++++++
 2 files changed, 272 insertions(+)
 create mode 100644 merlin/models/torch/transforms/sequences.py
 create mode 100644 tests/unit/torch/transforms/test_sequences.py

diff --git a/merlin/models/torch/transforms/sequences.py b/merlin/models/torch/transforms/sequences.py
new file mode 100644
index 0000000000..c3343ef85b
--- /dev/null
+++ b/merlin/models/torch/transforms/sequences.py
@@ -0,0 +1,175 @@
+#
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from typing import Dict, List, Optional, Union
+
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+from merlin.models.torch.batch import Batch, Sequence
+from merlin.schema import Schema, Tags
+
+
+class TabularPadding(nn.Module):
+    """A PyTorch module for padding tabular sequence data.
+
+    Parameters
+    ----------
+    schema : Schema
+        The schema of the tabular data, which defines the column names of input features.
+    max_sequence_length : Optional[int], default=None
+        The maximum length of the sequences after padding.
+        If None, sequences will be padded to the maximum length in the current batch.
+
+    Example usage::
+        features = {
+            'feature1': torch.tensor([[4, 3], [5, 2]]),
+            'feature2': torch.tensor([[3, 8], [7, 9]])
+        }
+        schema = Schema(["feature1", "feature2"])
+        _max_sequence_length = 10
+        padding_op = TabularPadding(
+            schema=schema, max_sequence_length=_max_sequence_length
+        )
+        padded_batch = padding_op(Batch(features))
+
+    Notes:
+        - If the schema contains continuous list features,
+        ensure that they are normalized within the range of [0, 1].
+        This is necessary because we will be padding them
+        to a max_sequence_length using the minimum value of 0.0.
+        - The current class only supports right padding.
+ """ + + def __init__( + self, + schema: Schema, + max_sequence_length: Optional[int] = None, + ): + super().__init__() + self.schema = schema + self.max_sequence_length = max_sequence_length + self.features: List[str] = self.schema.column_names + self.sparse_features = self.schema.select_by_tag(Tags.SEQUENCE).column_names + self.padding_idx = 0 + + def forward(self, inputs: Union[torch.Tensor, Dict[str, torch.Tensor]], batch: Batch) -> Batch: + _max_sequence_length = self.max_sequence_length + if not _max_sequence_length: + # Infer the maximum length from the current batch + batch_max_sequence_length = 0 + for key, val in batch.features.items(): + if key.endswith("__offsets"): + offsets = val + max_row_length = int(torch.max(offsets[1:] - offsets[:-1])) + batch_max_sequence_length = max(max_row_length, batch_max_sequence_length) + _max_sequence_length = batch_max_sequence_length + + # Store the non-padded lengths of list features + seq_inputs_lengths = self._get_sequence_lengths(batch.features) + seq_shapes: List[torch.Tensor] = list(seq_inputs_lengths.values()) + if not torch.all(torch.stack([torch.all(x == seq_shapes[0]) for x in seq_shapes])): + raise ValueError( + "The sequential inputs must have the same length for each row in the batch, " + f"but they are different: {seq_shapes}" + ) + # Pad the features of the batch + batch_padded = {} + for key, value in batch.features.items(): + if key.endswith("__offsets"): + col_name = key[: -len("__offsets")] + if col_name in self.features: + padded_values = self._pad_ragged_tensor( + batch.features[f"{col_name}__values"], value, _max_sequence_length + ) + batch_padded[col_name] = padded_values + elif key.endswith("__values"): + continue + else: + col_name = key + if col_name in self.features and seq_inputs_lengths.get(col_name) is not None: + # pad dense list features + batch_padded[col_name] = self._pad_dense_tensor(value, _max_sequence_length) + + # Pad targets of the batch + targets_padded = None + if batch.targets is not None: + targets_padded = {} + for key, value in batch.targets.items(): + if key.endswith("__offsets"): + col_name = key[: -len("__offsets")] + padded_values = self._pad_ragged_tensor( + batch.targets[f"{col_name}__values"], value, _max_sequence_length + ) + targets_padded[col_name] = padded_values + elif key.endswith("__values"): + continue + else: + targets_padded[key] = value + + return Batch( + features=batch_padded, targets=targets_padded, sequences=Sequence(seq_inputs_lengths) + ) + + def _get_sequence_lengths(self, sequences: Dict[str, torch.Tensor]): + """Compute the effective length of each sequence in a dictionary of sequences.""" + seq_inputs_lengths = {} + for key, val in sequences.items(): + if key.endswith("__offsets"): + seq_inputs_lengths[key[: -len("__offsets")]] = val[1:] - val[:-1] + elif key in self.sparse_features: + seq_inputs_lengths[key] = (val != self.padding_idx).sum(-1) + return seq_inputs_lengths + + def _squeeze(self, tensor: torch.Tensor): + """Squeeze a tensor of shape (N,1) to shape (N).""" + if len(tensor.shape) == 2: + return tensor.squeeze(1) + return tensor + + def _get_indices(self, offsets: torch.Tensor, diff_offsets: torch.Tensor): + """Compute indices for a sparse tensor from offsets and their differences.""" + row_ids = torch.arange(len(offsets) - 1, device=offsets.device) + row_ids_repeated = torch.repeat_interleave(row_ids, diff_offsets) + row_offset_repeated = torch.repeat_interleave(offsets[:-1], diff_offsets) + col_ids = ( + torch.arange(len(row_offset_repeated), 
device=offsets.device) - row_offset_repeated + ) + indices = torch.cat([row_ids_repeated.unsqueeze(-1), col_ids.unsqueeze(-1)], dim=1) + return indices + + def _pad_ragged_tensor(self, values: torch.Tensor, offsets: torch.Tensor, padding_length: int): + """Pad a ragged features represented by "values" and "offsets" to a dense tensor + of length `padding_length`. + """ + values = self._squeeze(values) + offsets = self._squeeze(offsets) + num_rows = len(offsets) - 1 + diff_offsets = offsets[1:] - offsets[:-1] + max_length = int(diff_offsets.max()) + indices = self._get_indices(offsets, diff_offsets) + sparse_tensor = torch.sparse_coo_tensor( + indices.T, values, torch.Size([num_rows, max_length]), device=values.device + ) + + return self._pad_dense_tensor(sparse_tensor.to_dense(), padding_length) + + def _pad_dense_tensor(self, tensor: torch.Tensor, length: int) -> torch.Tensor: + """Pad a dense tensor along its second dimension to a specified length.""" + if len(tensor.shape) == 2: + pad_diff = length - tensor.shape[1] + return F.pad(input=tensor, pad=(0, pad_diff, 0, 0)) + return tensor diff --git a/tests/unit/torch/transforms/test_sequences.py b/tests/unit/torch/transforms/test_sequences.py new file mode 100644 index 0000000000..76286400a9 --- /dev/null +++ b/tests/unit/torch/transforms/test_sequences.py @@ -0,0 +1,97 @@ +from itertools import accumulate + +import pytest +import torch + +from merlin.models.torch.batch import Batch +from merlin.models.torch.transforms.sequences import TabularPadding +from merlin.models.torch.utils import module_utils +from merlin.schema import ColumnSchema, Schema, Tags + + +def _get_values_offsets(data): + values = [] + row_lengths = [] + for row in data: + row_lengths.append(len(row)) + values += row + offsets = [0] + list(accumulate(row_lengths)) + return torch.tensor(values), torch.tensor(offsets) + + +class TestPadBatch: + @pytest.fixture + def sequence_batch(self): + a_values, a_offsets = _get_values_offsets(data=[[1, 2], [], [3, 4, 5]]) + b_values, b_offsets = _get_values_offsets([[34, 30], [], [33, 23, 50]]) + features = { + "a__values": a_values, + "a__offsets": a_offsets, + "b__values": b_values, + "b__offsets": b_offsets, + "c_dense": torch.Tensor([[1, 2, 0], [0, 0, 0], [4, 5, 6]]), + "d_context": torch.Tensor([1, 2, 3]), + } + targets = None + return Batch(features, targets) + + @pytest.fixture + def sequence_schema(self): + return Schema( + [ + ColumnSchema("a", tags=[Tags.SEQUENCE]), + ColumnSchema("b", tags=[Tags.SEQUENCE]), + ColumnSchema("c_dense", tags=[Tags.SEQUENCE]), + ColumnSchema("d_context", tags=[Tags.CONTEXT]), + ] + ) + + def test_padded_features(self, sequence_batch, sequence_schema): + _max_sequence_length = 8 + padding_op = TabularPadding( + schema=sequence_schema, max_sequence_length=_max_sequence_length + ) + padded_batch = module_utils.module_test(padding_op, sequence_batch) + + assert torch.equal(padded_batch.sequences.length("a"), torch.Tensor([2, 0, 3])) + assert set(padded_batch.features.keys()) == set(["a", "b", "c_dense"]) + for feature in ["a", "b", "c_dense"]: + assert padded_batch.features[feature].shape[1] == _max_sequence_length + + def test_batch_invalid_lengths(self): + # Test when targets is not a tensor nor a dictionary of tensors + a_values, a_offsets = _get_values_offsets(data=[[1, 2], [], [3, 4, 5]]) + b_values, b_offsets = _get_values_offsets([[34], [23, 56], [33, 23, 50, 4]]) + + with pytest.raises( + ValueError, + match="The sequential inputs must have the same length for each row in the batch", + ): + 
padding_op = TabularPadding(schema=Schema(["a", "b"])) + padding_op( + inputs=None, + batch=Batch( + { + "a__values": a_values, + "a__offsets": a_offsets, + "b__values": b_values, + "b__offsets": b_offsets, + } + ), + ) + + def test_padded_targets(self, sequence_batch, sequence_schema): + _max_sequence_length = 8 + target_values, target_offsets = _get_values_offsets([[10, 11], [], [12, 13, 14]]) + sequence_batch.targets = { + "target_1": torch.Tensor([3, 4, 6]), + "target_2__values": target_values, + "target_2__offsets": target_offsets, + } + padding_op = TabularPadding( + schema=sequence_schema, max_sequence_length=_max_sequence_length + ) + padded_batch = module_utils.module_test(padding_op, sequence_batch) + + assert padded_batch.targets["target_2"].shape[1] == _max_sequence_length + assert torch.equal(padded_batch.targets["target_1"], sequence_batch.targets["target_1"]) From a64fd32ebe9353ccb5cb2fa12d16f6a49de969b3 Mon Sep 17 00:00:00 2001 From: Marc Romeyn Date: Wed, 5 Jul 2023 10:36:36 +0200 Subject: [PATCH 05/34] Adding validation_step and test_step to Model (#1181) --- .gitignore | 3 +++ merlin/models/torch/models/base.py | 19 +++++++++++++++++++ tests/unit/torch/models/test_base.py | 10 ++++++++-- 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index e5f5cb5bcc..ed7bef7e91 100644 --- a/.gitignore +++ b/.gitignore @@ -105,3 +105,6 @@ dmypy.json # Experiment files _test.py + +# Lightning +**/lightning_logs/ \ No newline at end of file diff --git a/merlin/models/torch/models/base.py b/merlin/models/torch/models/base.py index df1826746c..9bf7271dfc 100644 --- a/merlin/models/torch/models/base.py +++ b/merlin/models/torch/models/base.py @@ -102,6 +102,25 @@ def training_step(self, batch, batch_idx): return loss_and_metrics["loss"] + def validation_step(self, batch, batch_idx): + return self._val_step(batch, batch_idx, type="val") + + def test_step(self, batch, batch_idx): + return self._val_step(batch, batch_idx, type="test") + + def _val_step(self, batch, batch_idx, type="val"): + del batch_idx + if not isinstance(batch, Batch): + batch = Batch(features=batch[0], targets=batch[1]) + + predictions = self(batch.features, batch=batch) + + loss_and_metrics = compute_loss(predictions, batch.targets, self.model_outputs()) + for name, value in loss_and_metrics.items(): + self.log(f"{type}_{name}", value) + + return loss_and_metrics + def configure_optimizers(self): """Configures the optimizer for the model.""" return self.optimizer(self.parameters()) diff --git a/tests/unit/torch/models/test_base.py b/tests/unit/torch/models/test_base.py index 9abfe2eef7..2ee931989d 100644 --- a/tests/unit/torch/models/test_base.py +++ b/tests/unit/torch/models/test_base.py @@ -128,7 +128,7 @@ def test_training_step_values(self): expected_loss = nn.BCELoss()(expected_outputs, targets["target"]) assert torch.allclose(loss, expected_loss) - def test_training_step_with_dataloader(self): + def test_step_with_dataloader(self): model = mm.Model( mm.Concat(), mm.BinaryOutput(ColumnSchema("target")), @@ -144,8 +144,11 @@ def test_training_step_with_dataloader(self): loss = model.training_step(batch, 0) assert loss > 0.0 + assert torch.equal( + model.validation_step(batch, 0)["loss"], model.test_step(batch, 0)["loss"] + ) - def test_training_step_with_batch(self): + def test_step_with_batch(self): model = mm.Model( mm.Concat(), mm.BinaryOutput(ColumnSchema("target")), @@ -156,6 +159,9 @@ def test_training_step_with_batch(self): model.initialize(batch) loss = 
model.training_step(batch, 0) assert loss > 0.0 + assert torch.equal( + model.validation_step(batch, 0)["loss"], model.test_step(batch, 0)["loss"] + ) def test_training_step_missing_output(self): model = mm.Model(mm.Block()) From eddc2ab6212ef10ccee9f9b304887300a28f03e8 Mon Sep 17 00:00:00 2001 From: Oliver Holworthy Date: Wed, 5 Jul 2023 13:43:42 +0100 Subject: [PATCH 06/34] Rename Schema Inspection names in the torch schema module (#1179) * Create input_schema and output_schema methods on Block * Expose schema functions from merlin.models.torch * Rename input/output schema and add to top level torch module * Revert input/output schema methods on Block * Remove whitespace form torch `__init__.py` * Re-format schema.py --- merlin/models/torch/__init__.py | 9 +++ merlin/models/torch/batch.py | 6 +- merlin/models/torch/block.py | 26 ++++----- merlin/models/torch/blocks/mlp.py | 12 ++-- merlin/models/torch/inputs/select.py | 4 +- merlin/models/torch/schema.py | 74 +++++++++++++------------ tests/unit/torch/inputs/test_select.py | 4 +- tests/unit/torch/inputs/test_tabular.py | 14 ++--- tests/unit/torch/models/test_base.py | 4 +- tests/unit/torch/test_block.py | 12 ++-- tests/unit/torch/test_router.py | 2 +- tests/unit/torch/test_schema.py | 12 ++-- 12 files changed, 96 insertions(+), 83 deletions(-) diff --git a/merlin/models/torch/__init__.py b/merlin/models/torch/__init__.py index 025c8ba0dc..603d6e2892 100644 --- a/merlin/models/torch/__init__.py +++ b/merlin/models/torch/__init__.py @@ -31,6 +31,11 @@ from merlin.models.torch.router import RouterBlock from merlin.models.torch.transforms.agg import Concat, Stack +input_schema = schema.input_schema +output_schema = schema.output_schema +target_schema = schema.target_schema +feature_schema = schema.feature_schema + __all__ = [ "Batch", "BinaryOutput", @@ -55,6 +60,10 @@ "Concat", "Stack", "schema", + "input_schema", + "output_schema", + "feature_schema", + "target_schema", "DLRMBlock", "DLRMModel", ] diff --git a/merlin/models/torch/batch.py b/merlin/models/torch/batch.py index 72d813c00c..14e21ec5af 100644 --- a/merlin/models/torch/batch.py +++ b/merlin/models/torch/batch.py @@ -375,10 +375,10 @@ def sample_features( return sample_batch(data, batch_size, shuffle).features -@schema.output.register_tensor(Batch) +@schema.output_schema.register_tensor(Batch) def _(input): output_schema = Schema() - output_schema += schema.output.tensors(input.features) - output_schema += schema.output.tensors(input.targets) + output_schema += schema.output_schema.tensors(input.features) + output_schema += schema.output_schema.tensors(input.targets) return output_schema diff --git a/merlin/models/torch/block.py b/merlin/models/torch/block.py index 42dede5b9b..cf5bea6f29 100644 --- a/merlin/models/torch/block.py +++ b/merlin/models/torch/block.py @@ -588,31 +588,31 @@ def set_pre(module: nn.Module, pre: BlockContainer): return set_pre(module[0], pre) -@schema.input.register(BlockContainer) +@schema.input_schema.register(BlockContainer) def _(module: BlockContainer, input: Schema): - return schema.input(module[0], input) if module else input + return schema.input_schema(module[0], input) if module else input -@schema.input.register(ParallelBlock) +@schema.input_schema.register(ParallelBlock) def _(module: ParallelBlock, input: Schema): if module.pre: - return schema.input(module.pre) + return schema.input_schema(module.pre) out_schema = Schema() for branch in module.branches.values(): - out_schema += schema.input(branch, input) + out_schema += 
schema.input_schema(branch, input) return out_schema -@schema.output.register(ParallelBlock) +@schema.output_schema.register(ParallelBlock) def _(module: ParallelBlock, input: Schema): if module.post: - return schema.output(module.post, input) + return schema.output_schema(module.post, input) output = Schema() for name, branch in module.branches.items(): - branch_schema = schema.output(branch, input) + branch_schema = schema.output_schema(branch, input) if len(branch_schema) == 1 and branch_schema.first.name == "output": branch_schema = Schema([branch_schema.first.with_name(name)]) @@ -622,9 +622,9 @@ def _(module: ParallelBlock, input: Schema): return output -@schema.output.register(BlockContainer) +@schema.output_schema.register(BlockContainer) def _(module: BlockContainer, input: Schema): - return schema.output(module[-1], input) if module else input + return schema.output_schema(module[-1], input) if module else input BlockT = TypeVar("BlockT", bound=BlockContainer) @@ -720,13 +720,13 @@ def _extract_block(main, selection, route, name=None): if isinstance(main, ParallelBlock): return _extract_parallel(main, selection, route=route, name=name) - main_schema = schema.input(main) - route_schema = schema.input(route) + main_schema = schema.input_schema(main) + route_schema = schema.input_schema(route) if main_schema == route_schema: from merlin.models.torch.inputs.select import SelectFeatures - out_schema = schema.output(main, main_schema) + out_schema = schema.output_schema(main, main_schema) if len(out_schema) == 1 and out_schema.first.name == "output": out_schema = Schema([out_schema.first.with_name(name)]) diff --git a/merlin/models/torch/blocks/mlp.py b/merlin/models/torch/blocks/mlp.py index e7e9c1334d..8038dc89f7 100644 --- a/merlin/models/torch/blocks/mlp.py +++ b/merlin/models/torch/blocks/mlp.py @@ -4,7 +4,7 @@ from torch import nn from merlin.models.torch.block import Block -from merlin.models.torch.schema import Schema, output +from merlin.models.torch.schema import Schema, output_schema from merlin.models.torch.transforms.agg import Concat, MaybeAgg @@ -84,8 +84,8 @@ def __init__( super().__init__(*modules) -@output.register(nn.LazyLinear) -@output.register(nn.Linear) -@output.register(MLPBlock) -def _output_schema_block(module: nn.LazyLinear, input: Schema): - return output.tensors(torch.ones((1, module.out_features), dtype=float)) +@output_schema.register(nn.LazyLinear) +@output_schema.register(nn.Linear) +@output_schema.register(MLPBlock) +def _output_schema_block(module: nn.LazyLinear, inputs: Schema): + return output_schema.tensors(torch.ones((1, module.out_features), dtype=float)) diff --git a/merlin/models/torch/inputs/select.py b/merlin/models/torch/inputs/select.py index 6456e807a6..a2c04261af 100644 --- a/merlin/models/torch/inputs/select.py +++ b/merlin/models/torch/inputs/select.py @@ -201,8 +201,8 @@ def forward(self, inputs, batch: Batch) -> Dict[str, torch.Tensor]: @schema.extract.register(SelectKeys) def _(main, selection, route, name=None): - main_schema = schema.input(main) - route_schema = schema.input(route) + main_schema = schema.input_schema(main) + route_schema = schema.input_schema(route) diff = main_schema.excluding_by_name(route_schema.column_names) diff --git a/merlin/models/torch/schema.py b/merlin/models/torch/schema.py index d140eff670..f76e99f91d 100644 --- a/merlin/models/torch/schema.py +++ b/merlin/models/torch/schema.py @@ -97,7 +97,7 @@ def __call__(self, module: nn.Module, inputs: Optional[Schema] = None) -> Schema return 
super().__call__(module, inputs) except NotImplementedError: raise ValueError( - f"Could not get output schema of {module} " "please call mm.trace_schema first." + f"Could not get output schema of {module} " "please call `mm.schema.trace` first." ) def trace( @@ -127,7 +127,7 @@ def _func(module: nn.Module, input: Schema) -> Schema: def __call__(self, module: nn.Module, inputs: Optional[Schema] = None) -> Schema: try: - _inputs = input(module) + _inputs = input_schema(module) inputs = _inputs except ValueError: pass @@ -156,7 +156,7 @@ def __call__(self, module: nn.Module, inputs: Optional[Schema] = None) -> Schema return super().__call__(module, inputs) except NotImplementedError: raise ValueError( - f"Could not get output schema of {module} " "please call mm.trace_schema first." + f"Could not get output schema of {module} " "please call `mm.schema.trace` first." ) def trace( @@ -165,7 +165,7 @@ def trace( inputs: Union[torch.Tensor, Dict[str, torch.Tensor], Schema], outputs: Union[torch.Tensor, Dict[str, torch.Tensor], Schema], ) -> Schema: - _input_schema = input.get_schema(inputs) + _input_schema = input_schema.get_schema(inputs) _output_schema = self.get_schema(outputs) try: @@ -207,8 +207,8 @@ def extract(self, module: nn.Module, selection: Selection, route: nn.Module, nam return fn(module, selection, route, name=name) -input = _InputSchemaDispatch("input_schema") -output = _OutputSchemaDispatch("output_schema") +input_schema = _InputSchemaDispatch("input_schema") +output_schema = _OutputSchemaDispatch("output_schema") select = _SelectDispatch("selection") extract = _ExtractDispatch("extract") @@ -240,13 +240,13 @@ def _hook(mod: nn.Module, inputs: Tuple[torch.Tensor], outputs: torch.Tensor): mod.__input_schemas = () mod.__output_schemas = () - _input_schema = input.trace(mod, inputs[0]) + _input_schema = input_schema.trace(mod, inputs[0]) if _input_schema not in mod.__input_schemas: mod.__input_schemas += (_input_schema,) - mod.__output_schemas += (output.trace(mod, _input_schema, outputs),) + mod.__output_schemas += (output_schema.trace(mod, _input_schema, outputs),) def add_hook(m): - custom_modules = list(output.dispatcher.registry.keys()) + custom_modules = list(output_schema.dispatcher.registry.keys()) if m and isinstance(m, tuple(custom_modules[1:])): return @@ -261,7 +261,7 @@ def add_hook(m): return module_out -def features(module: nn.Module) -> Schema: +def feature_schema(module: nn.Module) -> Schema: """Extract the feature schema from a PyTorch Module. This function operates by applying the `get_feature_schema` method @@ -293,7 +293,7 @@ def get_feature_schema(module): return feature_schema -def targets(module: nn.Module) -> Schema: +def target_schema(module: nn.Module) -> Schema: """ Extract the target schema from a PyTorch Module. 
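A hedged usage sketch of the renamed tracing helpers this patch exposes (trace a
module once, then read the recorded schemas); the MLPBlock size and the
"user_age" feature below are illustrative assumptions, not taken from the diff:

    import torch
    import merlin.models.torch as mm

    block = mm.MLPBlock([8])
    features = {"user_age": torch.randn(2, 3)}

    mm.schema.trace(block, features)             # records input/output schemas
    print(mm.input_schema(block).column_names)   # -> ["user_age"]
    print(mm.output_schema(block).column_names)  # -> ["output"], an 8-dim column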
@@ -484,7 +484,7 @@ def select(self, selection: Selection) -> "Selectable": raise NotImplementedError() -@output.register_tensor(torch.Tensor) +@output_schema.register_tensor(torch.Tensor) def _tensor_to_schema(input, name="output"): kwargs = dict(dims=input.shape[1:], dtype=input.dtype) @@ -494,13 +494,13 @@ def _tensor_to_schema(input, name="output"): return Schema([ColumnSchema(name, **kwargs)]) -@input.register_tensor(torch.Tensor) +@input_schema.register_tensor(torch.Tensor) def _(input): return _tensor_to_schema(input, "input") -@input.register_tensor(Dict[str, torch.Tensor]) -@output.register_tensor(Dict[str, torch.Tensor]) +@input_schema.register_tensor(Dict[str, torch.Tensor]) +@output_schema.register_tensor(Dict[str, torch.Tensor]) def _(input): output = Schema() for k, v in sorted(input.items()): @@ -509,23 +509,27 @@ def _(input): return output -@input.register_tensor(Tuple[torch.Tensor]) -@output.register_tensor(Tuple[torch.Tensor]) -@input.register_tensor(Tuple[torch.Tensor, torch.Tensor]) -@output.register_tensor(Tuple[torch.Tensor, torch.Tensor]) -@input.register_tensor(Tuple[torch.Tensor, torch.Tensor, torch.Tensor]) -@output.register_tensor(Tuple[torch.Tensor, torch.Tensor, torch.Tensor]) -@input.register_tensor(Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]) -@output.register_tensor(Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]) -@input.register_tensor(Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]) -@output.register_tensor(Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]) -@input.register_tensor( +@input_schema.register_tensor(Tuple[torch.Tensor]) +@output_schema.register_tensor(Tuple[torch.Tensor]) +@input_schema.register_tensor(Tuple[torch.Tensor, torch.Tensor]) +@output_schema.register_tensor(Tuple[torch.Tensor, torch.Tensor]) +@input_schema.register_tensor(Tuple[torch.Tensor, torch.Tensor, torch.Tensor]) +@output_schema.register_tensor(Tuple[torch.Tensor, torch.Tensor, torch.Tensor]) +@input_schema.register_tensor(Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]) +@output_schema.register_tensor(Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]) +@input_schema.register_tensor( + Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor] +) +@output_schema.register_tensor( + Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor] +) +@input_schema.register_tensor( Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor] ) -@output.register_tensor( +@output_schema.register_tensor( Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor] ) -@input.register_tensor( +@input_schema.register_tensor( Tuple[ torch.Tensor, torch.Tensor, @@ -536,7 +540,7 @@ def _(input): torch.Tensor, ] ) -@output.register_tensor( +@output_schema.register_tensor( Tuple[ torch.Tensor, torch.Tensor, @@ -547,7 +551,7 @@ def _(input): torch.Tensor, ] ) -@input.register_tensor( +@input_schema.register_tensor( Tuple[ torch.Tensor, torch.Tensor, @@ -559,7 +563,7 @@ def _(input): torch.Tensor, ] ) -@output.register_tensor( +@output_schema.register_tensor( Tuple[ torch.Tensor, torch.Tensor, @@ -571,7 +575,7 @@ def _(input): torch.Tensor, ] ) -@input.register_tensor( +@input_schema.register_tensor( Tuple[ torch.Tensor, torch.Tensor, @@ -584,7 +588,7 @@ def _(input): torch.Tensor, ] ) -@output.register_tensor( +@output_schema.register_tensor( Tuple[ torch.Tensor, torch.Tensor, @@ -597,7 +601,7 
@@ def _(input): torch.Tensor, ] ) -@input.register_tensor( +@input_schema.register_tensor( Tuple[ torch.Tensor, torch.Tensor, @@ -611,7 +615,7 @@ def _(input): torch.Tensor, ] ) -@output.register_tensor( +@output_schema.register_tensor( Tuple[ torch.Tensor, torch.Tensor, diff --git a/tests/unit/torch/inputs/test_select.py b/tests/unit/torch/inputs/test_select.py index 37c15c784f..7dcac2114e 100644 --- a/tests/unit/torch/inputs/test_select.py +++ b/tests/unit/torch/inputs/test_select.py @@ -72,8 +72,8 @@ def test_forward(self): outputs = mm.schema.trace(block, self.batch.features["session_id"], batch=self.batch) assert len(outputs) == 5 - assert mm.schema.input(block).column_names == ["input"] - assert mm.schema.features(block).column_names == [ + assert mm.input_schema(block).column_names == ["input"] + assert mm.feature_schema(block).column_names == [ "user_id", "country", "user_age", diff --git a/tests/unit/torch/inputs/test_tabular.py b/tests/unit/torch/inputs/test_tabular.py index a3ee2fb0a3..e81fe44ce4 100644 --- a/tests/unit/torch/inputs/test_tabular.py +++ b/tests/unit/torch/inputs/test_tabular.py @@ -68,27 +68,27 @@ def test_extract_route_two_tower(self): "item_recency", "item_genres", } - assert set(mm.schema.input(towers).column_names) == input_cols - assert mm.schema.output(towers).column_names == ["user", "item"] + assert set(mm.input_schema(towers).column_names) == input_cols + assert mm.output_schema(towers).column_names == ["user", "item"] categorical = towers.select(Tags.CATEGORICAL) outputs = module_utils.module_test(towers, self.batch) assert mm.schema.extract(towers, Tags.CATEGORICAL)[1] == categorical - assert set(mm.schema.input(towers).column_names) == input_cols - assert mm.schema.output(towers).column_names == ["user", "item"] + assert set(mm.input_schema(towers).column_names) == input_cols + assert mm.output_schema(towers).column_names == ["user", "item"] outputs = towers(self.batch.features) assert outputs["user"].shape == (10, 10) assert outputs["item"].shape == (10, 10) new_inputs, route = mm.schema.extract(towers, Tags.USER) - assert mm.schema.output(new_inputs).column_names == ["user", "item"] + assert mm.output_schema(new_inputs).column_names == ["user", "item"] assert "user" in new_inputs.branches assert new_inputs.branches["user"][0].select_keys.column_names == ["user"] assert "user" in route.branches - assert mm.schema.output(route).select_by_tag(Tags.EMBEDDING).column_names == ["user"] + assert mm.output_schema(route).select_by_tag(Tags.EMBEDDING).column_names == ["user"] def test_extract_route_embeddings(self): input_block = mm.TabularInputBlock(self.schema, init="defaults", agg="concat") @@ -97,7 +97,7 @@ def test_extract_route_embeddings(self): assert outputs.shape == (10, 107) no_embs, emb_route = mm.schema.extract(input_block, Tags.CATEGORICAL) - output_schema = mm.schema.output(emb_route) + output_schema = mm.output_schema(emb_route) assert len(output_schema.select_by_tag(Tags.USER)) == 3 assert len(output_schema.select_by_tag(Tags.ITEM)) == 3 diff --git a/tests/unit/torch/models/test_base.py b/tests/unit/torch/models/test_base.py index 2ee931989d..c67c65f2f8 100644 --- a/tests/unit/torch/models/test_base.py +++ b/tests/unit/torch/models/test_base.py @@ -197,7 +197,7 @@ def test_output_schema(self): "b": torch.tensor([[5.0, 6.0], [7.0, 8.0]]), } outputs = mm.schema.trace(model, inputs) - schema = mm.schema.output(model) + schema = mm.output_schema(model) for name in outputs: assert name in schema.column_names assert schema[name].dtype.name == 
str(outputs[name].dtype).split(".")[-1] @@ -205,7 +205,7 @@ def test_output_schema(self): def test_no_output_schema(self): model = mm.Model(PlusOne()) with pytest.raises(ValueError, match="Could not get output schema of PlusOne()"): - mm.schema.output(model) + mm.output_schema(model) def test_train_classification_with_lightning_trainer(self, music_streaming_data, batch_size=16): schema = music_streaming_data.schema.select_by_name( diff --git a/tests/unit/torch/test_block.py b/tests/unit/torch/test_block.py index ea36aaa412..02eb342f61 100644 --- a/tests/unit/torch/test_block.py +++ b/tests/unit/torch/test_block.py @@ -57,7 +57,7 @@ def test_identity(self): outputs = module_utils.module_test(block, inputs, batch=Batch(inputs)) assert torch.equal(inputs, outputs) - assert mm.schema.output(block) == mm.schema.output.tensors(inputs) + assert mm.output_schema(block) == mm.output_schema.tensors(inputs) def test_insertion(self): block = Block() @@ -158,7 +158,7 @@ def test_schema_tracking(self): inputs = torch.randn(1, 3) outputs = mm.schema.trace(pb, inputs) - schema = mm.schema.output(pb) + schema = mm.output_schema(pb) for name in outputs: assert name in schema.column_names @@ -258,9 +258,9 @@ def test_set_pre(self): def test_input_schema_pre(self): pb = ParallelBlock({"a": PlusOne(), "b": PlusOne()}) outputs = mm.schema.trace(pb, torch.randn(1, 3)) - input_schema = mm.schema.input(pb) + input_schema = mm.input_schema(pb) assert len(input_schema) == 1 - assert len(mm.schema.output(pb)) == 2 + assert len(mm.output_schema(pb)) == 2 assert len(outputs) == 2 pb2 = ParallelBlock({"a": PlusOne(), "b": PlusOne()}) @@ -270,8 +270,8 @@ def test_input_schema_pre(self): assert get_pre(pb2)[0] == pb pb2.append(pb) - assert input_schema == mm.schema.input(pb2) - assert mm.schema.output(pb2) == mm.schema.output(pb) + assert input_schema == mm.input_schema(pb2) + assert mm.output_schema(pb2) == mm.output_schema(pb) def test_leaf(self): block = ParallelBlock({"a": PlusOne()}) diff --git a/tests/unit/torch/test_router.py b/tests/unit/torch/test_router.py index 89f9292f6c..76459ea8db 100644 --- a/tests/unit/torch/test_router.py +++ b/tests/unit/torch/test_router.py @@ -162,4 +162,4 @@ def test_nested(self): outputs = module_utils.module_test(nested, self.batch.features) assert list(outputs.keys()) == ["user_age"] - assert "user_age" in mm.schema.output(nested).column_names + assert "user_age" in mm.output_schema(nested).column_names diff --git a/tests/unit/torch/test_schema.py b/tests/unit/torch/test_schema.py index d6919c2617..78ca1811ec 100644 --- a/tests/unit/torch/test_schema.py +++ b/tests/unit/torch/test_schema.py @@ -19,11 +19,11 @@ from merlin.models.torch.schema import ( Selectable, - features, + feature_schema, select, select_schema, selection_name, - targets, + target_schema, ) from merlin.schema import ColumnSchema, Schema, Tags @@ -122,8 +122,8 @@ def test_features(self): schema = Schema([ColumnSchema("a"), ColumnSchema("b")]) module = MockModule(feature_schema=schema) - assert features(module) == schema - assert targets(module) == Schema() + assert feature_schema(module) == schema + assert target_schema(module) == Schema() class TestTargets: @@ -131,5 +131,5 @@ def test_targets(self): schema = Schema([ColumnSchema("a"), ColumnSchema("b")]) module = MockModule(target_schema=schema) - assert targets(module) == schema - assert features(module) == Schema() + assert target_schema(module) == schema + assert feature_schema(module) == Schema() From afc1149c71472f1d64f8a93b6c6d61e11e34e08b Mon Sep 17 
00:00:00 2001 From: Marc Romeyn Date: Wed, 5 Jul 2023 16:33:04 +0200 Subject: [PATCH 07/34] Adding ToTuple function that uses the length of the schema to get the right ToTupleModule (#1182) --- merlin/models/torch/transforms/tuple.py | 66 +++++++++++++++++++---- tests/unit/torch/transforms/test_tuple.py | 7 ++- 2 files changed, 60 insertions(+), 13 deletions(-) diff --git a/merlin/models/torch/transforms/tuple.py b/merlin/models/torch/transforms/tuple.py index d674bf16dd..0f9d20913e 100644 --- a/merlin/models/torch/transforms/tuple.py +++ b/merlin/models/torch/transforms/tuple.py @@ -1,3 +1,4 @@ +import sys from typing import Dict, List, Optional, Tuple import torch @@ -6,7 +7,50 @@ from merlin.schema import Schema -class _ToTuple(nn.Module): +def ToTuple(schema: Schema) -> "ToTupleModule": + """ + Creates a ToTupleModule for a given schema. + + This function is especially useful for serving models with Triton, + as Triton doesn't allow models that output a dictionary. Instead, + by using this function, models can be modified to output tuples. + + Parameters + ---------- + schema : Schema + Input schema for which a ToTupleModule is to be created. + + Returns + ------- + ToTupleModule + A ToTupleModule corresponding to the length of the given schema. + The output can vary from ToTuple1 to ToTuple10. + + Raises + ------ + ValueError + If the length of the schema is more than 10, + a ValueError is raised with an appropriate error message. + + Example usage :: + >>> import torch + >>> schema = Schema(["a", "b", "c"]) + >>> ToTupleModule = ToTuple(schema) + >>> tensor_dict = {'a': torch.tensor([1]), 'b': torch.tensor([2.]), 'c': torch.tensor([2.])} + >>> output = ToTupleModule(tensor_dict) + >>> print(output) + (tensor([1]), tensor([2.]), tensor([2.])) + """ + schema_length = len(schema) + + if schema_length <= 10: + ToTupleClass = getattr(sys.modules[__name__], f"ToTuple{schema_length}") + return ToTupleClass(input_schema=schema) + else: + raise ValueError(f"Cannot convert schema of length {schema_length} to a tuple") + + +class ToTupleModule(nn.Module): def __init__(self, input_schema: Optional[Schema] = None): super().__init__() if input_schema is not None: @@ -28,7 +72,7 @@ def value_list(self, inputs: Dict[str, torch.Tensor]) -> List[torch.Tensor]: return outputs -class ToTuple1(_ToTuple): +class ToTuple1(ToTupleModule): """Converts a dictionary of tensors of length=1 to a tuple of tensors.""" def forward(self, inputs: Dict[str, torch.Tensor]) -> Tuple[torch.Tensor]: @@ -36,7 +80,7 @@ def forward(self, inputs: Dict[str, torch.Tensor]) -> Tuple[torch.Tensor]: return (_list[0],) -class ToTuple2(_ToTuple): +class ToTuple2(ToTupleModule): """Converts a dictionary of tensors of length=2 to a tuple of tensors.""" def forward(self, inputs: Dict[str, torch.Tensor]) -> Tuple[torch.Tensor, torch.Tensor]: @@ -44,7 +88,7 @@ def forward(self, inputs: Dict[str, torch.Tensor]) -> Tuple[torch.Tensor, torch. 
return (_list[0], _list[1]) -class ToTuple3(_ToTuple): +class ToTuple3(ToTupleModule): """Converts a dictionary of tensors of length=3 to a tuple of tensors.""" def forward( @@ -54,7 +98,7 @@ def forward( return (_list[0], _list[1], _list[2]) -class ToTuple4(_ToTuple): +class ToTuple4(ToTupleModule): """Converts a dictionary of tensors of length=4 to a tuple of tensors.""" def forward( @@ -64,7 +108,7 @@ def forward( return (_list[0], _list[1], _list[2], _list[3]) -class ToTuple5(_ToTuple): +class ToTuple5(ToTupleModule): """Converts a dictionary of tensors of length=5 to a tuple of tensors.""" def forward( @@ -74,7 +118,7 @@ def forward( return (_list[0], _list[1], _list[2], _list[3], _list[4]) -class ToTuple6(_ToTuple): +class ToTuple6(ToTupleModule): """Converts a dictionary of tensors of length=6 to a tuple of tensors.""" def forward( @@ -84,7 +128,7 @@ def forward( return (_list[0], _list[1], _list[2], _list[3], _list[4], _list[5]) -class ToTuple7(_ToTuple): +class ToTuple7(ToTupleModule): """Converts a dictionary of tensors of length=7 to a tuple of tensors.""" def forward( @@ -102,7 +146,7 @@ def forward( return (_list[0], _list[1], _list[2], _list[3], _list[4], _list[5], _list[6]) -class ToTuple8(_ToTuple): +class ToTuple8(ToTupleModule): """Converts a dictionary of tensors of length=8 to a tuple of tensors.""" def forward( @@ -121,7 +165,7 @@ def forward( return (_list[0], _list[1], _list[2], _list[3], _list[4], _list[5], _list[6], _list[7]) -class ToTuple9(_ToTuple): +class ToTuple9(ToTupleModule): """Converts a dictionary of tensors of length=9 to a tuple of tensors.""" def forward( @@ -151,7 +195,7 @@ def forward( ) -class ToTuple10(_ToTuple): +class ToTuple10(ToTupleModule): """Converts a dictionary of tensors of length=10 to a tuple of tensors.""" def forward( diff --git a/tests/unit/torch/transforms/test_tuple.py b/tests/unit/torch/transforms/test_tuple.py index c5157741c6..05dd4a6f2c 100644 --- a/tests/unit/torch/transforms/test_tuple.py +++ b/tests/unit/torch/transforms/test_tuple.py @@ -10,7 +10,8 @@ class TestToTuple: @pytest.mark.parametrize("length", [i + 1 for i in range(10)]) def test_with_length(self, length): schema = Schema([str(i) for i in range(length)]) - to_tuple = getattr(tuple, f"ToTuple{length}")(schema) + to_tuple = tuple.ToTuple(schema) + assert isinstance(to_tuple, getattr(tuple, f"ToTuple{length}")) inputs = {str(i): torch.randn(2, 3) for i in range(length)} outputs = module_utils.module_test(to_tuple, inputs) @@ -18,8 +19,10 @@ def test_with_length(self, length): assert len(outputs) == length def test_exception(self): - to_tuple = tuple.ToTuple2() + with pytest.raises(ValueError): + tuple.ToTuple(Schema([str(i) for i in range(11)])) + to_tuple = tuple.ToTuple2() inputs = {"0": torch.randn(2, 3), "1": torch.randn(2, 3)} with pytest.raises(RuntimeError): module_utils.module_test(to_tuple, inputs) From 8c6bf054b9631bc1edde7a134b1f6f7ec529003a Mon Sep 17 00:00:00 2001 From: Marc Romeyn Date: Wed, 5 Jul 2023 18:07:01 +0200 Subject: [PATCH 08/34] Adding ElementWiseSum (#1183) --- merlin/models/torch/transforms/agg.py | 47 +++++++++++++++++++++++++ tests/unit/torch/transforms/test_agg.py | 26 +++++++++++++- 2 files changed, 72 insertions(+), 1 deletion(-) diff --git a/merlin/models/torch/transforms/agg.py b/merlin/models/torch/transforms/agg.py index 552fcf1d32..f3b1159899 100644 --- a/merlin/models/torch/transforms/agg.py +++ b/merlin/models/torch/transforms/agg.py @@ -193,6 +193,53 @@ def forward(self, inputs: Dict[str, torch.Tensor]) -> torch.Tensor: 
return torch.stack(sorted_tensors, dim=self.dim).float() +@registry.register("element-wise-sum") +class ElementWiseSum(AggModule): + """Element-wise sum of tensors. + + The input dictionary will be sorted by name before concatenation. + The sum is computed along the first dimension (default for Stack class). + + Example usage:: + >>> ewsum = ElementWiseSum() + >>> feature1 = torch.tensor([[1, 2], [3, 4]]) # Shape: [batch_size, feature_dim] + >>> feature2 = torch.tensor([[5, 6], [7, 8]]) # Shape: [batch_size, feature_dim] + >>> input_dict = {"feature1": feature1, "feature2": feature2} + >>> output = ewsum(input_dict) + >>> print(output) + tensor([[ 6, 8], + [10, 12]]) # Shape: [batch_size, feature_dim] + + """ + + def __init__(self): + super().__init__() + self.stack = Stack(dim=0) + + def forward(self, inputs: Dict[str, torch.Tensor]) -> torch.Tensor: + """ + Performs an element-wise sum of input tensors. + + Parameters + ---------- + inputs : Dict[str, torch.Tensor] + A dictionary where keys are the names of the tensors + and values are the tensors to be summed. + + Returns + ------- + torch.Tensor + A tensor that is the result of performing an element-wise sum + of the input tensors. + + Raises + ------ + RuntimeError + If the input tensor shapes don't match for stacking. + """ + return self.stack(inputs).sum(dim=0) + + class MaybeAgg(BlockContainer): """ This class is designed to conditionally apply an aggregation operation diff --git a/tests/unit/torch/transforms/test_agg.py b/tests/unit/torch/transforms/test_agg.py index 597226fb96..ae0177526a 100644 --- a/tests/unit/torch/transforms/test_agg.py +++ b/tests/unit/torch/transforms/test_agg.py @@ -2,7 +2,7 @@ import torch from merlin.models.torch.block import Block -from merlin.models.torch.transforms.agg import Concat, MaybeAgg, Stack +from merlin.models.torch.transforms.agg import Concat, ElementWiseSum, MaybeAgg, Stack from merlin.models.torch.utils import module_utils from merlin.schema import Schema @@ -95,6 +95,30 @@ def test_from_registry(self): assert output.shape == (2, 2, 3) +class TestElementWiseSum: + def setup_class(self): + self.ewsum = ElementWiseSum() + self.feature1 = torch.tensor([[1, 2], [3, 4]]) # Shape: [batch_size, feature_dim] + self.feature2 = torch.tensor([[5, 6], [7, 8]]) # Shape: [batch_size, feature_dim] + self.input_dict = {"feature1": self.feature1, "feature2": self.feature2} + + def test_forward(self): + output = self.ewsum(self.input_dict) + expected_output = torch.tensor([[6, 8], [10, 12]]) # Shape: [batch_size, feature_dim] + assert torch.equal(output, expected_output) + + def test_input_tensor_shape_mismatch(self): + feature_mismatch = torch.tensor([1, 2, 3]) # Different shape + input_dict_mismatch = {"feature1": self.feature1, "feature_mismatch": feature_mismatch} + with pytest.raises(RuntimeError): + self.ewsum(input_dict_mismatch) + + def test_empty_input_dict(self): + empty_dict = {} + with pytest.raises(RuntimeError): + self.ewsum(empty_dict) + + class TestMaybeAgg: def test_with_single_tensor(self): tensor = torch.tensor([1, 2, 3]) From 83211414c54d1462d6b82bf366a1f3b48bc40d18 Mon Sep 17 00:00:00 2001 From: Marc Romeyn Date: Wed, 5 Jul 2023 19:25:22 +0200 Subject: [PATCH 09/34] Add CategoricalOutput (#1158) * Adding extra test for leaf * Some fixes * First commit * Adding logits_temperature scaling * move schema and metrics from output base to children * add docstrings * delay class initialization in add_route_for_each * add unit tests * add docstrings * add more unit tests * lint * 100% test 
coverage in torch/outputs * In-complete commit * First pass over proposed-API for weight-tying * Increase test-coverage for new design w.r.t. weight-tying * Fixing rebase bugs * Change default metrics of CategoricalOutput to retrieval-metrics * Running linting * Remove default_metrics method from RegressionOutput * Fixing metrics of BinaryOutput * Fixing contents of ValueError in feature_weights * Fixing failing tests * Fixing failing tests * Fixing failing tests --------- Co-authored-by: edknv Co-authored-by: edknv <109497216+edknv@users.noreply.github.com> --- merlin/models/torch/__init__.py | 11 +- merlin/models/torch/inputs/embedding.py | 8 + merlin/models/torch/outputs/base.py | 37 +- merlin/models/torch/outputs/classification.py | 395 +++++++++++++++++- merlin/models/torch/outputs/regression.py | 8 +- merlin/models/torch/outputs/tabular.py | 6 +- merlin/models/torch/router.py | 5 +- merlin/models/torch/transforms/bias.py | 52 +++ merlin/models/torch/utils/traversal_utils.py | 3 + tests/unit/torch/outputs/test_base.py | 41 +- .../unit/torch/outputs/test_classification.py | 197 ++++++++- tests/unit/torch/outputs/test_regression.py | 3 +- tests/unit/torch/outputs/test_tabular.py | 37 +- tests/unit/torch/transforms/test_bias.py | 36 ++ .../unit/torch/utils/test_traversal_utils.py | 8 +- 15 files changed, 790 insertions(+), 57 deletions(-) create mode 100644 merlin/models/torch/transforms/bias.py create mode 100644 tests/unit/torch/transforms/test_bias.py diff --git a/merlin/models/torch/__init__.py b/merlin/models/torch/__init__.py index 603d6e2892..293be536e2 100644 --- a/merlin/models/torch/__init__.py +++ b/merlin/models/torch/__init__.py @@ -25,7 +25,12 @@ from merlin.models.torch.models.base import Model from merlin.models.torch.models.ranking import DLRMModel from merlin.models.torch.outputs.base import ModelOutput -from merlin.models.torch.outputs.classification import BinaryOutput +from merlin.models.torch.outputs.classification import ( + BinaryOutput, + CategoricalOutput, + CategoricalTarget, + EmbeddingTablePrediction, +) from merlin.models.torch.outputs.regression import RegressionOutput from merlin.models.torch.outputs.tabular import TabularOutputBlock from merlin.models.torch.router import RouterBlock @@ -40,6 +45,7 @@ "Batch", "BinaryOutput", "Block", + "DLRMBlock", "MLPBlock", "Model", "EmbeddingTable", @@ -60,6 +66,9 @@ "Concat", "Stack", "schema", + "CategoricalOutput", + "CategoricalTarget", + "EmbeddingTablePrediction", "input_schema", "output_schema", "feature_schema", diff --git a/merlin/models/torch/inputs/embedding.py b/merlin/models/torch/inputs/embedding.py index fe8cf28170..8d238d4581 100644 --- a/merlin/models/torch/inputs/embedding.py +++ b/merlin/models/torch/inputs/embedding.py @@ -374,6 +374,14 @@ def update_feature(self, col_schema: ColumnSchema) -> "EmbeddingTable": return self + def feature_weights(self, name: str): + if name not in self.domains: + raise ValueError(f"{name} not found in table: {self}") + + domain = self.domains[name] + + return self.table.weight[int(domain.min) : int(domain.max)] + def select(self, selection: Selection) -> Selectable: selected = select(self.input_schema, selection) diff --git a/merlin/models/torch/outputs/base.py b/merlin/models/torch/outputs/base.py index 8ff5f15e9c..8f3b78acb0 100644 --- a/merlin/models/torch/outputs/base.py +++ b/merlin/models/torch/outputs/base.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import inspect from copy import deepcopy from typing import Optional, Sequence @@ -21,7 +22,7 @@ from torchmetrics import Metric from merlin.models.torch.block import Block -from merlin.schema import ColumnSchema, Schema +from merlin.models.torch.transforms.bias import LogitsTemperatureScaler class ModelOutput(Block): @@ -47,12 +48,13 @@ class ModelOutput(Block): Parameters ---------- - schema: Optional[ColumnSchema] - The schema defining the column properties. loss: nn.Module The loss function used for training. metrics: Sequence[Metric] The metrics used for evaluation. + logits_temperature: float, optional + Parameter used to reduce model overconfidence, so that logits / T. + by default 1.0 name: Optional[str] The name of the model output. """ @@ -60,9 +62,9 @@ class ModelOutput(Block): def __init__( self, *module: nn.Module, - schema: Optional[ColumnSchema] = None, loss: Optional[nn.Module] = None, - metrics: Sequence[Metric] = (), + metrics: Optional[Sequence[Metric]] = None, + logits_temperature: float = 1.0, name: Optional[str] = None, ): """Initializes a ModelOutput object.""" @@ -70,21 +72,10 @@ def __init__( self.loss = loss self.metrics = metrics - self.output_schema: Schema = Schema() - if schema: - self.setup_schema(schema) self.create_target_buffer() - - def setup_schema(self, schema: Optional[ColumnSchema]): - """Set up the schema for the output. - - Parameters - ---------- - schema: ColumnSchema or None - The schema defining the column properties. - """ - self.output_schema = Schema([schema]) + if logits_temperature != 1.0: + self.append(LogitsTemperatureScaler(logits_temperature)) def create_target_buffer(self): self.register_buffer("target", torch.zeros(1, dtype=torch.float32)) @@ -103,18 +94,24 @@ def eval(self): return self.train(False) def copy(self): - metrics = self.metrics + metrics = deepcopy(self.metrics) self.metrics = [] output = deepcopy(self) copied_metrics = [] for metric in metrics: - m = metric.__class__() + params = inspect.signature(metric.__class__.__init__).parameters + kwargs = {} + for arg_name, arg_value in params.items(): + if arg_name in metric.__dict__: + kwargs[arg_name] = metric.__dict__[arg_name] + m = metric.__class__(**kwargs) m.load_state_dict(metric.state_dict()) copied_metrics.append(m) self.metrics = metrics output.metrics = copied_metrics + output.loss = deepcopy(self.loss) return output diff --git a/merlin/models/torch/outputs/classification.py b/merlin/models/torch/outputs/classification.py index 6d0d411ed3..98c7bc934a 100644 --- a/merlin/models/torch/outputs/classification.py +++ b/merlin/models/torch/outputs/classification.py @@ -13,14 +13,18 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from typing import Optional, Sequence, Union +import inspect +from typing import List, Optional, Sequence, Type, Union +import torch +import torchmetrics as tm from torch import nn -from torchmetrics import AUROC, Accuracy, Metric, Precision, Recall import merlin.dtypes as md +from merlin.models.torch import schema +from merlin.models.torch.inputs.embedding import EmbeddingTable from merlin.models.torch.outputs.base import ModelOutput -from merlin.schema import ColumnSchema, Schema +from merlin.schema import ColumnSchema, Schema, Tags class BinaryOutput(ModelOutput): @@ -28,7 +32,7 @@ class BinaryOutput(ModelOutput): Parameters ---------- - schema: Optional[ColumnSchema]) + schema: Union[ColumnSchema, Schema], optional The schema defining the column properties. Default is None. 
loss: nn.Module The loss function used for training. Default is nn.BCELoss(). @@ -37,22 +41,26 @@ class BinaryOutput(ModelOutput): """ DEFAULT_LOSS_CLS = nn.BCELoss - DEFAULT_METRICS_CLS = (Accuracy, AUROC, Precision, Recall) + DEFAULT_METRICS_CLS = (tm.Accuracy, tm.AUROC, tm.Precision, tm.Recall) def __init__( self, schema: Optional[ColumnSchema] = None, loss: Optional[nn.Module] = None, - metrics: Sequence[Metric] = (), + metrics: Sequence[tm.Metric] = (), ): """Initializes a BinaryOutput object.""" super().__init__( nn.LazyLinear(1), nn.Sigmoid(), - schema=schema, loss=loss or self.DEFAULT_LOSS_CLS(), metrics=metrics or [m(task="binary") for m in self.DEFAULT_METRICS_CLS], ) + if schema: + self.setup_schema(schema) + + if not self.metrics: + self.metrics = self.default_metrics() def setup_schema(self, target: Optional[Union[ColumnSchema, Schema]]): """Set up the schema for the output. @@ -75,3 +83,376 @@ def setup_schema(self, target: Optional[Union[ColumnSchema, Schema]]): ) self.output_schema = Schema([_target]) + + @classmethod + def schema_selection(cls, schema: Schema) -> Schema: + """Returns a schema containing all binary targets.""" + output = Schema() + output += schema.select_by_tag([Tags.BINARY_CLASSIFICATION, Tags.BINARY]) + for col in schema.select_by_tag([Tags.CATEGORICAL]): + if col.int_domain and col.int_domain.max == 1: + output += Schema([col]) + + return output + + +class CategoricalOutput(ModelOutput): + """ + A prediction block for categorical targets. + + Parameters + ---------- + schema: Union[ColumnSchema, Schema], optional + The schema defining the column properties. Default is None. + loss : nn.Module, optional + The loss function to use for the output model, defaults to + torch.nn.CrossEntropyLoss. + metrics : Optional[Sequence[Metric]], optional + The metrics to evaluate the model output. + logits_temperature: float, optional + Parameter used to reduce model overconfidence, so that logits / T. + by default 1.0 + """ + + DEFAULT_LOSS_CLS = nn.CrossEntropyLoss + DEFAULT_METRICS_CLS = ( + tm.RetrievalHitRate, + tm.RetrievalNormalizedDCG, + tm.RetrievalPrecision, + tm.RetrievalRecall, + ) + DEFAULT_K = (5,) + + def __init__( + self, + schema: Optional[Union[ColumnSchema, Schema]] = None, + loss: Optional[nn.Module] = None, + metrics: Optional[Sequence[tm.Metric]] = None, + logits_temperature: float = 1.0, + ): + super().__init__( + loss=loss or self.DEFAULT_LOSS_CLS(), + metrics=metrics or create_retrieval_metrics(self.DEFAULT_METRICS_CLS, self.DEFAULT_K), + logits_temperature=logits_temperature, + ) + + if schema: + self.setup_schema(schema) + + @classmethod + def with_weight_tying( + cls, + block: nn.Module, + selection: Optional[schema.Selection] = None, + loss: nn.Module = nn.CrossEntropyLoss(), + metrics: Optional[Sequence[tm.Metric]] = None, + logits_temperature: float = 1.0, + ) -> "CategoricalOutput": + self = cls(loss=loss, metrics=metrics, logits_temperature=logits_temperature) + self = self.tie_weights(block, selection) + if not self.metrics: + self.metrics = self.default_metrics(self.num_classes) + + return self + + def tie_weights( + self, block: nn.Module, selection: Optional[schema.Selection] = None + ) -> "CategoricalOutput": + prediction = EmbeddingTablePrediction.with_weight_tying(block, selection) + self.num_classes = prediction.num_classes + if self: + self[0] = prediction + else: + self.prepend(prediction) + + return self + + def setup_schema(self, target: Optional[Union[ColumnSchema, Schema]]): + """Set up the schema for the output.
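+
+ For example, a target column whose int_domain.max is 3 results in a
+ CategoricalTarget head with num_classes = 4 (int_domain.max + 1).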
+ + Parameters + ---------- + target: Optional[ColumnSchema] + The schema defining the column properties. + """ + if not isinstance(target, (ColumnSchema, Schema)): + raise ValueError(f"Target must be a ColumnSchema or Schema, got {target}.") + + if isinstance(target, Schema): + if len(target) != 1: + raise ValueError("Schema must contain exactly one column.") + + target = target.first + + to_call = CategoricalTarget(target) + self.num_classes = to_call.num_classes + self.prepend(to_call) + + @classmethod + def schema_selection(cls, schema: Schema) -> Schema: + """Returns a schema containing all categorical targets.""" + output = Schema() + for col in schema.select_by_tag([Tags.CATEGORICAL]): + if col.int_domain and col.int_domain.max > 1: + output += Schema([col]) + + return output + + +class CategoricalTarget(nn.Module): + """Prediction of a categorical feature. + + Parameters + -------------- + feature: Union[ColumnSchema, Schema], optional + Schema of the column being targeted. The schema must contain an + 'int_domain' specifying the maximum integer value representing the + categorical classes. + activation: callable, optional + Activation function to be applied to the output of the linear layer. + If None, no activation function is applied. + bias: bool, default=True + If set to False, the layer will not learn an additive bias. + + Returns + --------- + torch.Tensor + The tensor output of the forward method. + """ + + def __init__( + self, + feature: Optional[Union[Schema, ColumnSchema]] = None, + activation=None, + bias: bool = True, + ): + super().__init__() + + if isinstance(feature, Schema): + assert len(feature) == 1, "Schema can have max 1 feature" + col_schema = feature.first + else: + col_schema = feature + + self.target_name = col_schema.name + self.num_classes = col_schema.int_domain.max + 1 + self.output_schema = categorical_output_schema(col_schema, self.num_classes) + + self.linear = nn.LazyLinear(self.num_classes, bias=bias) + self.activation = activation + + def forward(self, inputs: torch.Tensor) -> torch.Tensor: + """ + Computes the forward pass of the module and applies the activation function if present. + + Parameters + -------------- + inputs: torch.Tensor + Input tensor for the forward pass. + + Returns + --------- + torch.Tensor + Output tensor from the forward pass of the model. + """ + output = self.linear(inputs) + if self.activation is not None: + output = self.activation(output) + + return output + + def embedding_lookup(self, ids: torch.Tensor) -> torch.Tensor: + """ + Selects the embeddings for the given indices. + + Parameters + -------------- + ids: torch.Tensor + Tensor containing indices for which embeddings are to be returned. + + Returns + --------- + torch.Tensor + The corresponding embeddings. + """ + return torch.index_select(self.embeddings(), 1, ids).t() + + def embeddings(self) -> nn.Parameter: + """ + Returns the embeddings from the weight matrix. + + Returns + --------- + nn.Parameter + The embeddings. + """ + return self.linear.weight.t() + + +class EmbeddingTablePrediction(nn.Module): + """Prediction of a categorical feature using weight-sharing [1] with an embedding table. + + Parameters + ---------- + table : EmbeddingTable + The embedding table to use as the weight matrix. + + References: + ---------- + [1] Hakan Inan, Khashayar Khosravi, and Richard Socher. 2016. Tying word vectors + and word classifiers: A loss framework for language modeling. arXiv preprint + arXiv:1611.01462 (2016). 
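+
+ Example usage (an illustrative sketch mirroring this patch's unit tests; the
+ column name, domain bounds and embedding dim are made-up values)::
+
+ >>> schema = (
+ ...     ColumnSchema("item_id")
+ ...     .with_dtype(md.int32)
+ ...     .with_properties({"domain": {"name": "item_id", "min": 0, "max": 3}})
+ ... )
+ >>> table = EmbeddingTable(8, schema)
+ >>> prediction = EmbeddingTablePrediction(table)
+ >>> prediction(torch.randn(16, 8)).shape
+ torch.Size([16, 4])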
+ """ + + def __init__(self, table: EmbeddingTable, selection: Optional[schema.Selection] = None): + super().__init__() + self.table = table + if len(table.domains) > 1: + if not selection: + raise ValueError( + f"Table {table} has multiple columns. ", + "Must specify selection to choose column.", + ) + self.add_selection(selection) + else: + self.num_classes = table.num_embeddings + self.col_schema = table.input_schema.first + self.col_name = self.col_schema.name + self.bias = nn.Parameter( + torch.zeros(self.num_classes, dtype=torch.float32, device=self.embeddings().device) + ) + self.output_schema = categorical_output_schema(self.col_schema, self.num_classes) + + @classmethod + def with_weight_tying( + cls, + block: nn.Module, + selection: Optional[schema.Selection] = None, + ) -> "EmbeddingTablePrediction": + if isinstance(block, EmbeddingTable): + table = block + else: + if not selection: + raise ValueError( + "Must specify a `selection` when providing a block that isn't a table." + ) + + try: + selected = schema.select(block, selection) + table = selected.leaf() + except Exception as e: + raise ValueError("Could not find embedding table in block.") from e + + return cls(table, selection) + + def forward(self, inputs: torch.Tensor) -> torch.Tensor: + """Forward pass of the model using input tensor. + + Parameters + ---------- + inputs : torch.Tensor + Input tensor for the forward pass. + + Returns + ---------- + torch.Tensor + Output tensor of the forward pass. + """ + return nn.functional.linear(inputs, self.embeddings(), self.bias) + + def add_selection(self, selection: schema.Selection): + selected = schema.select(self.table.input_schema, selection) + if not len(selected) == 1: + raise ValueError("Schema must contain exactly one column. ", f"got: {selected}") + self.col_schema = selected.first + self.col_name = self.col_schema.name + self.num_classes = self.col_schema.int_domain.max + 1 + self.output_schema = categorical_output_schema(self.col_schema, self.num_classes) + + return self + + def embeddings(self) -> nn.Parameter: + """Fetch the weight matrix from the embedding table. + + Returns + ---------- + nn.Parameter + Weight matrix from the embedding table. + """ + if len(self.table.domains) > 1: + return self.table.feature_weights(self.col_name) + + return self.table.table.weight + + def embedding_lookup(self, inputs: torch.Tensor) -> torch.Tensor: + """Fetch the embeddings for given indices from the embedding table. + + Parameters + ---------- + ids : torch.Tensor + Tensor containing indices for which embeddings are to be returned. + + Returns + ---------- + torch.Tensor + The corresponding embeddings. + """ + return self.table({self.col_name: inputs})[self.col_name] + + +def categorical_output_schema(target: ColumnSchema, num_classes: int) -> Schema: + """Return the output schema given the target column schema.""" + _target = target.with_dtype(md.float32) + _target = _target.with_properties( + {"domain": {"min": 0, "max": 1, "name": _target.name}}, + ) + if "value_count" not in target.properties: + _target = _target.with_properties( + {"value_count": {"min": num_classes, "max": num_classes}}, + ) + + return Schema([_target]) + + +def create_retrieval_metrics( + metrics: Sequence[Type[tm.Metric]], ks: Sequence[int] +) -> List[tm.Metric]: + """ + Create a list of retrieval metrics given metric types and a list of integers. + For each integer in `ks`, a metric is created for each type in `metrics`. 
+ + Parameters + ---------- + metrics : Sequence[Type[tm.Metric]] + The types of metrics to create. Each type should be a callable that + accepts a single integer parameter `k` to instantiate a new metric. + ks : Sequence[int] + A list of integers to use as the `k` or `top_k` parameter when creating each metric. + + Returns + ------- + List[tm.Metric] + A list of metrics. The length of the list is equal to the product of + the lengths of `metrics` and `ks`. The metrics are ordered first by + the values in `ks`, then by the order in `metrics`. + """ + + outputs = [] + + for k in ks: + for metric in metrics: + # check the parameters of the callable metric + params = inspect.signature(metric).parameters + + # the argument name could be 'k' or 'top_k' + arg_name = "top_k" if "top_k" in params else "k" if "k" in params else None + + if arg_name is not None: + outputs.append(metric(**{arg_name: k})) + else: + raise ValueError( + "Expected a callable that accepts either ", + f"a 'k' or a 'top_k' parameter, but got {metric}", + ) + + return outputs diff --git a/merlin/models/torch/outputs/regression.py b/merlin/models/torch/outputs/regression.py index e3b2f97b09..44056a429a 100644 --- a/merlin/models/torch/outputs/regression.py +++ b/merlin/models/torch/outputs/regression.py @@ -48,10 +48,14 @@ def __init__( """Initializes a RegressionOutput object.""" super().__init__( nn.LazyLinear(1), - schema=schema, loss=loss or self.DEFAULT_LOSS_CLS(), metrics=metrics or [m() for m in self.DEFAULT_METRICS_CLS], ) + if schema: + self.setup_schema(schema) + + if not self.metrics: + self.metrics = self.default_metrics() def setup_schema(self, target: Optional[Union[ColumnSchema, Schema]]): """Set up the schema for the output. @@ -62,7 +66,7 @@ def setup_schema(self, target: Optional[Union[ColumnSchema, Schema]]): The schema defining the column properties. """ if isinstance(target, Schema): - if len(target) != 1: + if len(target) > 1: raise ValueError("Schema must contain exactly one column.") target = target.first diff --git a/merlin/models/torch/outputs/tabular.py b/merlin/models/torch/outputs/tabular.py index 61df85fe00..bab73d9be8 100644 --- a/merlin/models/torch/outputs/tabular.py +++ b/merlin/models/torch/outputs/tabular.py @@ -16,7 +16,7 @@ from typing import Any, Callable, Optional, Union -from merlin.models.torch.outputs.classification import BinaryOutput +from merlin.models.torch.outputs.classification import BinaryOutput, CategoricalOutput from merlin.models.torch.outputs.regression import RegressionOutput from merlin.models.torch.router import RouterBlock from merlin.models.torch.schema import Selection, select @@ -96,9 +96,11 @@ def defaults(block: TabularOutputBlock): This function adds a route for each of the following tags: - Tags.CONTINUOUS/Tags.REGRESSION -> RegressionOutput - Tags.BINARY_CLASSIFICATION/Tags.BINARY -> BinaryOutput + - Tags.MULTI_CLASS_CLASSIFICATION/Tags.CATEGORICAL -> CategoricalOutput Args: block (TabularOutputBlock): The block to initialize. 
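
    Example usage (a sketch based on this patch's unit tests; assumes the schema
    tags its targets with Tags.TARGET plus a continuous, binary or categorical tag):

        outputs = TabularOutputBlock(schema, init="defaults")
        predictions = outputs(torch.rand(10, 10))  # one tensor per target column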
""" block.add_route_for_each([Tags.CONTINUOUS, Tags.REGRESSION], RegressionOutput()) - block.add_route_for_each([Tags.BINARY_CLASSIFICATION, Tags.BINARY], BinaryOutput()) + block.add_route_for_each(BinaryOutput.schema_selection, BinaryOutput()) + block.add_route_for_each(CategoricalOutput.schema_selection, CategoricalOutput()) diff --git a/merlin/models/torch/router.py b/merlin/models/torch/router.py index 326064c68c..087068b6e0 100644 --- a/merlin/models/torch/router.py +++ b/merlin/models/torch/router.py @@ -15,6 +15,7 @@ # from copy import deepcopy +from inspect import isclass from typing import Optional from torch import nn @@ -152,7 +153,9 @@ def add_route_for_each( if shared: col_module = module else: - if hasattr(module, "copy"): + if isclass(module): + col_module = module(col) + elif hasattr(module, "copy"): col_module = module.copy() else: col_module = deepcopy(module) diff --git a/merlin/models/torch/transforms/bias.py b/merlin/models/torch/transforms/bias.py new file mode 100644 index 0000000000..67ba7eb6f2 --- /dev/null +++ b/merlin/models/torch/transforms/bias.py @@ -0,0 +1,52 @@ +import torch +from torch import nn + + +class LogitsTemperatureScaler(nn.Module): + """ + A PyTorch Module for scaling logits with a given temperature value. + + This module is useful for implementing temperature scaling in neural networks, + a technique often used to soften or sharpen the output distribution of a classifier. + A temperature value closer to 0 makes the output probabilities more extreme + (either closer to 0 or 1), while a value closer to 1 makes the distribution + closer to uniform. + + Parameters + ---------- + temperature : float + The temperature value used for scaling. Must be a positive float in the range (0.0, 1.0]. + + Raises + ------ + ValueError + If the temperature value is not a float or is out of the range (0.0, 1.0]. + """ + + def __init__(self, temperature: float): + super().__init__() + + if not isinstance(temperature, float): + raise ValueError(f"Invalid temperature type: {type(temperature)}") + if not 0.0 < temperature <= 1.0: + raise ValueError( + f"Invalid temperature value: {temperature} ", "Must be in the range (0.0, 1.0]" + ) + + self.temperature = temperature + + def forward(self, logits: torch.Tensor) -> torch.Tensor: + """ + Apply temperature scaling to the input logits. + + Parameters + ---------- + logits : torch.Tensor + The input logits to be scaled. + + Returns + ------- + torch.Tensor + The scaled logits. + """ + return logits / self.temperature diff --git a/merlin/models/torch/utils/traversal_utils.py b/merlin/models/torch/utils/traversal_utils.py index d458980cce..51c9ed3cdb 100644 --- a/merlin/models/torch/utils/traversal_utils.py +++ b/merlin/models/torch/utils/traversal_utils.py @@ -127,6 +127,9 @@ def leaf(module) -> nn.Module: return child.leaf() return leaf(child) else: + if isinstance(module, containers) and not hasattr(module, "items"): + return module[-1] + # If more than one child, throw an exception. raise ValueError( f"Module {module} has multiple children, cannot determine the deepest child." diff --git a/tests/unit/torch/outputs/test_base.py b/tests/unit/torch/outputs/test_base.py index a156a39a13..9f38c52033 100644 --- a/tests/unit/torch/outputs/test_base.py +++ b/tests/unit/torch/outputs/test_base.py @@ -13,14 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -import numpy as np +import pytest import torch from torch import nn from torchmetrics import AUROC, Accuracy import merlin.models.torch as mm from merlin.models.torch.utils import module_utils -from merlin.schema import ColumnSchema, Schema, Tags class TestModelOutput: @@ -31,8 +30,8 @@ def test_init(self): assert isinstance(model_output, mm.ModelOutput) assert model_output.loss is loss - assert model_output.metrics == () - assert model_output.output_schema == Schema() + assert model_output.metrics is None + assert not mm.schema.output_schema(model_output) def test_identity(self): block = mm.Block() @@ -47,20 +46,11 @@ def test_identity(self): def test_setup_metrics(self): block = mm.Block() loss = nn.BCEWithLogitsLoss() - metrics = (Accuracy(task="binary"), AUROC(task="binary")) + metrics = [Accuracy(task="binary"), AUROC(task="binary")] model_output = mm.ModelOutput(block, loss=loss, metrics=metrics) assert model_output.metrics == metrics - def test_setup_schema(self): - block = mm.Block() - loss = nn.BCEWithLogitsLoss() - schema = ColumnSchema("feature", dtype=np.int32, tags=[Tags.CONTINUOUS]) - model_output = mm.ModelOutput(block, loss=loss, schema=schema) - - assert isinstance(model_output.output_schema, Schema) - assert model_output.output_schema.first == schema - def test_eval_resets_target(self): block = mm.Block() loss = nn.BCEWithLogitsLoss() @@ -71,3 +61,26 @@ def test_eval_resets_target(self): assert torch.equal(model_output.target, torch.ones(1)) model_output.eval() assert torch.equal(model_output.target, torch.zeros(1)) + + def test_copy(self): + block = mm.Block() + loss = nn.BCEWithLogitsLoss() + metrics = [Accuracy(task="multiclass", num_classes=11)] + model_output = mm.ModelOutput(block, loss=loss, metrics=metrics) + + model_copy = model_output.copy() + assert model_copy.loss is not loss + assert isinstance(model_copy.loss, nn.BCEWithLogitsLoss) + assert model_copy.metrics[0] is not metrics[0] + assert model_copy.metrics[0].__class__.__name__ == "MulticlassAccuracy" + assert model_copy.metrics[0].num_classes == 11 + + @pytest.mark.parametrize("logits_temperature", [0.1, 0.9]) + def test_logits_temperature_scaler(self, logits_temperature): + block = mm.Block() + model_output = mm.ModelOutput(block, logits_temperature=logits_temperature) + inputs = torch.tensor([[1.0, 2.0], [3.0, 4.0]]) + + outputs = module_utils.module_test(model_output, inputs) + + assert torch.allclose(inputs / logits_temperature, outputs) diff --git a/tests/unit/torch/outputs/test_classification.py b/tests/unit/torch/outputs/test_classification.py index 038b2586d4..a8e411d1e6 100644 --- a/tests/unit/torch/outputs/test_classification.py +++ b/tests/unit/torch/outputs/test_classification.py @@ -15,14 +15,16 @@ # import pytest import torch +import torchmetrics as tm from torch import nn from torchmetrics import AUROC, Accuracy, Precision, Recall from torchmetrics.classification import BinaryF1Score import merlin.dtypes as md import merlin.models.torch as mm +from merlin.models.torch.outputs.classification import CategoricalTarget, EmbeddingTablePrediction from merlin.models.torch.utils import module_utils -from merlin.schema import ColumnSchema, Schema +from merlin.schema import ColumnSchema, Schema, Tags class TestBinaryOutput: @@ -37,7 +39,8 @@ def test_init(self): Precision(task="binary"), Recall(task="binary"), ] - assert binary_output.output_schema == Schema() + with pytest.raises(ValueError): + mm.output_schema(binary_output) def test_identity(self): binary_output = mm.BinaryOutput() @@ -83,3 
+86,193 @@ def test_cutom_metrics(self): binary_output.metrics[0](outputs, targets), BinaryF1Score()(outputs, targets), ) + + +class TestCategoricalOutput: + def test_init(self): + int_domain_max = 3 + schema = ( + ColumnSchema("foo") + .with_dtype(md.int32) + .with_properties({"domain": {"name": "bar", "min": 0, "max": int_domain_max}}) + ) + categorical_output = mm.CategoricalOutput(schema) + + assert isinstance(categorical_output, mm.CategoricalOutput) + assert isinstance(categorical_output.loss, nn.CrossEntropyLoss) + assert isinstance(categorical_output.metrics[0], tm.RetrievalHitRate) + assert isinstance(categorical_output.metrics[1], tm.RetrievalNormalizedDCG) + assert isinstance(categorical_output.metrics[2], tm.RetrievalPrecision) + assert isinstance(categorical_output.metrics[3], tm.RetrievalRecall) + + output_schema = categorical_output[0].output_schema.first + assert output_schema.dtype == md.float32 + assert output_schema.properties["domain"]["min"] == 0 + assert output_schema.properties["domain"]["max"] == 1 + assert ( + output_schema.properties["value_count"]["min"] + == output_schema.properties["value_count"]["max"] + == int_domain_max + 1 + ) + assert mm.output_schema(categorical_output) == categorical_output[0].output_schema + + def test_called_with_schema(self): + int_domain_max = 3 + schema = ( + ColumnSchema("foo") + .with_dtype(md.int32) + .with_properties({"domain": {"name": "bar", "min": 0, "max": int_domain_max}}) + ) + categorical_output = mm.CategoricalOutput(schema) + + inputs = torch.randn(3, 2) + outputs = module_utils.module_test(categorical_output, inputs) + + num_classes = int_domain_max + 1 + assert outputs.shape == (3, num_classes) + + def test_weight_tying(self): + embedding_dim = 8 + int_domain_max = 3 + schema = ( + ColumnSchema("foo") + .with_dtype(md.int32) + .with_properties({"domain": {"name": "bar", "min": 0, "max": int_domain_max}}) + ) + table = mm.EmbeddingTable(embedding_dim, schema) + categorical_output = mm.CategoricalOutput.with_weight_tying(table) + + inputs = torch.randn(3, embedding_dim) + outputs = module_utils.module_test(categorical_output, inputs) + + num_classes = int_domain_max + 1 + assert outputs.shape == (3, num_classes) + + cat_output = mm.CategoricalOutput(schema).tie_weights(table) + assert isinstance(cat_output[0], EmbeddingTablePrediction) + + def test_invalid_type_error(self): + with pytest.raises(ValueError, match="Target must be a ColumnSchema or Schema"): + mm.CategoricalOutput("invalid to_call") + + def test_multiple_column_schema_error(self, item_id_col_schema, user_id_col_schema): + schema = Schema([item_id_col_schema]) + assert len(schema) == 1 + _ = mm.CategoricalOutput(schema) + + schema_with_two_columns = schema + Schema([user_id_col_schema]) + assert len(schema_with_two_columns) == 2 + with pytest.raises(ValueError, match="must contain exactly one"): + _ = mm.CategoricalOutput(schema_with_two_columns) + + +class TestCategoricalTarget: + def test_init(self, user_id_col_schema): + schema = Schema([user_id_col_schema]) + + # Test with ColumnSchema + model = CategoricalTarget(user_id_col_schema) + assert model.num_classes == user_id_col_schema.int_domain.max + 1 + assert isinstance(model.linear, nn.LazyLinear) + + # Test with Schema + model = CategoricalTarget(feature=schema) + assert model.num_classes == user_id_col_schema.int_domain.max + 1 + assert isinstance(model.linear, nn.LazyLinear) + + def test_forward(self, user_id_col_schema): + model = CategoricalTarget(feature=user_id_col_schema) + + inputs = 
torch.randn(5, 11) + output = model(inputs) + + assert output.shape == (5, 21) + + def test_forward_with_activation(self, user_id_col_schema): + model = CategoricalTarget(feature=user_id_col_schema, activation=nn.ReLU()) + + inputs = torch.randn(5, 11) + output = model(inputs) + + assert output.shape == (5, 21) + assert torch.all(output >= 0) + + def test_embedding_lookup(self, user_id_col_schema): + model = CategoricalTarget(feature=user_id_col_schema) + + model(torch.randn(5, 11)) # initialize the embedding table + input_indices = torch.tensor([1, 5, 10]) + hidden_vectors = model.embedding_lookup(input_indices) + + assert hidden_vectors.shape == (3, 11) + assert model.embeddings().shape == (11, 21) + + def test_forward_model_output(self): + int_domain_max = 3 + schema = ( + ColumnSchema("foo") + .with_dtype(md.int32) + .with_properties({"domain": {"name": "bar", "min": 0, "max": int_domain_max}}) + ) + target = mm.CategoricalTarget(schema) + categorical_output = mm.ModelOutput(target, loss=nn.CrossEntropyLoss()) + assert mm.output_schema(categorical_output).column_names == ["foo"] + + inputs = torch.randn(3, 2) + outputs = module_utils.module_test(categorical_output, inputs) + num_classes = int_domain_max + 1 + assert outputs.shape == (3, num_classes) + + +class TestEmbeddingTablePrediction: + def test_init_multiple_int_domains(self, user_id_col_schema, item_id_col_schema): + input_block = mm.TabularInputBlock(Schema([user_id_col_schema, item_id_col_schema])) + input_block.add_route(Tags.CATEGORICAL, mm.EmbeddingTable(10)) + table = mm.schema.select(input_block, Tags.USER_ID).leaf() + + with pytest.raises(ValueError): + EmbeddingTablePrediction(table) + + with pytest.raises(ValueError): + EmbeddingTablePrediction.with_weight_tying(input_block) + + with pytest.raises(ValueError): + EmbeddingTablePrediction.with_weight_tying(input_block, "a") + + with pytest.raises(ValueError): + EmbeddingTablePrediction.with_weight_tying(input_block, Tags.CATEGORICAL) + + assert isinstance(EmbeddingTablePrediction(table, Tags.USER_ID), EmbeddingTablePrediction) + assert isinstance( + EmbeddingTablePrediction.with_weight_tying(input_block, Tags.USER_ID), + EmbeddingTablePrediction, + ) + + def test_forward(self, user_id_col_schema): + input_block = mm.TabularInputBlock( + Schema([user_id_col_schema]), init="defaults", agg="concat" + ) + prediction = EmbeddingTablePrediction.with_weight_tying(input_block, Tags.USER_ID) + + inputs = torch.randn(5, 8) + output = module_utils.module_test(prediction, inputs) + + assert output.shape == (5, 21) + + def test_embedding_lookup(self): + embedding_dim = 8 + int_domain_max = 3 + schema = ( + ColumnSchema("foo") + .with_dtype(md.int32) + .with_properties({"domain": {"name": "bar", "min": 0, "max": int_domain_max}}) + ) + table = mm.EmbeddingTable(embedding_dim, schema) + model = mm.EmbeddingTablePrediction(table) + + batch_size = 16 + ids = torch.randint(0, int_domain_max, (batch_size,)) + outputs = model.embedding_lookup(ids) + + assert outputs.shape == (batch_size, embedding_dim) + assert model.embeddings().shape == (int_domain_max + 1, embedding_dim) diff --git a/tests/unit/torch/outputs/test_regression.py b/tests/unit/torch/outputs/test_regression.py index f8537bca51..3cb9c38be9 100644 --- a/tests/unit/torch/outputs/test_regression.py +++ b/tests/unit/torch/outputs/test_regression.py @@ -31,7 +31,6 @@ def test_init(self): assert isinstance(reg_output, mm.RegressionOutput) assert isinstance(reg_output.loss, nn.MSELoss) assert reg_output.metrics == 
[MeanSquaredError()] - assert reg_output.output_schema == Schema() def test_identity(self): reg_output = mm.RegressionOutput() @@ -91,7 +90,7 @@ def test_default_metrics(self): def test_custom_metrics(self): reg_output = mm.RegressionOutput( - metrics=(MeanAbsoluteError(), MeanAbsolutePercentageError()) + metrics=[MeanAbsoluteError(), MeanAbsolutePercentageError()] ) features = torch.randn(3, 2) targets = torch.randn(3, 1) diff --git a/tests/unit/torch/outputs/test_tabular.py b/tests/unit/torch/outputs/test_tabular.py index 22ae132735..3ed04b4725 100644 --- a/tests/unit/torch/outputs/test_tabular.py +++ b/tests/unit/torch/outputs/test_tabular.py @@ -1,9 +1,10 @@ import pytest import torch +import merlin.dtypes as md import merlin.models.torch as mm from merlin.models.torch.utils import module_utils -from merlin.schema import Schema, Tags +from merlin.schema import ColumnSchema, Schema, Tags class TestTabularOutputBlock: @@ -25,6 +26,40 @@ def test_init_defaults(self): assert "click" in outputs assert "like" in outputs + def test_init_defaults_with_binary_categorical(self): + test_schema = Schema( + [ + ColumnSchema("foo") + .with_dtype(md.int32) + .with_properties({"domain": {"name": "bar", "min": 0, "max": 1}}) + .with_tags([Tags.CATEGORICAL, Tags.TARGET]) + ] + ) + output_block = mm.TabularOutputBlock(test_schema, init="defaults") + + assert isinstance(output_block["foo"], mm.BinaryOutput) + + outputs = module_utils.module_test(output_block, torch.rand(10, 10)) + + assert "foo" in outputs + + def test_init_defaults_with_multiclass_categorical(self): + test_schema = Schema( + [ + ColumnSchema("foo") + .with_dtype(md.int32) + .with_properties({"domain": {"name": "bar", "min": 0, "max": 3}}) + .with_tags([Tags.CATEGORICAL, Tags.TARGET]) + ] + ) + output_block = mm.TabularOutputBlock(test_schema, init="defaults") + + assert isinstance(output_block["foo"], mm.CategoricalOutput) + + outputs = module_utils.module_test(output_block, torch.rand(10, 10)) + + assert "foo" in outputs + def test_exceptions(self): with pytest.raises(ValueError, match="not found"): mm.TabularOutputBlock(self.schema, init="not_found") diff --git a/tests/unit/torch/transforms/test_bias.py b/tests/unit/torch/transforms/test_bias.py new file mode 100644 index 0000000000..4f29ff592e --- /dev/null +++ b/tests/unit/torch/transforms/test_bias.py @@ -0,0 +1,36 @@ +import pytest +import torch + +from merlin.models.torch.transforms.bias import LogitsTemperatureScaler +from merlin.models.torch.utils import module_utils + + +class TestLogitsTemperatureScaler: + def test_init(self): + """Test correct temperature initialization.""" + scaler = LogitsTemperatureScaler(0.5) + assert scaler.temperature == 0.5 + + def test_invalid_temperature_type(self): + """Test exception is raised for incorrect temperature type.""" + with pytest.raises(ValueError, match=r"Invalid temperature type"): + LogitsTemperatureScaler("invalid") + + def test_invalid_temperature_value(self): + """Test exception is raised for out-of-range temperature value.""" + with pytest.raises(ValueError, match=r"Invalid temperature value"): + LogitsTemperatureScaler(1.5) + + def test_temperature_scaling(self): + """Test temperature scaling of logits.""" + logits = torch.tensor([1.0, 2.0, 3.0]) + expected_scaled_logits = torch.tensor([2.0, 4.0, 6.0]) + + scaler = LogitsTemperatureScaler(0.5) + outputs = module_utils.module_test(scaler, logits) + assert torch.allclose(outputs, expected_scaled_logits) + + def test_zero_temperature_value(self): + """Test exception is raised for 
zero temperature value.""" + with pytest.raises(ValueError, match=r"Invalid temperature value"): + LogitsTemperatureScaler(0.0) diff --git a/tests/unit/torch/utils/test_traversal_utils.py b/tests/unit/torch/utils/test_traversal_utils.py index d44b53e55d..33bdd10822 100644 --- a/tests/unit/torch/utils/test_traversal_utils.py +++ b/tests/unit/torch/utils/test_traversal_utils.py @@ -1,4 +1,3 @@ -import pytest from torch import nn import merlin.models.torch as mm @@ -86,10 +85,9 @@ def __init__(self): model = CustomModule() assert isinstance(leaf(model), CustomModule) - def test_exception(self): - model = nn.Sequential(nn.Linear(10, 20), nn.Linear(10, 20)) - with pytest.raises(ValueError): - leaf(model) + def test_sequential(self): + model = nn.Sequential(nn.Linear(10, 20), nn.Linear(10, 30)) + assert leaf(model).out_features == 30 def test_embedding(self, user_id_col_schema): input_block = mm.TabularInputBlock(Schema([user_id_col_schema]), init="defaults") From 9922f2522c3ab0644f6ceec422bfae952cf537c4 Mon Sep 17 00:00:00 2001 From: Radek Osmulski Date: Thu, 6 Jul 2023 15:47:51 +1000 Subject: [PATCH 10/34] Remove examples spanning multiple repos / promote ecommerce session-based example (#1160) * Remove 2 examples (move to Merlin) * Move session-based ecommerce example one level up, from usecases to examples * Delete benchmarking (asvdb) test (move to Merlin) * add link/fix image links --- ...-session-based-next-item-prediction.ipynb} | 8 +- ...rediction-with-pretrained-embeddings.ipynb | 1433 ---------------- .../transformers-next-item-prediction.ipynb | 1516 ----------------- ...asvdb_transformers_next_item_prediction.py | 95 -- ..._08_session_based_next_item_prediction.py} | 2 +- ...ecase_transformers_next_item_prediction.py | 57 - ...m_prediction_with_pretrained_embeddings.py | 38 - 7 files changed, 6 insertions(+), 3143 deletions(-) rename examples/{usecases/ecommerce-session-based-next-item-prediction-for-fashion.ipynb => 08-Train-a-model-for-session-based-next-item-prediction.ipynb} (99%) delete mode 100644 examples/usecases/transformers-next-item-prediction-with-pretrained-embeddings.ipynb delete mode 100644 examples/usecases/transformers-next-item-prediction.ipynb delete mode 100644 tests/benchmark/test_asvdb_transformers_next_item_prediction.py rename tests/unit/tf/examples/{test_usecase_ecommerce_session_based.py => test_08_session_based_next_item_prediction.py} (95%) delete mode 100644 tests/unit/tf/examples/test_usecase_transformers_next_item_prediction.py delete mode 100644 tests/unit/tf/examples/test_usecase_transformers_next_item_prediction_with_pretrained_embeddings.py diff --git a/examples/usecases/ecommerce-session-based-next-item-prediction-for-fashion.ipynb b/examples/08-Train-a-model-for-session-based-next-item-prediction.ipynb similarity index 99% rename from examples/usecases/ecommerce-session-based-next-item-prediction-for-fashion.ipynb rename to examples/08-Train-a-model-for-session-based-next-item-prediction.ipynb index 6769dae3f2..e6c7ea1a04 100644 --- a/examples/usecases/ecommerce-session-based-next-item-prediction-for-fashion.ipynb +++ b/examples/08-Train-a-model-for-session-based-next-item-prediction.ipynb @@ -41,6 +41,8 @@ "\n", "NVIDIA-Merlin team participated in [Recsys2022 challenge](http://www.recsyschallenge.com/2022/index.html) and secured 3rd position. This notebook contains the various techniques used in the solution.\n", "\n", + "In this notebook we train several different architectures with the last one being a transformer model. 
We only cover training. If you would be interested also in putting your model in production and serving predictions using the industry standard Triton Inference Server, please consult [this notebook](https://github.com/NVIDIA-Merlin/Merlin/blob/main/examples/Next-Item-Prediction-with-Transformers/tf/transformers-next-item-prediction.ipynb).\n", + "\n", "### Learning Objective\n", "\n", "In this notebook, we will apply important concepts that improve recommender systems. We leveraged them for our RecSys solution:\n", @@ -860,7 +862,7 @@ "\n", "We train a Sequential-Multi-Layer Perceptron model, which averages the sequential input features (e.g. `item_id_list_seq`) and concatenate the resulting embeddings with the categorical embeddings (e.g. `item_id_last`). We visualize the architecture in the figure below.\n", "\n", - "" + "" ] }, { @@ -1285,7 +1287,7 @@ "source": [ "In this section, we train a Bi-LSTM model, an extension of traditional LSTMs, which enables straight (past) and reverse traversal of input (future) sequence to be used. The input block concatenates the embedding vectors for all sequential features (`item_id_list_seq`, `f_47_list_seq`, `f_68_list_seq`) per step (e.g. here 3). The concatenated vectors are processed by a BiLSTM architecture. The hidden state of the BiLSTM is concatenated with the embedding vectors of the categorical features (`item_id_last`). Then we connect it with a Multi-Layer Perceptron Block. We visualize the architecture in the figure below.\n", "\n", - "" + "" ] }, { @@ -2093,7 +2095,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.8.10 ('merlin_22.07_dev')", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/examples/usecases/transformers-next-item-prediction-with-pretrained-embeddings.ipynb b/examples/usecases/transformers-next-item-prediction-with-pretrained-embeddings.ipynb deleted file mode 100644 index 1632331ee5..0000000000 --- a/examples/usecases/transformers-next-item-prediction-with-pretrained-embeddings.ipynb +++ /dev/null @@ -1,1433 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "5b545747", - "metadata": {}, - "outputs": [], - "source": [ - "# Copyright 2022 NVIDIA Corporation. All Rights Reserved.\n", - "#\n", - "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# http://www.apache.org/licenses/LICENSE-2.0\n", - "#`\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions anda\n", - "# limitations under the License.\n", - "# ==============================================================================\n", - "\n", - "# Each user is responsible for checking the content of datasets and the\n", - "# applicable licenses and determining if suitable for the intended use." 
- ] - }, - { - "cell_type": "markdown", - "id": "5ec6d3b8", - "metadata": {}, - "source": [ - "\n", - "\n", - "# Transformer-based architecture for next-item prediction task with pretrained embeddings\n", - "\n", - "This notebook is created using the latest stable [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow/tags) container.\n", - "\n", - "## Overview\n", - "\n", - "In this use case we will train a Transformer-based architecture for next-item prediction task with pretrained embeddings.\n", - "\n", - "**You can chose to download the full dataset manually or use synthetic data.**\n", - "\n", - "We will use the [SIGIR eCOM 2021 Data Challenge Dataset](https://github.com/coveooss/SIGIR-ecom-data-challenge) to train a session-based model. The dataset contains 36M events of users browsing an online store.\n", - "\n", - "We will reshape the data to organize it into 'sessions'. Each session will be a full customer online journey in chronological order. The goal will be to predict the `url` of the next action taken.\n", - "\n", - "\n", - "### Learning objectives\n", - "\n", - "- Training a Transformer-based architecture for next-item prediction task" - ] - }, - { - "cell_type": "markdown", - "id": "fd2b847f", - "metadata": {}, - "source": [ - "## Downloading and preparing the dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "2dd7827c", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-06-20 22:58:36.667322: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", - " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", - "2023-06-20 22:58:38.026020: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-06-20 22:58:38.026445: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-06-20 22:58:38.026622: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n" - ] - } - ], - "source": [ - "import os\n", - "import cudf\n", - "import numpy as np\n", - "import pandas as pd\n", - "import nvtabular as nvt\n", - "from merlin.schema import ColumnSchema, Schema, Tags\n", - "\n", - "OUTPUT_DATA_DIR = os.environ.get('OUTPUT_DATA_DIR', '/workspace/data')\n", - "NUM_EPOCHS = int(os.environ.get('NUM_EPOCHS', 5))\n", - "NUM_EXAMPLES = int(os.environ.get('NUM_EXAMPLES', 100_000))\n", - "MINIMUM_SESSION_LENGTH = int(os.environ.get('MINIMUM_SESSION_LENGTH', 5))" - ] - }, - { - "cell_type": "markdown", - "id": "7fcf7c86", - "metadata": {}, - "source": [ - "You can download the full dataset by registering 
[here](https://www.coveo.com/en/ailabs/sigir-ecom-data-challenge). If you choose to download the data, please place it alongside this notebook in the `sigir_dataset` directory and extract it.\n",
-    "\n",
-    "By default, in this notebook we will be using synthetically generated data based on the SIGIR dataset, but you can run on the full dataset by changing the value of the boolean flag below."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "bc3d1882",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "RUN_ON_SYNTHETIC_DATA = True"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "68bc6d6d",
-   "metadata": {},
-   "source": [
-    "### Clean downloaded data"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "9016a3e2",
-   "metadata": {},
-   "source": [
-    "If you are training on the full SIGIR dataset, the following code will pre-process it.\n",
-    "\n",
-    "Here we deal with `nan` values, drop rows with missing information, and parse strings containing lists into lists.\n",
-    "\n",
-    "The synthetically generated data is already clean -- it doesn't require this pre-processing."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "428ab049",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "if not RUN_ON_SYNTHETIC_DATA:\n",
-    "    train = nvt.Dataset('/workspace/sigir_dataset/train/browsing_train.csv', part_size='500MB')\n",
-    "    skus = nvt.Dataset('/workspace/sigir_dataset/train/sku_to_content.csv')\n",
-    "\n",
-    "    skus = pd.read_csv('/workspace/sigir_dataset/train/sku_to_content.csv')\n",
-    "\n",
-    "    skus['description_vector'] = skus['description_vector'].replace(np.nan, '')\n",
-    "    skus['image_vector'] = skus['image_vector'].replace(np.nan, '')\n",
-    "\n",
-    "    skus['description_vector'] = skus['description_vector'].apply(lambda x: [] if len(x) == 0 else eval(x))\n",
-    "    skus['image_vector'] = skus['image_vector'].apply(lambda x: [] if len(x) == 0 else eval(x))\n",
-    "    skus = skus[skus.description_vector.apply(len) > 0]\n",
-    "    skus = nvt.Dataset(skus)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "9b33fa32",
-   "metadata": {},
-   "source": [
-    "### Generate synthetic data"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "4c4ba9b9",
-   "metadata": {},
-   "source": [
-    "If you are not running on the full dataset, the following lines of code will generate its synthetic counterpart."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "84789211",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "if RUN_ON_SYNTHETIC_DATA:\n",
-    "    from merlin.datasets.synthetic import generate_data\n",
-    "\n",
-    "    train = generate_data('sigir-browsing', NUM_EXAMPLES)\n",
-    "    skus = generate_data('sigir-sku', NUM_EXAMPLES)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "5533f446",
-   "metadata": {},
-   "source": [
-    "## Constructing a workflow"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "ac47bc4e",
-   "metadata": {},
-   "source": [
-    "We need to process our data further before we can use it to train our model.\n",
-    "\n",
-    "In particular, the `skus` dataset contains the mapping from the `product_sku_hash` (essentially an item id) to the `description_vector` -- an embedding obtained from the description.\n",
-    "\n",
-    "We would like to enable our model to make use of this piece of information.
In order to feed this data to our model, we need to map the `product_sku_hash` to an id.\n",
-    "\n",
-    "We also need to make sure that we process `skus` and the `train` dataset (event information) consistently, so that the same `product_sku_hash` is mapped to the same id both when processing `skus` and when processing `train`.\n",
-    "\n",
-    "We do so by defining and fitting a `Categorify` op once and using it to process both the `skus` and the `train` datasets.\n",
-    "\n",
-    "Additionally, we apply some further processing to the `train` dataset. We group rows by `session_id_hash` so that each training example will contain events from a single customer visit to the online store arranged in chronological order.\n",
-    "\n",
-    "If you would like to learn more about leveraging `NVTabular` to process tabular data on the GPU using a set of industry standard operators, please consult the examples available [here](https://github.com/NVIDIA-Merlin/NVTabular/tree/main/examples).\n",
-    "\n",
-    "Let's first process the `train` dataset and retain the `Categorify` operator (`cat_op`) for processing `skus`."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "3b5feee3",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cat_op = nvt.ops.Categorify()\n",
-    "out = ['product_sku_hash'] >> cat_op >> nvt.ops.TagAsItemID()\n",
-    "out += ['event_type', 'product_action', 'session_id_hash', 'hashed_url'] >> nvt.ops.Categorify()\n",
-    "out += ['server_timestamp_epoch_ms'] >> nvt.ops.NormalizeMinMax()\n",
-    "\n",
-    "groupby_features = out >> nvt.ops.Groupby(\n",
-    "    groupby_cols=['session_id_hash'],\n",
-    "    aggs={\n",
-    "        'product_sku_hash': ['list'],\n",
-    "        'event_type': ['list'],\n",
-    "        'product_action': ['list'],\n",
-    "        'hashed_url': ['list', 'count'],\n",
-    "        'server_timestamp_epoch_ms': ['list']\n",
-    "    },\n",
-    "    sort_cols=\"server_timestamp_epoch_ms\"\n",
-    ")\n",
-    "\n",
-    "filtered_sessions = groupby_features >> nvt.ops.Filter(f=lambda df: df[\"hashed_url_count\"] >= MINIMUM_SESSION_LENGTH)\n",
-    "\n",
-    "# We won't need `session_id_hash` or `hashed_url_count` any longer\n",
-    "wf = nvt.Workflow(\n",
-    "    filtered_sessions[\n",
-    "        'product_sku_hash_list',\n",
-    "        'event_type_list',\n",
-    "        'product_action_list',\n",
-    "        'hashed_url_list',\n",
-    "    ]\n",
-    ")\n",
-    "\n",
-    "# Let's save the output of our workflow -- the transformed `train` dataset -- for later use (training our model).\n",
-    "wf.fit_transform(train).to_parquet('train_transformed')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "45a4828e",
-   "metadata": {},
-   "source": [
-    "Here are a couple of example rows from `train_transformed`."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "650fb0d0",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
product_sku_hash_listevent_type_listproduct_action_listhashed_url_list
0[578, 972, 378, 420, 328, 126, 233, 925, 410, ...[3, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 3, 4, 4, 4, ...[3, 3, 5, 6, 4, 3, 3, 4, 4, 4, 6, 5, 3, 4, 3, ...[766, 955, 745, 210, 940, 688, 986, 524, 425, ...
1[298, 304, 393, 697, 706, 313, 834, 83, 502, 1...[4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 4, 4, 3, 4, 3, ...[3, 5, 6, 4, 4, 3, 3, 3, 6, 6, 3, 3, 6, 6, 3, ...[13, 221, 915, 658, 456, 378, 802, 180, 580, 4...
2[706, 221, 22, 702, 339, 645, 436, 358, 84, 35...[4, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 4, 3, 4, 3, ...[3, 6, 4, 6, 3, 3, 5, 5, 4, 6, 4, 6, 3, 5, 6, ...[271, 940, 562, 498, 172, 239, 270, 215, 489, ...
3[278, 153, 189, 717, 580, 540, 219, 79, 200, 9...[3, 3, 3, 3, 4, 4, 3, 4, 4, 3, 4, 4, 3, 3, 3, ...[6, 6, 6, 6, 3, 4, 4, 4, 4, 4, 3, 6, 5, 4, 3, ...[169, 419, 875, 725, 926, 770, 160, 554, 763, ...
4[156, 922, 914, 592, 842, 916, 137, 928, 615, ...[3, 4, 4, 4, 3, 4, 4, 4, 4, 3, 4, 3, 4, 3, 4, ...[6, 4, 5, 6, 5, 4, 3, 3, 6, 5, 6, 5, 3, 6, 3, ...[318, 506, 281, 191, 506, 480, 965, 399, 761, ...
\n", - "
" - ], - "text/plain": [ - " product_sku_hash_list \\\n", - "0 [578, 972, 378, 420, 328, 126, 233, 925, 410, ... \n", - "1 [298, 304, 393, 697, 706, 313, 834, 83, 502, 1... \n", - "2 [706, 221, 22, 702, 339, 645, 436, 358, 84, 35... \n", - "3 [278, 153, 189, 717, 580, 540, 219, 79, 200, 9... \n", - "4 [156, 922, 914, 592, 842, 916, 137, 928, 615, ... \n", - "\n", - " event_type_list \\\n", - "0 [3, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 3, 4, 4, 4, ... \n", - "1 [4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 4, 4, 3, 4, 3, ... \n", - "2 [4, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 4, 3, 4, 3, ... \n", - "3 [3, 3, 3, 3, 4, 4, 3, 4, 4, 3, 4, 4, 3, 3, 3, ... \n", - "4 [3, 4, 4, 4, 3, 4, 4, 4, 4, 3, 4, 3, 4, 3, 4, ... \n", - "\n", - " product_action_list \\\n", - "0 [3, 3, 5, 6, 4, 3, 3, 4, 4, 4, 6, 5, 3, 4, 3, ... \n", - "1 [3, 5, 6, 4, 4, 3, 3, 3, 6, 6, 3, 3, 6, 6, 3, ... \n", - "2 [3, 6, 4, 6, 3, 3, 5, 5, 4, 6, 4, 6, 3, 5, 6, ... \n", - "3 [6, 6, 6, 6, 3, 4, 4, 4, 4, 4, 3, 6, 5, 4, 3, ... \n", - "4 [6, 4, 5, 6, 5, 4, 3, 3, 6, 5, 6, 5, 3, 6, 3, ... \n", - "\n", - " hashed_url_list \n", - "0 [766, 955, 745, 210, 940, 688, 986, 524, 425, ... \n", - "1 [13, 221, 915, 658, 456, 378, 802, 180, 580, 4... \n", - "2 [271, 940, 562, 498, 172, 239, 270, 215, 489, ... \n", - "3 [169, 419, 875, 725, 926, 770, 160, 554, 763, ... \n", - "4 [318, 506, 281, 191, 506, 480, 965, 399, 761, ... " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "nvt.Dataset('train_transformed', engine='parquet').head()" - ] - }, - { - "cell_type": "markdown", - "id": "18f12dbd", - "metadata": {}, - "source": [ - "Now that we have processed the train set, we can use the mapping preserved in the `cat_op` to process the `skus` dataset containing the embeddings we are after.\n", - "\n", - "Let's now `Categorify` the `product_sku_hash` in `skus` and grab just the description embedding information." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "313808d0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
product_sku_hashdescription_vectorcategory_hashprice_bucket
013[0.07939800762120258, 0.3465797761609977, -0.3...160.186690
125[0.4275482879608162, -0.30569476366666, 0.1440...380.951997
218[-0.31035419787213536, 0.18070481533058008, 0....220.973384
31[-0.31319783485940356, -0.11623980504981396, -...1380.146260
411[0.25091279302969943, -0.33473442518442525, 0....1190.808252
\n", - "
" - ], - "text/plain": [ - " product_sku_hash description_vector \\\n", - "0 13 [0.07939800762120258, 0.3465797761609977, -0.3... \n", - "1 25 [0.4275482879608162, -0.30569476366666, 0.1440... \n", - "2 18 [-0.31035419787213536, 0.18070481533058008, 0.... \n", - "3 1 [-0.31319783485940356, -0.11623980504981396, -... \n", - "4 11 [0.25091279302969943, -0.33473442518442525, 0.... \n", - "\n", - " category_hash price_bucket \n", - "0 16 0.186690 \n", - "1 38 0.951997 \n", - "2 22 0.973384 \n", - "3 138 0.146260 \n", - "4 119 0.808252 " - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "skus.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "dfad1bcf", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
product_sku_hashdescription_vector
0836[0.07939800762120258, 0.3465797761609977, -0.3...
1979[0.4275482879608162, -0.30569476366666, 0.1440...
211[-0.31035419787213536, 0.18070481533058008, 0....
3469[-0.31319783485940356, -0.11623980504981396, -...
4118[0.25091279302969943, -0.33473442518442525, 0....
\n", - "
" - ], - "text/plain": [ - " product_sku_hash description_vector\n", - "0 836 [0.07939800762120258, 0.3465797761609977, -0.3...\n", - "1 979 [0.4275482879608162, -0.30569476366666, 0.1440...\n", - "2 11 [-0.31035419787213536, 0.18070481533058008, 0....\n", - "3 469 [-0.31319783485940356, -0.11623980504981396, -...\n", - "4 118 [0.25091279302969943, -0.33473442518442525, 0...." - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "out = ['product_sku_hash'] >> cat_op\n", - "wf_skus = nvt.Workflow(out + 'description_vector')\n", - "skus_ds = wf_skus.transform(skus)\n", - "\n", - "skus_ds.head()" - ] - }, - { - "cell_type": "markdown", - "id": "360fe65d", - "metadata": {}, - "source": [ - "Let us now export the embedding information to a `numpy` array and write it to disk.\n", - "\n", - "We will later pass this information to the `Loader` so that it will load the correct emebedding for the product corresponding to a given step of a customer journey.\n", - "\n", - "The embeddings are linked to the train set using the `product_sku_hash` information." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "d99dfdd0", - "metadata": {}, - "outputs": [], - "source": [ - "skus_ds.to_npy('skus.npy')" - ] - }, - { - "cell_type": "markdown", - "id": "58d80879", - "metadata": {}, - "source": [ - "How will the `Loader` know which embedding to associate with a given row of the train set?\n", - "\n", - "The `product_sku_hash` ids have been exported along with the embeddings and are contained in the first column of the output `numpy` array.\n", - "\n", - "Here is the id of the first embedding stored in `skus.npy`:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "d60c6651", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "836.0" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.load('skus.npy')[0, 0]" - ] - }, - { - "cell_type": "markdown", - "id": "974cf669", - "metadata": {}, - "source": [ - "and here is the embedding vector corresponding to `product_sku_hash` of id referenced above:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "c2c111fd", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 0.07939801, 0.34657978, -0.38269496, 0.56307004, -0.10142923,\n", - " 0.03702352, -0.11606304, 0.10070879, -0.21879928, 0.06107687,\n", - " -0.20743195, -0.01330719, 0.60182867, 0.0920322 , 0.2648726 ,\n", - " 0.56061561, 0.48643498, 0.39045152, -0.40012162, 0.09153962,\n", - " -0.38351605, 0.57134731, 0.59986226, -0.40321368, -0.32984972,\n", - " 0.37559494, 0.1554353 , -0.0413067 , 0.33814398, 0.30678041,\n", - " 0.24001132, 0.42737922, 0.41554601, -0.40451691, 0.50428902,\n", - " -0.2004803 , -0.38297056, 0.06580838, 0.48285745, 0.51406472,\n", - " 0.02268894, 0.36343324, 0.32497967, -0.29736346, -0.00538915,\n", - " 0.12329302, -0.04998194, 0.27843002, 0.20212714, 0.39019503])" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.load('skus.npy')[0, 1:]" - ] - }, - { - "cell_type": "markdown", - "id": "7b8c4a13", - "metadata": {}, - "source": [ - "We are now ready to construct the `Loader` that will feed the data to our model.\n", - "\n", - "We begin by reading in the embeddings information." 
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "id": "51e1f766",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "embeddings = np.load('skus.npy')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "e0b1f18d",
-   "metadata": {},
-   "source": [
-    "We are now ready to define the `Loader`.\n",
-    "\n",
-    "We are passing in an `EmbeddingOperator` that will ensure that the correct `sku` information (the correct `description_vector`) is associated with the correct step in the customer journey (the lookup key is contained in `product_sku_hash_list`).\n",
-    "\n",
-    "When specifying the dataset, we are creating a `Merlin Dataset` based on the `train_transformed` data we saved above.\n",
-    "\n",
-    "Depending on the hardware you are running on and the size of your dataset, should you run out of GPU memory, you can specify one of several parameters that ease the memory load (`npartitions`, `part_size`, or `part_mem_fraction`).\n",
-    "\n",
-    "A `BATCH_SIZE` of 16 should work on a broad set of hardware, but if you are training on a lot of data and your hardware permits, you might want to increase it significantly."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "id": "1d7212fc",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n",
-      "[INFO]: sparse_operation_kit is imported\n",
-      "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11.\n",
-      "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.1.4-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n",
-      "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.1.4-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n",
-      "[SOK INFO] Initialize finished, communication tool: horovod\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2023-06-20 22:58:50.835162: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n",
-      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
-      "2023-06-20 22:58:50.836068: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2023-06-20 22:58:50.836268: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2023-06-20 22:58:50.836425: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2023-06-20 22:58:50.836673: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one
NUMA node, so returning NUMA node zero\n",
-      "2023-06-20 22:58:50.836849: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2023-06-20 22:58:50.837009: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2023-06-20 22:58:50.837114: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:42] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n",
-      "2023-06-20 22:58:50.837130: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1621] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory:  -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n"
-     ]
-    }
-   ],
-   "source": [
-    "BATCH_SIZE = 16\n",
-    "\n",
-    "from merlin.dataloader.tensorflow import Loader\n",
-    "from merlin.dataloader.ops.embeddings import EmbeddingOperator\n",
-    "import merlin.models.tf as mm\n",
-    "\n",
-    "embedding_operator = EmbeddingOperator(\n",
-    "    embeddings[:, 1:].astype(np.float32),\n",
-    "    id_lookup_table=embeddings[:, 0].astype(int),\n",
-    "    lookup_key=\"product_sku_hash_list\",\n",
-    "    embedding_name='product_embeddings'\n",
-    ")\n",
-    "\n",
-    "loader = Loader(\n",
-    "    dataset=nvt.Dataset('train_transformed', engine='parquet'),\n",
-    "    batch_size=BATCH_SIZE,\n",
-    "    transforms=[\n",
-    "        embedding_operator\n",
-    "    ],\n",
-    "    shuffle=True\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "4f037d5d",
-   "metadata": {},
-   "source": [
-    "Using the `EmbeddingOperator` object we referenced our `product_embeddings` and instructed the model which key to use to look up the information.\n",
-    "\n",
-    "Below is an example batch of data that our model will consume."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "id": "7371e23d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "batch = mm.sample_batch(loader, batch_size=BATCH_SIZE, include_targets=False, prepare_features=True)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "f7c9a50d",
-   "metadata": {},
-   "source": [
-    "`product_embeddings` are included in the batch."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "id": "3cbf8ea4",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "dict_keys(['product_sku_hash_list', 'event_type_list', 'product_action_list', 'hashed_url_list', 'product_embeddings'])"
-      ]
-     },
-     "execution_count": 16,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "batch.keys()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "53e61e71",
-   "metadata": {},
-   "source": [
-    "## Creating and training the model"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "2461926e",
-   "metadata": {},
-   "source": [
-    "We are now ready to construct our model.\n",
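-    "\n",
-    "Before we do, a quick check that the pretrained feature arrived with the expected width can save debugging later. This is a minimal sanity check, assuming the ragged `[batch, steps, dim]` layout and the 50-dimensional vectors we exported to `skus.npy`:\n",
-    "\n",
-    "```python\n",
-    "print(batch['product_embeddings'].shape)  # expected: (16, None, 50)\n",
-    "```"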
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "id": "6867c8ba",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import merlin.models.tf as mm\n",
-    "\n",
-    "input_block = mm.InputBlockV2(\n",
-    "    loader.output_schema,\n",
-    "    embeddings=mm.Embeddings(\n",
-    "        loader.output_schema.select_by_tag(Tags.CATEGORICAL),\n",
-    "        sequence_combiner=None,\n",
-    "    ),\n",
-    "    pretrained_embeddings=mm.PretrainedEmbeddings(\n",
-    "        loader.output_schema.select_by_tag(Tags.EMBEDDING),\n",
-    "        sequence_combiner=None,\n",
-    "        normalizer=\"l2-norm\",\n",
-    "        output_dims={\"product_embeddings\": 64},\n",
-    "    )\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cafb788f",
-   "metadata": {},
-   "source": [
-    "We have now constructed an `input_block` that will take our batch and transform it into a form amenable to further processing by subsequent layers of our model.\n",
-    "\n",
-    "To test that everything has worked, we can pass our example `batch` through the `input_block`."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "id": "3f8afa56",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "input_batch = input_block(batch)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "d24a70fe",
-   "metadata": {},
-   "source": [
-    "Let us now construct the remaining layers of our model."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "id": "78b21c0f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "target = 'hashed_url_list'\n",
-    "\n",
-    "# We do not need the `train_transformed` dataset here, but we do need\n",
-    "# to access the schema.\n",
-    "# It contains important information that will help our model construct itself.\n",
-    "schema = nvt.Dataset('train_transformed', engine='parquet').schema\n",
-    "\n",
-    "dmodel = 64\n",
-    "mlp_block = mm.MLPBlock(\n",
-    "    [128, dmodel],\n",
-    "    activation='relu',\n",
-    "    no_activation_last_layer=True,\n",
-    ")\n",
-    "transformer_block = mm.XLNetBlock(d_model=dmodel, n_head=4, n_layer=2)\n",
-    "model = mm.Model(\n",
-    "    input_block,\n",
-    "    mlp_block,\n",
-    "    transformer_block,\n",
-    "    mm.CategoricalOutput(\n",
-    "        schema.select_by_name(target),\n",
-    "        default_loss=\"categorical_crossentropy\",\n",
-    "    ),\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "13b54d19",
-   "metadata": {},
-   "source": [
-    "And let us train it."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "id": "fbb03f0c",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n",
-      "  warnings.warn(\n",
-      "2023-06-20 22:58:58.950175: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8700\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 1/5\n",
-      "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0'] when minimizing the loss.
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n",
-      "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2023-06-20 22:59:11.285571: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model/xl_net_block/sequential_block_7/replace_masked_embeddings/RaggedWhere/Assert/AssertGuard/branch_executed/_95\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "18/18 [==============================] - 42s 2s/step - loss: 6.9800 - recall_at_10: 0.0106 - mrr_at_10: 0.0033 - ndcg_at_10: 0.0050 - map_at_10: 0.0033 - precision_at_10: 0.0011 - regularization_loss: 0.0000e+00 - loss_batch: 6.9689\n",
-      "Epoch 2/5\n",
-      "18/18 [==============================] - 34s 2s/step - loss: 6.9591 - recall_at_10: 0.0106 - mrr_at_10: 0.0031 - ndcg_at_10: 0.0048 - map_at_10: 0.0031 - precision_at_10: 0.0011 - regularization_loss: 0.0000e+00 - loss_batch: 6.9363\n",
-      "Epoch 3/5\n",
-      "18/18 [==============================] - 39s 2s/step - loss: 6.9471 - recall_at_10: 0.0107 - mrr_at_10: 0.0028 - ndcg_at_10: 0.0046 - map_at_10: 0.0028 - precision_at_10: 0.0011 - regularization_loss: 0.0000e+00 - loss_batch: 6.9206\n",
-      "Epoch 4/5\n",
-      "18/18 [==============================] - 38s 2s/step - loss: 6.9398 - recall_at_10: 0.0103 - mrr_at_10: 0.0030 - ndcg_at_10: 0.0047 - map_at_10: 0.0030 - precision_at_10: 0.0010 - regularization_loss: 0.0000e+00 - loss_batch: 6.9015\n",
-      "Epoch 5/5\n",
-      "18/18 [==============================] - 38s 2s/step - loss: 6.9375 - recall_at_10: 0.0104 - mrr_at_10: 0.0030 - ndcg_at_10: 0.0047 - map_at_10: 0.0030 - precision_at_10: 0.0010 - regularization_loss: 0.0000e+00 - loss_batch: 6.9095\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       ""
-      ]
-     },
-     "execution_count": 20,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.compile(run_eagerly=False, optimizer='adam', loss=\"categorical_crossentropy\")\n",
-    "model.fit(loader, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, pre=mm.SequenceMaskRandom(schema=loader.output_schema, target=target, masking_prob=0.3, transformer=transformer_block))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "fa8ab17b",
-   "metadata": {},
-   "source": [
-    "## Serving predictions"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "c778420d",
-   "metadata": {},
-   "source": [
-    "Now that we have prepared a workflow for processing our data (`wf`), defined the embedding operator (`embedding_operator`), and trained our model (`model`), we have all the components we need to serve our model using the Triton Inference Server (TIS).\n",
-    "\n",
-    "Let us define a set of inference operators (a pipeline for processing our data all the way to obtaining predictions) and export them as an ensemble that we will be able to serve using TIS.\n",
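-    "\n",
-    "Once we call `ensemble.export` below, the target directory should contain one numbered folder per pipeline step plus an `executor_model` entry point that Triton clients send requests to. We can inspect it with a quick listing (a sketch; the exact folder names may differ):\n",
-    "\n",
-    "```python\n",
-    "import os\n",
-    "sorted(os.listdir(os.path.join(OUTPUT_DATA_DIR, 'ensemble')))\n",
-    "# e.g. ['0_transformworkflowtriton', '1_predicttensorflowtriton', 'executor_model']\n",
-    "```"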
- ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "18f19033", - "metadata": {}, - "outputs": [], - "source": [ - "from merlin.systems.dag.ops.tensorflow import PredictTensorflow\n", - "from merlin.systems.dag.ensemble import Ensemble\n", - "from merlin.systems.dag.ops.workflow import TransformWorkflow" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "385aba04", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", - " (_feature_shapes): Dict(\n", - " (product_sku_hash_list): TensorShape([16, None, 1])\n", - " (event_type_list): TensorShape([16, None, 1])\n", - " (product_action_list): TensorShape([16, None, 1])\n", - " (hashed_url_list): TensorShape([16, None, 1])\n", - " (product_embeddings): TensorShape([16, None, 50])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (product_sku_hash_list): tf.int64\n", - " (event_type_list): tf.int64\n", - " (product_action_list): tf.int64\n", - " (hashed_url_list): tf.int64\n", - " (product_embeddings): tf.float32\n", - " )\n", - "), because it is not built.\n", - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (product_sku_hash_list): TensorShape([16, None, 1])\n", - " (event_type_list): TensorShape([16, None, 1])\n", - " (product_action_list): TensorShape([16, None, 1])\n", - " (hashed_url_list): TensorShape([16, None, 1])\n", - " (product_embeddings): TensorShape([16, None, 50])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (product_sku_hash_list): tf.int64\n", - " (event_type_list): tf.int64\n", - " (product_action_list): tf.int64\n", - " (hashed_url_list): tf.int64\n", - " (product_embeddings): tf.float32\n", - " )\n", - "), because it is not built.\n", - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (product_sku_hash_list): TensorShape([16, None, 1])\n", - " (event_type_list): TensorShape([16, None, 1])\n", - " (product_action_list): TensorShape([16, None, 1])\n", - " (hashed_url_list): TensorShape([16, None, 1])\n", - " (product_embeddings): TensorShape([16, None, 50])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (product_sku_hash_list): tf.int64\n", - " (event_type_list): tf.int64\n", - " (product_action_list): tf.int64\n", - " (hashed_url_list): tf.int64\n", - " (product_embeddings): tf.float32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_mask_random_layer_call_fn, sequence_mask_random_layer_call_and_return_conditional_losses, prepare_list_features_1_layer_call_fn while saving (showing 5 of 110). 
These functions will not be directly callable after loading.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: /tmp/tmpi3g8g7q7/assets\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: /tmp/tmpi3g8g7q7/assets\n" - ] - } - ], - "source": [ - "inference_operators = wf.input_schema.column_names >> TransformWorkflow(wf) >> embedding_operator >> PredictTensorflow(model)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "1c14a25d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", - " (_feature_shapes): Dict(\n", - " (product_sku_hash_list): TensorShape([16, None, 1])\n", - " (event_type_list): TensorShape([16, None, 1])\n", - " (product_action_list): TensorShape([16, None, 1])\n", - " (hashed_url_list): TensorShape([16, None, 1])\n", - " (product_embeddings): TensorShape([16, None, 50])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (product_sku_hash_list): tf.int64\n", - " (event_type_list): tf.int64\n", - " (product_action_list): tf.int64\n", - " (hashed_url_list): tf.int64\n", - " (product_embeddings): tf.float32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", - " (_feature_shapes): Dict(\n", - " (product_sku_hash_list): TensorShape([16, None, 1])\n", - " (event_type_list): TensorShape([16, None, 1])\n", - " (product_action_list): TensorShape([16, None, 1])\n", - " (hashed_url_list): TensorShape([16, None, 1])\n", - " (product_embeddings): TensorShape([16, None, 50])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (product_sku_hash_list): tf.int64\n", - " (event_type_list): tf.int64\n", - " (product_action_list): tf.int64\n", - " (hashed_url_list): tf.int64\n", - " (product_embeddings): tf.float32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (product_sku_hash_list): TensorShape([16, None, 1])\n", - " (event_type_list): TensorShape([16, None, 1])\n", - " (product_action_list): TensorShape([16, None, 1])\n", - " (hashed_url_list): TensorShape([16, None, 1])\n", - " (product_embeddings): TensorShape([16, None, 50])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (product_sku_hash_list): tf.int64\n", - " (event_type_list): tf.int64\n", - " (product_action_list): tf.int64\n", - " (hashed_url_list): tf.int64\n", - " (product_embeddings): tf.float32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (product_sku_hash_list): TensorShape([16, None, 1])\n", - " (event_type_list): TensorShape([16, None, 1])\n", - " (product_action_list): TensorShape([16, None, 1])\n", - " (hashed_url_list): TensorShape([16, None, 1])\n", - " (product_embeddings): TensorShape([16, None, 50])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (product_sku_hash_list): tf.int64\n", - " (event_type_list): tf.int64\n", - " (product_action_list): tf.int64\n", - " (hashed_url_list): tf.int64\n", - " 
(product_embeddings): tf.float32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (product_sku_hash_list): TensorShape([16, None, 1])\n", - " (event_type_list): TensorShape([16, None, 1])\n", - " (product_action_list): TensorShape([16, None, 1])\n", - " (hashed_url_list): TensorShape([16, None, 1])\n", - " (product_embeddings): TensorShape([16, None, 50])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (product_sku_hash_list): tf.int64\n", - " (event_type_list): tf.int64\n", - " (product_action_list): tf.int64\n", - " (hashed_url_list): tf.int64\n", - " (product_embeddings): tf.float32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (product_sku_hash_list): TensorShape([16, None, 1])\n", - " (event_type_list): TensorShape([16, None, 1])\n", - " (product_action_list): TensorShape([16, None, 1])\n", - " (hashed_url_list): TensorShape([16, None, 1])\n", - " (product_embeddings): TensorShape([16, None, 50])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (product_sku_hash_list): tf.int64\n", - " (event_type_list): tf.int64\n", - " (product_action_list): tf.int64\n", - " (hashed_url_list): tf.int64\n", - " (product_embeddings): tf.float32\n", - " )\n", - "), because it is not built.\n", - "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_mask_random_layer_call_fn, sequence_mask_random_layer_call_and_return_conditional_losses, prepare_list_features_1_layer_call_fn while saving (showing 5 of 110). These functions will not be directly callable after loading.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: /workspace/data/ensemble/1_predicttensorflowtriton/1/model.savedmodel/assets\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: /workspace/data/ensemble/1_predicttensorflowtriton/1/model.savedmodel/assets\n", - "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:101: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n", - "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " config[i] = tf.keras.utils.serialize_keras_object(layer)\n", - "/usr/local/lib/python3.8/dist-packages/keras/saving/legacy/saved_model/layer_serialization.py:134: CustomMaskWarning: Custom mask layers require a config and must override get_config. 
When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " return serialization.serialize_keras_object(obj)\n", - "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" - ] - } - ], - "source": [ - "ensemble = Ensemble(inference_operators, wf.input_schema)\n", - "ensemble.export(os.path.join(OUTPUT_DATA_DIR, 'ensemble'));" - ] - }, - { - "cell_type": "markdown", - "id": "264fd1ea", - "metadata": {}, - "source": [ - "After we export the ensemble, we are ready to start the Triton Inference Server.\n", - "\n", - "The server is installed in Merlin Tensorflow and Merlin PyTorch containers. If you are not using one of our containers, then ensure it is installed in your environment. For more information, see the Triton Inference Server [documentation](https://github.com/triton-inference-server/server/blob/r22.03/README.md#documentation).\n", - "\n", - "You can start the server by running the following command:\n", - "\n", - "```tritonserver --model-repository={OUTPUT_DATA_DIR}/ensemble/```\n", - "\n", - "For the --model-repository argument, specify the same value as the `export_path` that you specified previously in the `ensemble.export` method.\n", - "\n", - "After you run the `tritonserver` command, wait until your terminal shows messages like the following example:\n", - "\n", - "I0414 18:29:50.741833 4067 grpc_server.cc:4421] Started GRPCInferenceService at 0.0.0.0:8001
\n", - "I0414 18:29:50.742197 4067 http_server.cc:3113] Started HTTPService at 0.0.0.0:8000
\n", - "I0414 18:29:50.783470 4067 http_server.cc:178] Started Metrics Service at 0.0.0.0:8002\n", - "\n", - "Let us now package our data for inference. We will send 5 rows of data, which corresponds to a single customer journey (session) through the online store. The data will be first processed by the `NVTabular` workflow and subsequentally passed to our transformer model for predicting. " - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "90483210", - "metadata": {}, - "outputs": [], - "source": [ - "# obtaining five rows of data\n", - "df = train.head(5)\n", - "# making sure all the rows correspond to the same online session (have the same `session_id_hash`)\n", - "df['session_id_hash'] = df['session_id_hash'].iloc[0]" - ] - }, - { - "cell_type": "markdown", - "id": "efdf671e", - "metadata": {}, - "source": [ - "Let us now send the data to the Triton Inference Server for inference." - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "d8453048", - "metadata": {}, - "outputs": [], - "source": [ - "from merlin.systems.triton import convert_df_to_triton_input\n", - "import tritonclient.grpc as grpcclient\n", - "\n", - "inputs = convert_df_to_triton_input(wf.input_schema, df)\n", - "\n", - "with grpcclient.InferenceServerClient(\"localhost:8001\") as client:\n", - " response = client.infer('executor_model', inputs)" - ] - }, - { - "cell_type": "markdown", - "id": "913b80e8", - "metadata": {}, - "source": [ - "Let's parse the response." - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "4cc4b046", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[-2.2332087 , -2.1218574 , -2.390479 , ..., -0.7735352 ,\n", - " 0.1954267 , -0.34523243]], dtype=float32)" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "predictions = response.as_numpy(\"hashed_url_list/categorical_output\")\n", - "predictions" - ] - }, - { - "cell_type": "markdown", - "id": "e49c2ed9", - "metadata": {}, - "source": [ - "The response contains logits predicting the id of the url the customer is most likely to arrive at as next step of their journey through the online store.\n", - "\n", - "Here is the predicted hashed url id:" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "0b9af2ae", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "34" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "predicted_hashed_url_id = predictions.argmax()\n", - "predicted_hashed_url_id" - ] - }, - { - "cell_type": "markdown", - "id": "8ef47efd", - "metadata": {}, - "source": [ - "## Summary\n", - "\n", - "We have trained a transformer model for the next item prediction task using language model masking.\n", - "\n", - "For another session-based example that goes deeper into data preprocessing and that covers several advanced techniques (Weight Tying, Temperature Scaling) please see [Session-Based Next Item Prediction for Fashion E-Commerce](https://github.com/NVIDIA-Merlin/models/blob/t4rec_use_case/examples/usecases/ecommerce-session-based-next-item-prediction-for-fashion.ipynb). 
" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/usecases/transformers-next-item-prediction.ipynb b/examples/usecases/transformers-next-item-prediction.ipynb deleted file mode 100644 index b409170f00..0000000000 --- a/examples/usecases/transformers-next-item-prediction.ipynb +++ /dev/null @@ -1,1516 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "a556f660", - "metadata": {}, - "outputs": [], - "source": [ - "# Copyright 2022 NVIDIA Corporation. All Rights Reserved.\n", - "#\n", - "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# http://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions anda\n", - "# limitations under the License.\n", - "# ==============================================================================\n", - "\n", - "# Each user is responsible for checking the content of datasets and the\n", - "# applicable licenses and determining if suitable for the intended use." - ] - }, - { - "cell_type": "markdown", - "id": "697d1452", - "metadata": {}, - "source": [ - "\n", - "\n", - "# Transformer-based architecture for next-item prediction task\n", - "\n", - "This notebook is created using the latest stable [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow/tags) container.\n", - "\n", - "## Overview\n", - "\n", - "In this use case we will train a Transformer-based architecture for next-item prediction task.\n", - "\n", - "**Note, the data for this notebook will be automatically downloaded to the folder specified in the cells below.**\n", - "\n", - "We will use the [booking.com dataset](https://github.com/bookingcom/ml-dataset-mdt) to train a session-based model. The dataset contains 1,166,835 of anonymized hotel reservations in the train set and 378,667 in the test set. Each reservation is a part of a customer's trip (identified by `utrip_id`) which includes consecutive reservations.\n", - "\n", - "We will reshape the data to organize it into 'sessions'. Each session will be a full customer itinerary in chronological order. The goal will be to predict the city_id of the final reservation of each trip.\n", - "\n", - "\n", - "### Learning objectives\n", - "\n", - "- Training a Transformer-based architecture for next-item prediction task" - ] - }, - { - "cell_type": "markdown", - "id": "1cccd005", - "metadata": {}, - "source": [ - "## Downloading and preparing the dataset" - ] - }, - { - "cell_type": "markdown", - "id": "1d0b619b", - "metadata": {}, - "source": [ - "We will download the dataset using a functionality provided by merlin models. 
The dataset can be found on GitHub [here](https://github.com/bookingcom/ml-dataset-mdt).\n", - "\n", - "**Read more about libraries used in the import statements below**\n", - "\n", - "- [get_lib](https://github.com/NVIDIA-Merlin/core/blob/stable/merlin/core/dispatch.py)\n", - "- [get_booking](https://github.com/NVIDIA-Merlin/models/tree/stable/merlin/datasets/ecommerce)\n", - "- [nvtabular](https://github.com/NVIDIA-Merlin/NVTabular/tree/stable/nvtabular)\n", - "- [nvtabular ops](https://github.com/NVIDIA-Merlin/NVTabular/tree/stable/nvtabular/ops)\n", - "- [schema tags](https://github.com/NVIDIA-Merlin/core/blob/stable/merlin/schema/tags.py)\n", - "- [merlin models tensorflow](https://github.com/NVIDIA-Merlin/models/tree/stable/merlin/models/tf)\n", - "- [get_booking](https://github.com/NVIDIA-Merlin/models/blob/stable/merlin/datasets/ecommerce/booking/dataset.py)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "40e9ef05", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-05-31 06:06:25.697025: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", - " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", - "2023-05-31 06:06:26.988036: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-31 06:06:26.988386: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-31 06:06:26.988518: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[INFO]: sparse_operation_kit is imported\n", - "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. 
The old module will be deleted in version 2.11.\n", - "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.1.4-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n", - "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.1.4-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n", - "[SOK INFO] Initialize finished, communication tool: horovod\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-05-31 06:06:28.519868: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-05-31 06:06:28.520815: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-31 06:06:28.520999: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-31 06:06:28.521129: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-31 06:06:28.591345: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-31 06:06:28.591534: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-31 06:06:28.591665: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-31 06:06:28.591770: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:42] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", - "2023-05-31 06:06:28.591778: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", - "2023-05-31 06:06:28.591860: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1621] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", - "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], - "source": [ - "# Resetting the TF memory allocation to not be 50% by default. 
\n", - "import os\n", - "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", - "\n", - "from merlin.core.dispatch import get_lib\n", - "from merlin.datasets.ecommerce import get_booking\n", - "\n", - "import numpy as np\n", - "import timeit\n", - "\n", - "from nvtabular import *\n", - "from nvtabular import ops\n", - "\n", - "from merlin.schema.tags import Tags\n", - "import merlin.models.tf as mm\n", - "\n", - "INPUT_DATA_DIR = os.environ.get('INPUT_DATA_DIR', '/workspace/data')\n", - "OUTPUT_DATA_DIR = os.environ.get('OUTPUT_DATA_DIR', '/workspace/data')\n", - "NUM_EPOCHS = int(os.environ.get('NUM_EPOCHS', '5'))" - ] - }, - { - "cell_type": "markdown", - "id": "c1b42076", - "metadata": {}, - "source": [ - "Let's download the data." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "d0a33352", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.USER_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.SESSION_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "(,\n", - " )" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "get_booking(INPUT_DATA_DIR)" - ] - }, - { - "cell_type": "markdown", - "id": "ee9dd8c8", - "metadata": {}, - "source": [ - "Each reservation has a unique utrip_id. During each trip a customer vists several destinations." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "01d1b755", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " user_id checkin checkout city_id device_class affiliate_id \\\n", - "0 1000027 2016-08-13 2016-08-14 8183 desktop 7168 \n", - "1 1000027 2016-08-14 2016-08-16 15626 desktop 7168 \n", - "2 1000027 2016-08-16 2016-08-18 60902 desktop 7168 \n", - "3 1000027 2016-08-18 2016-08-21 30628 desktop 253 \n", - "4 1000033 2016-04-09 2016-04-11 38677 mobile 359 \n", - "\n", - " booker_country hotel_country utrip_id \n", - "0 Elbonia Gondal 1000027_1 \n", - "1 Elbonia Gondal 1000027_1 \n", - "2 Elbonia Gondal 1000027_1 \n", - "3 Elbonia Gondal 1000027_1 \n", - "4 Gondal Cobra Island 1000033_1 \n" - ] - } - ], - "source": [ - "# When displaying cudf dataframes use print() or display(), otherwise Jupyter creates hidden copies.\n", - "train = get_lib().read_csv(f'{INPUT_DATA_DIR}/train_set.csv', parse_dates=['checkin', 'checkout'])\n", - "print(train.head())" - ] - }, - { - "cell_type": "markdown", - "id": "fecc2d94", - "metadata": {}, - "source": [ - "We will train on sequences of `city_id` and `booker_country` and based on this information, our model will attempt to predict the next `city_id` (the next hop in the journey).\n", - "\n", - "We will train a transformer model that can work with sequences of variable length within a batch. 
This functionality is provided to us out of the box and doesn't require any changes to the architecture. Thanks to this, we do not have to pad or trim our sequences to any particular length -- our model can make effective use of all of the data!\n",
- "\n",
- "*With one exception.* For the masked language model that we will be training, we need to discard sequences that are shorter than two hops. This makes sense, as there is nothing our model could learn if it were only presented with an itinerary with a single destination on it!\n",
- "\n",
- "Let us begin by splitting the data into a train and validation set based on trip ID.\n",
- "\n",
- "Let's see how many unique trips there are in the dataset. Also, let us shuffle the trips along the way so that our validation set consists of a random sample of all trips."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "id": "23bef6ae",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Number of unique trips is : 217686\n"
- ]
- }
- ],
- "source": [
- "# Unique trip ids.\n",
- "utrip_ids = train.sample(frac=1).utrip_id.unique()\n",
- "print('Number of unique trips is :', len(utrip_ids))"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "f7eca1f6",
- "metadata": {},
- "source": [
- "Now let's assign data to our train and validation sets. Furthermore, we sort the data by `utrip_id` and `checkin`. This way, we ensure that our sequences of visited `city_ids` will be in proper order!\n",
- "\n",
- "Also, let's remove trips where only a single city was visited, as they cannot be modeled as a sequence."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "id": "7754847c",
- "metadata": {},
- "outputs": [],
- "source": [
- "train = get_lib().from_pandas(\n",
- " train.to_pandas().join(train.to_pandas().groupby('utrip_id').size().rename('num_examples'), on='utrip_id')\n",
- ")\n",
- "train = train[train.num_examples > 1]\n",
- "\n",
- "train.checkin = train.checkin.astype('int')\n",
- "train.checkout = train.checkout.astype('int')\n",
- "\n",
- "train_set_utrip_ids = utrip_ids[:int(0.8 * utrip_ids.shape[0])]\n",
- "validation_set_utrip_ids = utrip_ids[int(0.8 * utrip_ids.shape[0]):]\n",
- "\n",
- "train_set = train[train.utrip_id.isin(train_set_utrip_ids)].sort_values(['utrip_id', 'checkin'])\n",
- "validation_set = train[train.utrip_id.isin(validation_set_utrip_ids)].sort_values(['utrip_id', 'checkin'])"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "79cc3992",
- "metadata": {},
- "source": [
- "## Preprocessing with NVTabular\n",
- "\n",
- "We can now begin with data preprocessing.\n",
- "\n",
- "We will combine trips into \"sessions\", discard trips that are too short and calculate total trip length.\n",
- "\n",
- "We will use NVTabular for this work. It offers optimized tabular data preprocessing operators that run on the GPU. 
If you would like to learn more about the NVTabular library, please take a look [here](https://github.com/NVIDIA-Merlin/NVTabular).\n",
- "\n",
- "Read more about [Merlin's Dataset API](https://github.com/NVIDIA-Merlin/core/blob/stable/merlin/io/dataset.py) \n",
- "Read more about how [parquet files are read in and processed by Merlin](https://github.com/NVIDIA-Merlin/core/blob/stable/merlin/io/parquet.py) \n",
- "Read more about [Tags](https://github.com/NVIDIA-Merlin/core/blob/stable/merlin/schema/tags.py) \n",
- "- [schema_select_by_tag](https://github.com/NVIDIA-Merlin/core/blob/stable/merlin/schema/schema.py) \n",
- "\n",
- "Read more about [NVTabular Workflows](https://github.com/NVIDIA-Merlin/NVTabular/blob/stable/nvtabular/workflow/workflow.py) \n",
- "- [fit_transform](https://github.com/NVIDIA-Merlin/NVTabular/blob/stable/nvtabular/workflow/workflow.py)\n",
- "- [transform](https://github.com/NVIDIA-Merlin/NVTabular/blob/stable/nvtabular/workflow/workflow.py) \n",
- "\n",
- "Read more about the NVTabular Operators \n",
- "- [Categorify](https://github.com/NVIDIA-Merlin/NVTabular/blob/stable/nvtabular/ops/categorify.py)\n",
- "- [AddTags](https://github.com/NVIDIA-Merlin/NVTabular/blob/stable/nvtabular/ops/add_metadata.py)\n",
- "- [LambdaOp](https://github.com/NVIDIA-Merlin/NVTabular/blob/stable/nvtabular/ops/lambdaop.py)\n",
- "- [Rename](https://github.com/NVIDIA-Merlin/NVTabular/blob/stable/nvtabular/ops/rename.py)\n",
- "- [Filter](https://github.com/NVIDIA-Merlin/NVTabular/blob/stable/nvtabular/ops/filter.py)\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "id": "3435af68",
- "metadata": {},
- "outputs": [],
- "source": [
- "train_set_dataset = Dataset(train_set)\n",
- "validation_set_dataset = Dataset(validation_set)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "id": "60bd5e59",
- "metadata": {},
- "outputs": [],
- "source": [
- "weekday_checkin = (\n",
- " [\"checkin\"]\n",
- " >> ops.LambdaOp(lambda col: get_lib().to_datetime(col).dt.weekday)\n",
- " >> ops.Rename(name=\"weekday_checkin\")\n",
- ")\n",
- "\n",
- "weekday_checkout = (\n",
- " [\"checkout\"]\n",
- " >> ops.LambdaOp(lambda col: get_lib().to_datetime(col).dt.weekday)\n",
- " >> ops.Rename(name=\"weekday_checkout\")\n",
- ")\n",
- "\n",
- "categorical_features = (['city_id', 'booker_country', 'hotel_country'] +\n",
- " weekday_checkin + weekday_checkout\n",
- " ) >> ops.Categorify()\n",
- "\n",
- "groupby_features = categorical_features + ['utrip_id', 'checkin'] >> ops.Groupby(\n",
- " groupby_cols=['utrip_id'],\n",
- " aggs={\n",
- " 'city_id': ['list', 'count'],\n",
- " 'booker_country': ['list'],\n",
- " 'hotel_country': ['list'],\n",
- " 'weekday_checkin': ['list'],\n",
- " 'weekday_checkout': ['list']\n",
- " },\n",
- " sort_cols=\"checkin\"\n",
- ")\n",
- "\n",
- "list_features = (\n",
- " groupby_features['city_id_list', 'booker_country_list', 'hotel_country_list', \n",
- " 'weekday_checkin_list', 'weekday_checkout_list'\n",
- " ] >> ops.AddTags([Tags.SEQUENCE])\n",
- ")\n",
- "\n",
- "# Filter out sessions with fewer than 2 interactions \n",
- "MINIMUM_SESSION_LENGTH = 2\n",
- "features = list_features + (groupby_features['city_id_count'] >> ops.AddTags([Tags.CONTINUOUS]))\n",
- "filtered_sessions = features >> ops.Filter(f=lambda df: df[\"city_id_count\"] >= MINIMUM_SESSION_LENGTH) "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "id": "6105767a",
- "metadata": {},
- "outputs": [],
- "source": [
- "wf = Workflow(filtered_sessions)\n",
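- "# fit_transform fits the workflow on the train set (e.g. Categorify vocabularies, groupby stats) and transforms it;\n",
- "# transform reuses the fitted mappings on the validation set, so both splits share one encoding.\n",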
"\n", - "wf.fit_transform(train_set_dataset).to_parquet(os.path.join(OUTPUT_DATA_DIR, 'train_processed.parquet'))\n", - "wf.transform(validation_set_dataset).to_parquet(os.path.join(OUTPUT_DATA_DIR, 'validation_processed.parquet'))\n", - "\n", - "wf.save(os.path.join(OUTPUT_DATA_DIR, 'workflow'))" - ] - }, - { - "cell_type": "markdown", - "id": "539a6675", - "metadata": {}, - "source": [ - "Our data consists of a sequence of visited `city_ids`, a sequence of `booker_countries` (represented as integer categories) and a `city_id_count` column (which contains the count of visited cities in a trip)." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "2dee6b53", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
city_id_listbooker_country_listhotel_country_listweekday_checkin_listweekday_checkout_listcity_id_count
0[8238, 156, 2278, 2097][3, 3, 3, 3][3, 3, 3, 3][5, 7, 4, 3][7, 4, 2, 7]4
1[63, 1160, 87, 618, 63][1, 1, 1, 1, 1][1, 1, 1, 1, 1][5, 1, 4, 3, 5][6, 4, 2, 5, 4]5
2[7, 6, 24, 1050, 65, 52, 3][2, 2, 2, 2, 2, 2, 2][2, 2, 2, 16, 16, 3, 3][5, 1, 2, 6, 5, 7, 4][6, 3, 1, 5, 7, 4, 3]7
3[1032, 757, 140, 3][2, 2, 2, 2][19, 19, 19, 3][1, 4, 2, 3][4, 3, 2, 5]4
4[3603, 262, 662, 250, 359][1, 1, 1, 1, 1][30, 30, 30, 30, 30][1, 3, 6, 5, 1][2, 1, 5, 6, 3]5
\n", - "
" - ], - "text/plain": [ - " city_id_list booker_country_list \\\n", - "0 [8238, 156, 2278, 2097] [3, 3, 3, 3] \n", - "1 [63, 1160, 87, 618, 63] [1, 1, 1, 1, 1] \n", - "2 [7, 6, 24, 1050, 65, 52, 3] [2, 2, 2, 2, 2, 2, 2] \n", - "3 [1032, 757, 140, 3] [2, 2, 2, 2] \n", - "4 [3603, 262, 662, 250, 359] [1, 1, 1, 1, 1] \n", - "\n", - " hotel_country_list weekday_checkin_list weekday_checkout_list \\\n", - "0 [3, 3, 3, 3] [5, 7, 4, 3] [7, 4, 2, 7] \n", - "1 [1, 1, 1, 1, 1] [5, 1, 4, 3, 5] [6, 4, 2, 5, 4] \n", - "2 [2, 2, 2, 16, 16, 3, 3] [5, 1, 2, 6, 5, 7, 4] [6, 3, 1, 5, 7, 4, 3] \n", - "3 [19, 19, 19, 3] [1, 4, 2, 3] [4, 3, 2, 5] \n", - "4 [30, 30, 30, 30, 30] [1, 3, 6, 5, 1] [2, 1, 5, 6, 3] \n", - "\n", - " city_id_count \n", - "0 4 \n", - "1 5 \n", - "2 7 \n", - "3 4 \n", - "4 5 " - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Dataset(os.path.join(OUTPUT_DATA_DIR, 'train_processed.parquet')).head()" - ] - }, - { - "cell_type": "markdown", - "id": "e89cc3a0", - "metadata": {}, - "source": [ - "We are now ready to train our model." - ] - }, - { - "cell_type": "markdown", - "id": "ce95c794", - "metadata": {}, - "source": [ - "Here is the schema of the data that our model will use." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "c4813456", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nametagsdtypeis_listis_raggedproperties.num_bucketsproperties.freq_thresholdproperties.max_sizeproperties.start_indexproperties.cat_pathproperties.domain.minproperties.domain.maxproperties.domain.nameproperties.embedding_sizes.cardinalityproperties.embedding_sizes.dimensionproperties.value_count.minproperties.value_count.max
0city_id_list(Tags.SEQUENCE, Tags.CATEGORICAL)DType(name='int64', element_type=<ElementType....TrueTrueNone000.//categories/unique.city_id.parquet037202city_id372035120None
1booker_country_list(Tags.SEQUENCE, Tags.CATEGORICAL)DType(name='int64', element_type=<ElementType....TrueTrueNone000.//categories/unique.booker_country.parquet05booker_country6160None
2hotel_country_list(Tags.SEQUENCE, Tags.CATEGORICAL)DType(name='int64', element_type=<ElementType....TrueTrueNone000.//categories/unique.hotel_country.parquet0194hotel_country195310None
3weekday_checkin_list(Tags.SEQUENCE, Tags.CATEGORICAL)DType(name='int64', element_type=<ElementType....TrueTrueNone000.//categories/unique.weekday_checkin.parquet07weekday_checkin8160None
4weekday_checkout_list(Tags.SEQUENCE, Tags.CATEGORICAL)DType(name='int64', element_type=<ElementType....TrueTrueNone000.//categories/unique.weekday_checkout.parquet07weekday_checkout8160None
\n", - "
" - ], - "text/plain": [ - "[{'name': 'city_id_list', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'start_index': 0, 'cat_path': './/categories/unique.city_id.parquet', 'domain': {'min': 0, 'max': 37202, 'name': 'city_id'}, 'embedding_sizes': {'cardinality': 37203, 'dimension': 512}, 'value_count': {'min': 0, 'max': None}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=0, max=None)))), 'is_list': True, 'is_ragged': True}, {'name': 'booker_country_list', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'start_index': 0, 'cat_path': './/categories/unique.booker_country.parquet', 'domain': {'min': 0, 'max': 5, 'name': 'booker_country'}, 'embedding_sizes': {'cardinality': 6, 'dimension': 16}, 'value_count': {'min': 0, 'max': None}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=0, max=None)))), 'is_list': True, 'is_ragged': True}, {'name': 'hotel_country_list', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'start_index': 0, 'cat_path': './/categories/unique.hotel_country.parquet', 'domain': {'min': 0, 'max': 194, 'name': 'hotel_country'}, 'embedding_sizes': {'cardinality': 195, 'dimension': 31}, 'value_count': {'min': 0, 'max': None}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=0, max=None)))), 'is_list': True, 'is_ragged': True}, {'name': 'weekday_checkin_list', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'start_index': 0, 'cat_path': './/categories/unique.weekday_checkin.parquet', 'domain': {'min': 0, 'max': 7, 'name': 'weekday_checkin'}, 'embedding_sizes': {'cardinality': 8, 'dimension': 16}, 'value_count': {'min': 0, 'max': None}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=0, max=None)))), 'is_list': True, 'is_ragged': True}, {'name': 'weekday_checkout_list', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'start_index': 0, 'cat_path': './/categories/unique.weekday_checkout.parquet', 'domain': {'min': 0, 'max': 7, 'name': 'weekday_checkout'}, 'embedding_sizes': {'cardinality': 8, 'dimension': 16}, 'value_count': {'min': 0, 'max': None}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=0, max=None)))), 'is_list': True, 'is_ragged': True}]" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "seq_schema = Workflow.load(os.path.join(OUTPUT_DATA_DIR, 'workflow')).output_schema.select_by_tag(Tags.SEQUENCE)\n", - "seq_schema" - ] - }, - { - "cell_type": "markdown", - "id": "8d422833", - "metadata": {}, - "source": [ - "Let's also identify the target column." 
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "id": "2b90424a",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'city_id_list'"
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "target = Workflow.load(os.path.join(OUTPUT_DATA_DIR, 'workflow')).output_schema.select_by_tag(Tags.SEQUENCE).column_names[0]\n",
- "target"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "e9d8adad",
- "metadata": {},
- "source": [
- "## Constructing the model"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "c4cb17fe",
- "metadata": {},
- "source": [
- "Let's construct our model.\n",
- "\n",
- "We can specify various hyperparameters, such as the number of heads and number of layers to use."
- ]
- },
- {
- "cell_type": "markdown",
- "id": "0a460e4c",
- "metadata": {},
- "source": [
- "For the transformer portion of our model, we will use the `XLNet` architecture."
- ]
- },
- {
- "cell_type": "markdown",
- "id": "23bf02dc",
- "metadata": {},
- "source": [
- "Later, when we run the `fit` method on our model, we will specify a `masking_prob` of `0.3` and link it to the transformer block defined in our model. Through the combination of these parameters, our model will train on sequences where any given timestep will be masked with a probability of 0.3, and it will be our model's training task to infer the target value for that step!\n",
- "\n",
- "To summarize, Masked Language Modeling is implemented by:\n",
- "\n",
- "* `SequenceMaskRandom()` - Used as the `pre` argument of `model.fit()`, it randomly selects items from the sequence to be masked for prediction as targets, by using Keras masking. This block also adds the necessary configuration to the specified `transformer` block so that it is pre-configured with the necessary layers needed to prepare the inputs to the HuggingFace transformer layer and to post-process its outputs. 
For example, one pre-processing operation is to replace the input embeddings at masked positions with a dummy trainable embedding, to avoid leakage of the targets.\n",
- "\n",
- "\n",
- "**Read more about the APIs used to construct models** \n",
- "- [blocks](https://github.com/NVIDIA-Merlin/models/tree/stable/merlin/models/tf/blocks)\n",
- "- [MLPBlock](https://github.com/NVIDIA-Merlin/models/blob/stable/merlin/models/tf/blocks/mlp.py)\n",
- "- [InputBlockV2](https://github.com/NVIDIA-Merlin/models/blob/stable/merlin/models/tf/inputs/base.py)\n",
- "- [Embeddings](https://github.com/NVIDIA-Merlin/models/blob/stable/merlin/models/tf/inputs/embedding.py)\n",
- "- [XLNetBlock](https://github.com/NVIDIA-Merlin/models/blob/stable/merlin/models/tf/transformers/block.py)\n",
- "- [CategoricalOutput](https://github.com/NVIDIA-Merlin/models/blob/stable/merlin/models/tf/outputs/classification.py)\n",
- "- [.schema.select_by_name](https://github.com/NVIDIA-Merlin/core/blob/stable/merlin/schema/schema.py)\n",
- "- [.schema.select_by_tag](https://github.com/NVIDIA-Merlin/core/blob/stable/merlin/schema/schema.py)\n",
- "- [model.compile()](https://github.com/NVIDIA-Merlin/models/blob/stable/merlin/models/tf/models/base.py)\n",
- "- [model.fit()](https://github.com/NVIDIA-Merlin/models/blob/stable/merlin/models/tf/models/base.py)\n",
- "- [model.evaluate()](https://github.com/NVIDIA-Merlin/models/blob/stable/merlin/models/tf/models/base.py)\n",
- "- [mm.SequenceMaskRandom](https://github.com/NVIDIA-Merlin/models/blob/stable/merlin/models/tf/transforms/sequence.py)\n",
- "- [mm.SequenceMaskLast](https://github.com/NVIDIA-Merlin/models/blob/stable/merlin/models/tf/transforms/sequence.py)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "id": "cddfd424",
- "metadata": {},
- "outputs": [],
- "source": [
- "dmodel = 48\n",
- "mlp_block = mm.MLPBlock(\n",
- " [128, dmodel],\n",
- " activation='relu',\n",
- " no_activation_last_layer=True,\n",
- " )\n",
- "transformer_block = mm.XLNetBlock(d_model=dmodel, n_head=4, n_layer=2)\n",
- "model = mm.Model(\n",
- " mm.InputBlockV2(\n",
- " seq_schema,\n",
- " embeddings=mm.Embeddings(\n",
- " Workflow.load(os.path.join(OUTPUT_DATA_DIR, 'workflow')).output_schema.select_by_tag(Tags.CATEGORICAL), sequence_combiner=None\n",
- " ),\n",
- " ),\n",
- " mlp_block,\n",
- " transformer_block,\n",
- " mm.CategoricalOutput(\n",
- " Workflow.load(os.path.join(OUTPUT_DATA_DIR, 'workflow')).output_schema.select_by_name(target),\n",
- " default_loss=\"categorical_crossentropy\",\n",
- " ),\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "aac975cd",
- "metadata": {},
- "source": [
- "## Model training"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "id": "65d28c27",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). 
Please update your code to provide a seed to the initializer, or avoid using the same initializer instance more than once.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/5\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-05-31 06:06:44.034041: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8700\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-05-31 06:06:54.541024: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model/xl_net_block/sequential_block_5/replace_masked_embeddings/RaggedWhere/Assert/AssertGuard/branch_executed/_95\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2720/2720 [==============================] - 81s 25ms/step - loss: 7.3315 - recall_at_10: 0.1973 - mrr_at_10: 0.0863 - ndcg_at_10: 0.1123 - map_at_10: 0.0863 - precision_at_10: 0.0197 - regularization_loss: 0.0000e+00 - loss_batch: 7.3306\n", - "Epoch 2/5\n", - "2720/2720 [==============================] - 70s 25ms/step - loss: 6.0979 - recall_at_10: 0.3633 - mrr_at_10: 0.1707 - ndcg_at_10: 0.2161 - map_at_10: 0.1707 - precision_at_10: 0.0363 - regularization_loss: 0.0000e+00 - loss_batch: 6.0950\n", - "Epoch 3/5\n", - "2720/2720 [==============================] - 71s 26ms/step - loss: 5.5827 - recall_at_10: 0.4306 - mrr_at_10: 0.2056 - ndcg_at_10: 0.2588 - map_at_10: 0.2056 - precision_at_10: 0.0431 - regularization_loss: 0.0000e+00 - loss_batch: 5.5806\n", - "Epoch 4/5\n", - "2720/2720 [==============================] - 72s 26ms/step - loss: 5.3211 - recall_at_10: 0.4627 - mrr_at_10: 0.2213 - ndcg_at_10: 0.2784 - map_at_10: 0.2213 - precision_at_10: 0.0463 - regularization_loss: 0.0000e+00 - loss_batch: 5.3194\n", - "Epoch 5/5\n", - "2720/2720 [==============================] - 71s 26ms/step - loss: 5.1920 - recall_at_10: 0.4787 - mrr_at_10: 0.2306 - ndcg_at_10: 0.2892 - map_at_10: 0.2306 - precision_at_10: 0.0479 - regularization_loss: 0.0000e+00 - loss_batch: 5.1903\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model.compile(run_eagerly=False, optimizer='adam', loss=\"categorical_crossentropy\")\n", - "\n", - "model.fit(\n", - " Dataset(os.path.join(OUTPUT_DATA_DIR, 'train_processed.parquet')),\n", - " batch_size=64,\n", - " epochs=NUM_EPOCHS,\n", - " pre=mm.SequenceMaskRandom(schema=seq_schema, target=target, masking_prob=0.3, transformer=transformer_block)\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "24699106", - 
"metadata": {}, - "source": [ - "## Model evaluation" - ] - }, - { - "cell_type": "markdown", - "id": "73d87d27", - "metadata": {}, - "source": [ - "We have trained our model.\n", - "\n", - "But in training the metrics come from a masked language modelling task. A portion of steps in the sequence was masked for each example. The metrics were calculated on this task.\n", - "\n", - "In reality, we probably care how well our model does on the next item prediction task (as it mimics the scenario in which the model would be likely to be used).\n", - "\n", - "Let's measure the performance of the model on a task where it attempts to predict the last item in a sequence.\n", - "\n", - "We will mask the last item using `SequenceMaskLast` and run inference." - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "bb3c6358", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-05-31 06:12:51.968982: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model/xl_net_block/sequential_block_5/replace_masked_embeddings/RaggedWhere/Assert/AssertGuard/branch_executed/_74\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "340/340 [==============================] - 11s 20ms/step - loss: 4.7151 - recall_at_10: 0.5533 - mrr_at_10: 0.3083 - ndcg_at_10: 0.3665 - map_at_10: 0.3083 - precision_at_10: 0.0553 - regularization_loss: 0.0000e+00 - loss_batch: 4.7149\n" - ] - } - ], - "source": [ - "metrics = model.evaluate(\n", - " Dataset(os.path.join(OUTPUT_DATA_DIR, 'validation_processed.parquet')),\n", - " batch_size=128,\n", - " pre=mm.SequenceMaskLast(schema=seq_schema, target=target, transformer=transformer_block),\n", - " return_dict=True\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "83ca276f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'loss': 4.715089797973633,\n", - " 'recall_at_10': 0.5533444881439209,\n", - " 'mrr_at_10': 0.30831339955329895,\n", - " 'ndcg_at_10': 0.36654922366142273,\n", - " 'map_at_10': 0.30831339955329895,\n", - " 'precision_at_10': 0.055334459990262985,\n", - " 'regularization_loss': 0.0,\n", - " 'loss_batch': 4.635858535766602}" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "metrics" - ] - }, - { - "cell_type": "markdown", - "id": "9fb5cc29", - "metadata": {}, - "source": [ - "## Serving predictions using the Triton Inference Server" - ] - }, - { - "cell_type": "markdown", - "id": "9dc6ee5f", - "metadata": {}, - "source": [ - "Now, we will serve our trained models on [NVIDIA Triton Inference Server (TIS)](https://github.com/triton-inference-server/server). TIS is an open-source inference serving software that helps standardize model deployment and execution and delivers fast and scalable AI in production. To serve recommender models on TIS easily, NVIDIA Merlin team designed and developed [the Merlin Systems library](https://github.com/NVIDIA-Merlin/systems). 
Merlin Systems provides tools and operators that make it easy to serve end-to-end recommender system pipelines on TIS.\n",
- "\n",
- "In order to perform inference on the Triton Inference Server, we need to output the inference operators to disk.\n",
- "\n",
- "The inference operators form an `Ensemble`, which is a pipeline that takes in raw data, processes it using NVTabular, and finally outputs predictions from the model that we trained.\n",
- "\n",
- "Let's write the `Ensemble` to disk (we will later load it on Triton to perform inference)."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "id": "7ae33813",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n",
- " (_feature_shapes): Dict(\n",
- " (city_id_list): TensorShape([64, None, 1])\n",
- " (booker_country_list): TensorShape([64, None, 1])\n",
- " (hotel_country_list): TensorShape([64, None, 1])\n",
- " (weekday_checkin_list): TensorShape([64, None, 1])\n",
- " (weekday_checkout_list): TensorShape([64, None, 1])\n",
- " )\n",
- " (_feature_dtypes): Dict(\n",
- " (city_id_list): tf.int64\n",
- " (booker_country_list): tf.int64\n",
- " (hotel_country_list): tf.int64\n",
- " (weekday_checkin_list): tf.int64\n",
- " (weekday_checkout_list): tf.int64\n",
- " )\n",
- "), because it is not built.\n",
- "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n",
- " (_feature_shapes): Dict(\n",
- " (city_id_list): TensorShape([64, None, 1])\n",
- " (booker_country_list): TensorShape([64, None, 1])\n",
- " (hotel_country_list): TensorShape([64, None, 1])\n",
- " (weekday_checkin_list): TensorShape([64, None, 1])\n",
- " (weekday_checkout_list): TensorShape([64, None, 1])\n",
- " )\n",
- " (_feature_dtypes): Dict(\n",
- " (city_id_list): tf.int64\n",
- " (booker_country_list): tf.int64\n",
- " (hotel_country_list): tf.int64\n",
- " (weekday_checkin_list): tf.int64\n",
- " (weekday_checkout_list): tf.int64\n",
- " )\n",
- "), because it is not built.\n",
- "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n",
- " (_feature_shapes): Dict(\n",
- " (city_id_list): TensorShape([64, None, 1])\n",
- " (booker_country_list): TensorShape([64, None, 1])\n",
- " (hotel_country_list): TensorShape([64, None, 1])\n",
- " (weekday_checkin_list): TensorShape([64, None, 1])\n",
- " (weekday_checkout_list): TensorShape([64, None, 1])\n",
- " )\n",
- " (_feature_dtypes): Dict(\n",
- " (city_id_list): tf.int64\n",
- " (booker_country_list): tf.int64\n",
- " (hotel_country_list): tf.int64\n",
- " (weekday_checkin_list): tf.int64\n",
- " (weekday_checkout_list): tf.int64\n",
- " )\n",
- "), because it is not built.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_mask_random_layer_call_fn, sequence_mask_random_layer_call_and_return_conditional_losses, sequence_mask_last_layer_call_fn while saving (showing 5 of 108). 
These functions will not be directly callable after loading.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: /tmp/tmp1sakw940/model.savedmodel/assets\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: /tmp/tmp1sakw940/model.savedmodel/assets\n", - "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:101: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n", - "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " config[i] = tf.keras.utils.serialize_keras_object(layer)\n", - "/usr/local/lib/python3.8/dist-packages/keras/saving/legacy/saved_model/layer_serialization.py:134: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " return serialization.serialize_keras_object(obj)\n", - "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initializer instance more than once.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n", - "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initializer instance more than once.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. 
Compile it manually.\n", - "/usr/local/lib/python3.8/dist-packages/merlin/systems/dag/node.py:100: UserWarning: Operator 'TransformWorkflow' is producing the output column 'city_id_count', which is not being used by any downstream operator in the ensemble graph.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", - " (_feature_shapes): Dict(\n", - " (city_id_list): TensorShape([64, None, 1])\n", - " (booker_country_list): TensorShape([64, None, 1])\n", - " (hotel_country_list): TensorShape([64, None, 1])\n", - " (weekday_checkin_list): TensorShape([64, None, 1])\n", - " (weekday_checkout_list): TensorShape([64, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (city_id_list): tf.int64\n", - " (booker_country_list): tf.int64\n", - " (hotel_country_list): tf.int64\n", - " (weekday_checkin_list): tf.int64\n", - " (weekday_checkout_list): tf.int64\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", - " (_feature_shapes): Dict(\n", - " (city_id_list): TensorShape([64, None, 1])\n", - " (booker_country_list): TensorShape([64, None, 1])\n", - " (hotel_country_list): TensorShape([64, None, 1])\n", - " (weekday_checkin_list): TensorShape([64, None, 1])\n", - " (weekday_checkout_list): TensorShape([64, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (city_id_list): tf.int64\n", - " (booker_country_list): tf.int64\n", - " (hotel_country_list): tf.int64\n", - " (weekday_checkin_list): tf.int64\n", - " (weekday_checkout_list): tf.int64\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (city_id_list): TensorShape([64, None, 1])\n", - " (booker_country_list): TensorShape([64, None, 1])\n", - " (hotel_country_list): TensorShape([64, None, 1])\n", - " (weekday_checkin_list): TensorShape([64, None, 1])\n", - " (weekday_checkout_list): TensorShape([64, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (city_id_list): tf.int64\n", - " (booker_country_list): tf.int64\n", - " (hotel_country_list): tf.int64\n", - " (weekday_checkin_list): tf.int64\n", - " (weekday_checkout_list): tf.int64\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (city_id_list): TensorShape([64, None, 1])\n", - " (booker_country_list): TensorShape([64, None, 1])\n", - " (hotel_country_list): TensorShape([64, None, 1])\n", - " (weekday_checkin_list): TensorShape([64, None, 1])\n", - " (weekday_checkout_list): TensorShape([64, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (city_id_list): tf.int64\n", - " (booker_country_list): tf.int64\n", - " (hotel_country_list): tf.int64\n", - " (weekday_checkin_list): tf.int64\n", - " (weekday_checkout_list): tf.int64\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (city_id_list): TensorShape([64, None, 1])\n", - " 
(booker_country_list): TensorShape([64, None, 1])\n", - " (hotel_country_list): TensorShape([64, None, 1])\n", - " (weekday_checkin_list): TensorShape([64, None, 1])\n", - " (weekday_checkout_list): TensorShape([64, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (city_id_list): tf.int64\n", - " (booker_country_list): tf.int64\n", - " (hotel_country_list): tf.int64\n", - " (weekday_checkin_list): tf.int64\n", - " (weekday_checkout_list): tf.int64\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (city_id_list): TensorShape([64, None, 1])\n", - " (booker_country_list): TensorShape([64, None, 1])\n", - " (hotel_country_list): TensorShape([64, None, 1])\n", - " (weekday_checkin_list): TensorShape([64, None, 1])\n", - " (weekday_checkout_list): TensorShape([64, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (city_id_list): tf.int64\n", - " (booker_country_list): tf.int64\n", - " (hotel_country_list): tf.int64\n", - " (weekday_checkin_list): tf.int64\n", - " (weekday_checkout_list): tf.int64\n", - " )\n", - "), because it is not built.\n", - "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_mask_random_layer_call_fn, sequence_mask_random_layer_call_and_return_conditional_losses, sequence_mask_last_layer_call_fn while saving (showing 5 of 108). These functions will not be directly callable after loading.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: /workspace/data/ensemble/1_predicttensorflowtriton/1/model.savedmodel/assets\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: /workspace/data/ensemble/1_predicttensorflowtriton/1/model.savedmodel/assets\n", - "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:101: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n", - "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " config[i] = tf.keras.utils.serialize_keras_object(layer)\n", - "/usr/local/lib/python3.8/dist-packages/keras/saving/legacy/saved_model/layer_serialization.py:134: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " return serialization.serialize_keras_object(obj)\n", - "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). 
Please update your code to provide a seed to the initializer, or avoid using the same initializer instance more than once.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" - ] - } - ], - "source": [ - "from merlin.systems.dag.ops.tensorflow import PredictTensorflow\n", - "from merlin.systems.dag.ensemble import Ensemble\n", - "from merlin.systems.dag.ops.workflow import TransformWorkflow\n", - "\n", - "inf_ops = wf.input_schema.column_names >> TransformWorkflow(wf) >> PredictTensorflow(model)\n", - "\n", - "ensemble = Ensemble(inf_ops, wf.input_schema)\n", - "ensemble.export(os.path.join(OUTPUT_DATA_DIR, 'ensemble'));" - ] - }, - { - "cell_type": "markdown", - "id": "5edc6046", - "metadata": {}, - "source": [ - "After we export the ensemble, we are ready to start the Triton Inference Server.\n", - "\n", - "The server is installed in Merlin Tensorflow and Merlin PyTorch containers. If you are not using one of our containers, then ensure it is installed in your environment. For more information, see the Triton Inference Server [documentation](https://github.com/triton-inference-server/server/blob/r22.03/README.md#documentation).\n", - "\n", - "You can start the server by running the following command:\n", - "\n", - "```tritonserver --model-repository={OUTPUT_DATA_DIR}/ensemble/```\n", - "\n", - "For the --model-repository argument, specify the same value as the `export_path` that you specified previously in the `ensemble.export` method.\n", - "\n", - "After you run the `tritonserver` command, wait until your terminal shows messages like the following example:\n", - "\n", - "I0414 18:29:50.741833 4067 grpc_server.cc:4421] Started GRPCInferenceService at 0.0.0.0:8001
\n", - "I0414 18:29:50.742197 4067 http_server.cc:3113] Started HTTPService at 0.0.0.0:8000
\n", - "I0414 18:29:50.783470 4067 http_server.cc:178] Started Metrics Service at 0.0.0.0:8002\n", - "\n", - "Let us now package our data for inference. We will send the first 4 rows of our validation data, which corresponds to a single trip. The data will be first processed by the `NVTabular` workflow and subsequentally passed to our transformer model for predicting. " - ] - }, - { - "cell_type": "markdown", - "id": "d83a304d", - "metadata": {}, - "source": [ - "Let us send the first 4 rows of our validation data to Triton. This will correspond to a single trip (all rows have the same `utrip_id`) with four stops." - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "3cad9026", - "metadata": {}, - "outputs": [], - "source": [ - "from merlin.systems.triton import convert_df_to_triton_input\n", - "\n", - "validation_data = validation_set_dataset.compute()\n", - "inputs = convert_df_to_triton_input(wf.input_schema, validation_data.iloc[:4])" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "c508adce", - "metadata": {}, - "outputs": [], - "source": [ - "import tritonclient.grpc as grpcclient\n", - "\n", - "with grpcclient.InferenceServerClient(\"localhost:8001\") as client:\n", - " response = client.infer('executor_model', inputs)" - ] - }, - { - "cell_type": "markdown", - "id": "6d34eecf", - "metadata": {}, - "source": [ - "The response consists of logits coming from our model." - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "b3284691", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[-2.8206294 , -1.3849059 , 1.9042726 , ..., 0.851537 ,\n", - " -2.4237087 , -0.73849726]], dtype=float32)" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "response.as_numpy('city_id_list/categorical_output')" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "824d2b4f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(1, 37203)" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "predictions = response.as_numpy('city_id_list/categorical_output')\n", - "predictions.shape" - ] - }, - { - "cell_type": "markdown", - "id": "fc5d415b", - "metadata": {}, - "source": [ - "The above values are logits output from the last layer of our model. They correspond in size to the cardinality of `city_id`, our target variable:" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "29a8c0bd", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "37203" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cardinality = wf.output_schema['city_id_list'].properties['embedding_sizes']['cardinality']\n", - "cardinality" - ] - }, - { - "cell_type": "markdown", - "id": "3c54c30f", - "metadata": {}, - "source": [ - "## Summary" - ] - }, - { - "cell_type": "markdown", - "id": "709c07fb", - "metadata": {}, - "source": [ - "We have trained a transformer model for the next item prediction task using language model masking.\n", - "\n", - "For another session-based example that goes deeper into data preprocessing and that covers several advanced techniques (Weight Tying, Temperature Scaling) please see [Session-Based Next Item Prediction for Fashion E-Commerce](https://github.com/NVIDIA-Merlin/models/blob/t4rec_use_case/examples/usecases/ecommerce-session-based-next-item-prediction-for-fashion.ipynb). 
" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tests/benchmark/test_asvdb_transformers_next_item_prediction.py b/tests/benchmark/test_asvdb_transformers_next_item_prediction.py deleted file mode 100644 index 49aa27416e..0000000000 --- a/tests/benchmark/test_asvdb_transformers_next_item_prediction.py +++ /dev/null @@ -1,95 +0,0 @@ -from asvdb import ASVDb, BenchmarkResult, utils -from testbook import testbook - -from tests.conftest import REPO_ROOT, get_benchmark_info - - -@testbook( - REPO_ROOT / "examples/usecases/transformers-next-item-prediction.ipynb", - timeout=720, - execute=False, -) -def test_func(tb, tmpdir): - tb.inject( - f""" - import os - os.environ["INPUT_DATA_DIR"] = "/raid/data/booking" - os.environ["OUTPUT_DATA_DIR"] = "{tmpdir}" - os.environ["NUM_EPOCHS"] = '1' - """ - ) - tb.cells.pop(6) - tb.cells[ - 15 - ].source = """ - def process_data(): - wf = Workflow(filtered_sessions) - - wf.fit_transform(train_set_dataset).to_parquet( - os.path.join(OUTPUT_DATA_DIR, 'train_processed.parquet') - ) - wf.transform(validation_set_dataset).to_parquet( - os.path.join(OUTPUT_DATA_DIR, 'validation_processed.parquet') - ) - - wf.save(os.path.join(OUTPUT_DATA_DIR, 'workflow')) - - data_processing_runtime = timeit.timeit(process_data, number=1) - """ - tb.cells[ - 29 - ].source = """ - model.compile(run_eagerly=False, optimizer='adam', loss="categorical_crossentropy") - - def train_model(): - model.fit( - Dataset(os.path.join(OUTPUT_DATA_DIR, 'train_processed.parquet')), - batch_size=64, - epochs=NUM_EPOCHS, - pre=mm.SequenceMaskRandom( - schema=seq_schema, - target=target, - masking_prob=0.3, - transformer=transformer_block - ) - ) - - training_runtime = timeit.timeit(train_model, number=1) - """ - tb.execute_cell(list(range(0, 35))) - data_processing_runtime = tb.ref("data_processing_runtime") - training_runtime = tb.ref("training_runtime") - ndcg_at_10 = tb.ref("metrics")["ndcg_at_10"] - - bResult1 = BenchmarkResult( - funcName="", - argNameValuePairs=[ - ("notebook_name", "usecases/transformers-next-item-prediction"), - ("measurement", "data_processing_runtime"), - ], - result=data_processing_runtime, - ) - bResult2 = BenchmarkResult( - funcName="", - argNameValuePairs=[ - ("notebook_name", "usecases/transformers-next-item-prediction"), - ("measurement", "training_runtime"), - ], - result=training_runtime, - ) - bResult3 = BenchmarkResult( - funcName="", - argNameValuePairs=[ - ("notebook_name", "usecases/transformers-next-item-prediction"), - ("measurement", "ndcg_at_10"), - ], - result=ndcg_at_10, - ) - - bInfo = get_benchmark_info() - (repo, branch) = utils.getRepoInfo() - - db = ASVDb(dbDir="s3://nvtab-bench-asvdb/models_metric_tracking", repo=repo, branches=[branch]) - db.addResult(bInfo, bResult1) - db.addResult(bInfo, bResult2) - db.addResult(bInfo, bResult3) diff --git a/tests/unit/tf/examples/test_usecase_ecommerce_session_based.py b/tests/unit/tf/examples/test_08_session_based_next_item_prediction.py similarity index 95% rename from tests/unit/tf/examples/test_usecase_ecommerce_session_based.py rename to tests/unit/tf/examples/test_08_session_based_next_item_prediction.py 
index 20272fa6b8..105dfda63c 100644 --- a/tests/unit/tf/examples/test_usecase_ecommerce_session_based.py +++ b/tests/unit/tf/examples/test_08_session_based_next_item_prediction.py @@ -6,7 +6,7 @@ pytest.importorskip("transformers") -p = "examples/usecases/ecommerce-session-based-next-item-prediction-for-fashion.ipynb" +p = "examples/08-Train-a-model-for-session-based-next-item-prediction.ipynb" @testbook( diff --git a/tests/unit/tf/examples/test_usecase_transformers_next_item_prediction.py b/tests/unit/tf/examples/test_usecase_transformers_next_item_prediction.py deleted file mode 100644 index a0dc9d9fdd..0000000000 --- a/tests/unit/tf/examples/test_usecase_transformers_next_item_prediction.py +++ /dev/null @@ -1,57 +0,0 @@ -import shutil - -import pytest -from testbook import testbook - -from tests.conftest import REPO_ROOT - -pytest.importorskip("transformers") -utils = pytest.importorskip("merlin.systems.triton.utils") - -TRITON_SERVER_PATH = shutil.which("tritonserver") - - -@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found") -@testbook( - REPO_ROOT / "examples/usecases/transformers-next-item-prediction.ipynb", - timeout=720, - execute=False, -) -@pytest.mark.notebook -def test_next_item_prediction(tb, tmpdir): - tb.inject( - f""" - import os, random - os.environ["INPUT_DATA_DIR"] = "{tmpdir}" - os.environ["OUTPUT_DATA_DIR"] = "{tmpdir}" - from datetime import datetime, timedelta - from merlin.datasets.synthetic import generate_data - ds = generate_data('booking.com-raw', 10000) - df = ds.compute() - def generate_date(): - date = datetime.today() - if random.randint(0, 1): - date -= timedelta(days=7) - return date - df['checkin'] = [generate_date() for _ in range(df.shape[0])] - df['checkout'] = [generate_date() for _ in range(df.shape[0])] - df.to_csv('{tmpdir}/train_set.csv') - """ - ) - tb.cells.pop(6) - tb.cells[29].source = tb.cells[29].source.replace("epochs=5", "epochs=1") - tb.execute_cell(list(range(0, 38))) - - with utils.run_triton_server(f"{tmpdir}/ensemble", grpc_port=8001): - tb.execute_cell(list(range(38, len(tb.cells)))) - - tb.inject( - """ - logits_count = predictions.shape[1] - """ - ) - tb.execute_cell(len(tb.cells) - 1) - - cardinality = tb.ref("cardinality") - logits_count = tb.ref("logits_count") - assert logits_count == cardinality diff --git a/tests/unit/tf/examples/test_usecase_transformers_next_item_prediction_with_pretrained_embeddings.py b/tests/unit/tf/examples/test_usecase_transformers_next_item_prediction_with_pretrained_embeddings.py deleted file mode 100644 index cc7c0b4a70..0000000000 --- a/tests/unit/tf/examples/test_usecase_transformers_next_item_prediction_with_pretrained_embeddings.py +++ /dev/null @@ -1,38 +0,0 @@ -import shutil - -import pytest -from testbook import testbook - -from tests.conftest import REPO_ROOT - -pytest.importorskip("transformers") -utils = pytest.importorskip("merlin.systems.triton.utils") - -TRITON_SERVER_PATH = shutil.which("tritonserver") - - -@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found") -@testbook( - REPO_ROOT - / "examples/usecases/transformers-next-item-prediction-with-pretrained-embeddings.ipynb", - timeout=720, - execute=False, -) -@pytest.mark.notebook -def test_next_item_prediction(tb, tmpdir): - tb.inject( - f""" - import os, random - os.environ["OUTPUT_DATA_DIR"] = "{tmpdir}" - os.environ["NUM_EPOCHS"] = "1" - os.environ["NUM_EXAMPLES"] = "1_500" - os.environ["MINIMUM_SESSION_LENGTH"] = "2" - """ - ) - tb.execute_cell(list(range(0, 48))) - - with 
utils.run_triton_server(f"{tmpdir}/ensemble", grpc_port=8001): - tb.execute_cell(list(range(48, len(tb.cells)))) - - predicted_hashed_url_id = tb.ref("predicted_hashed_url_id").item() - assert predicted_hashed_url_id >= 0 and predicted_hashed_url_id <= 1002 From 58ce8c15fef5de2e034bb6619201049285b1914d Mon Sep 17 00:00:00 2001 From: Marc Romeyn Date: Thu, 6 Jul 2023 13:05:31 +0200 Subject: [PATCH 11/34] Making input- and output-block lazy to allow for nesting (#1184) * Making schema optional in TabularInputBlock to allow for nesting * Adding tests to ensure correct behaviour * Fixing failing test --------- Co-authored-by: edknv <109497216+edknv@users.noreply.github.com> --- merlin/models/torch/inputs/tabular.py | 25 ++++++++++++++---------- merlin/models/torch/outputs/tabular.py | 24 +++++++++++++---------- merlin/models/torch/router.py | 14 +++++++++---- tests/unit/torch/inputs/test_tabular.py | 18 +++++++++++++++++ tests/unit/torch/outputs/test_tabular.py | 10 ++++++++++ tests/unit/torch/test_router.py | 9 +++++++++ 6 files changed, 76 insertions(+), 24 deletions(-) diff --git a/merlin/models/torch/inputs/tabular.py b/merlin/models/torch/inputs/tabular.py index 0d73656e20..dc6ae4473e 100644 --- a/merlin/models/torch/inputs/tabular.py +++ b/merlin/models/torch/inputs/tabular.py @@ -51,21 +51,26 @@ class TabularInputBlock(RouterBlock): def __init__( self, - schema: Schema, + schema: Optional[Schema] = None, init: Optional[Union[str, Initializer]] = None, agg: Optional[Union[str, nn.Module]] = None, ): + self.init = init + self.agg = agg super().__init__(schema) + + def setup_schema(self, schema: Schema): + super().setup_schema(schema) self.schema: Schema = self.selectable.schema - if init: - if isinstance(init, str): - init = self.initializers.get(init) - if not init: - raise ValueError(f"Initializer {init} not found.") - - init(self) - if agg: - self.append(Block.parse(agg)) + if self.init: + if isinstance(self.init, str): + self.init = self.initializers.get(self.init) + if not self.init: + raise ValueError(f"Initializer {self.init} not found.") + + self.init(self) + if self.agg: + self.append(Block.parse(self.agg)) @classmethod def register_init(cls, name: str): diff --git a/merlin/models/torch/outputs/tabular.py b/merlin/models/torch/outputs/tabular.py index bab73d9be8..dfc74eb059 100644 --- a/merlin/models/torch/outputs/tabular.py +++ b/merlin/models/torch/outputs/tabular.py @@ -49,22 +49,26 @@ class TabularOutputBlock(RouterBlock): def __init__( self, - schema: Schema, + schema: Optional[Schema] = None, init: Optional[Union[str, Initializer]] = None, selection: Optional[Selection] = Tags.TARGET, ): - if selection: - schema = select(schema, selection) - + self.selection = selection + self.init = init super().__init__(schema, prepend_routing_module=False) + + def setup_schema(self, schema: Schema): + if self.selection: + schema = select(schema, self.selection) + super().setup_schema(schema) self.schema: Schema = self.selectable.schema - if init: - if isinstance(init, str): - init = self.initializers.get(init) - if not init: - raise ValueError(f"Initializer {init} not found.") + if self.init: + if isinstance(self.init, str): + self.init = self.initializers.get(self.init) + if not self.init: + raise ValueError(f"Initializer {self.init} not found.") - init(self) + self.init(self) @classmethod def register_init(cls, name: str): diff --git a/merlin/models/torch/router.py b/merlin/models/torch/router.py index 087068b6e0..bfa7cbc1ec 100644 --- a/merlin/models/torch/router.py +++ 
b/merlin/models/torch/router.py @@ -49,13 +49,16 @@ class RouterBlock(ParallelBlock): def __init__(self, selectable: schema.Selectable, prepend_routing_module: bool = True): super().__init__() + self.prepend_routing_module = prepend_routing_module if isinstance(selectable, Schema): - from merlin.models.torch.inputs.select import SelectKeys + self.setup_schema(selectable) + else: + self.selectable: schema.Selectable = selectable - selectable = SelectKeys(selectable) + def setup_schema(self, schema: Schema): + from merlin.models.torch.inputs.select import SelectKeys - self.selectable: schema.Selectable = selectable - self.prepend_routing_module = prepend_routing_module + self.selectable = SelectKeys(schema) def add_route( self, @@ -88,6 +91,9 @@ def add_route( The router block with the new route added. """ + if self.selectable is None: + raise ValueError(f"{self} has nothing to select from, so cannot add route.") + routing_module = schema.select(self.selectable, selection) if not routing_module: return self diff --git a/tests/unit/torch/inputs/test_tabular.py b/tests/unit/torch/inputs/test_tabular.py index e81fe44ce4..da8acc90b4 100644 --- a/tests/unit/torch/inputs/test_tabular.py +++ b/tests/unit/torch/inputs/test_tabular.py @@ -131,3 +131,21 @@ def test_extract_double_nesting(self): no_user_id, user_id_route = mm.schema.extract(input_block, Tags.USER_ID) assert no_user_id + + def test_nesting(self): + input_block = mm.TabularInputBlock(self.schema) + input_block.add_route( + lambda schema: schema, + mm.TabularInputBlock(init="defaults"), + ) + outputs = module_utils.module_test(input_block, self.batch) + + for name in mm.schema.select(self.schema, Tags.CONTINUOUS).column_names: + assert name in outputs + + for name in mm.schema.select(self.schema, Tags.CATEGORICAL).column_names: + assert name in outputs + assert outputs[name].shape == ( + 10, + infer_embedding_dim(self.schema.select_by_name(name)), + ) diff --git a/tests/unit/torch/outputs/test_tabular.py b/tests/unit/torch/outputs/test_tabular.py index 3ed04b4725..12b045879a 100644 --- a/tests/unit/torch/outputs/test_tabular.py +++ b/tests/unit/torch/outputs/test_tabular.py @@ -69,3 +69,13 @@ def test_no_route_for_non_existent_tag(self): outputs.add_route(Tags.CATEGORICAL) assert not outputs + + def test_nesting(self): + output_block = mm.TabularOutputBlock(self.schema) + output_block.add_route(Tags.TARGET, mm.TabularOutputBlock(init="defaults")) + + outputs = module_utils.module_test(output_block, torch.rand(10, 10)) + + assert "play_percentage" in outputs + assert "click" in outputs + assert "like" in outputs diff --git a/tests/unit/torch/test_router.py b/tests/unit/torch/test_router.py index 76459ea8db..10e57d7d74 100644 --- a/tests/unit/torch/test_router.py +++ b/tests/unit/torch/test_router.py @@ -163,3 +163,12 @@ def test_nested(self): outputs = module_utils.module_test(nested, self.batch.features) assert list(outputs.keys()) == ["user_age"] assert "user_age" in mm.output_schema(nested).column_names + + def test_exceptions(self): + router = mm.RouterBlock(None) + with pytest.raises(ValueError): + router.add_route(Tags.CONTINUOUS) + + router = mm.RouterBlock(self.schema, prepend_routing_module=False) + with pytest.raises(ValueError): + router.add_route(Tags.CONTINUOUS) From 7c629ec3e0a32635bd7979fa8dcf2b71b8c9684e Mon Sep 17 00:00:00 2001 From: Oliver Holworthy Date: Thu, 6 Jul 2023 16:24:18 +0100 Subject: [PATCH 12/34] Update docs theme from `sphinx_rtd_theme` to `sphinx_book_theme` (#1189) * Update docs theme from 
---
 docs/source/_static/NVIDIA-LogoBlack.svg      |   1 +
 docs/source/_static/NVIDIA-LogoWhite.svg      |  58 ++
 docs/source/_static/css/custom.css            | 500 ++++++++++++++++--
 docs/source/_static/css/versions.css          | 140 +++++
 docs/source/_static/favicon.png               | Bin 0 -> 2197 bytes
 .../source/_static/js/rtd-version-switcher.js |   5 +
 docs/source/_templates/layout.html            |  30 +-
 docs/source/_templates/merlin-ecosystem.html  |  14 +
 docs/source/_templates/versions.html          |   2 +-
 docs/source/conf.py                           |  29 +-
 merlin/models/tf/__init__.py                  |   2 +
 requirements/docs.txt                         |   5 +-
 requirements/tensorflow.txt                   |   2 +-
 13 files changed, 736 insertions(+), 52 deletions(-)
 create mode 100755 docs/source/_static/NVIDIA-LogoBlack.svg
 create mode 100755 docs/source/_static/NVIDIA-LogoWhite.svg
 create mode 100644 docs/source/_static/css/versions.css
 create mode 100755 docs/source/_static/favicon.png
 create mode 100644 docs/source/_static/js/rtd-version-switcher.js
 create mode 100644 docs/source/_templates/merlin-ecosystem.html

diff --git a/docs/source/_static/NVIDIA-LogoBlack.svg b/docs/source/_static/NVIDIA-LogoBlack.svg
new file mode 100755
index 0000000000..c612396c71
--- /dev/null
+++ b/docs/source/_static/NVIDIA-LogoBlack.svg
@@ -0,0 +1 @@
+NVIDIA-LogoBlack
\ No newline at end of file
diff --git a/docs/source/_static/NVIDIA-LogoWhite.svg b/docs/source/_static/NVIDIA-LogoWhite.svg
new file mode 100755
index 0000000000..942ca3b2a0
--- /dev/null
+++ b/docs/source/_static/NVIDIA-LogoWhite.svg
@@ -0,0 +1,58 @@
+
+
+
+
+
+ NVIDIA-LogoBlack
+
+
+
+
diff --git a/docs/source/_static/css/custom.css b/docs/source/_static/css/custom.css
index 319ddff89a..7287e49212 100644
--- a/docs/source/_static/css/custom.css
+++ b/docs/source/_static/css/custom.css
@@ -1,34 +1,472 @@
-.wy-nav-content {
-    margin: 0;
-    background: #fcfcfc;
-    padding-top: 40px;
+/*
+# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Parts of this are adapted from the NVIDIA Omniverse Docs Sphinx Theme */
+
+/* Set up for old browsers*/
+@supports not (font-variation-settings: normal) {
+  @font-face {
+    font-family: "NVIDIA";
+    src: url("https://images.nvidia.com/etc/designs/nvidiaGDC/clientlibs_base/fonts/nvidia-sans/GLOBAL/NVIDIASans_W_Lt.woff") format("woff"),
+         url("https://images.nvidia.com/etc/designs/nvidiaGDC/clientlibs_base/fonts/nvidia-sans/GLOBAL/NVIDIASans_W_Lt.woff2") format("woff2");
+    font-weight: 300;
+    font-style: normal;
+  }
+  @font-face {
+    font-family: "NVIDIA";
+    src: url("https://images.nvidia.com/etc/designs/nvidiaGDC/clientlibs_base/fonts/nvidia-sans/GLOBAL/NVIDIASans_W_Rg.woff") format("woff"),
+         url("https://images.nvidia.com/etc/designs/nvidiaGDC/clientlibs_base/fonts/nvidia-sans/GLOBAL/NVIDIASans_W_Rg.woff2") format("woff2");
+    font-weight: 400;
+    font-style: normal;
+  }
+  @font-face {
+    font-family: "NVIDIA";
+    src: url("https://images.nvidia.com/etc/designs/nvidiaGDC/clientlibs_base/fonts/nvidia-sans/GLOBAL/NVIDIASans_W_Md.woff") format("woff"),
+         url("https://images.nvidia.com/etc/designs/nvidiaGDC/clientlibs_base/fonts/nvidia-sans/GLOBAL/NVIDIASans_W_Md.woff2") format("woff2");
+    font-weight: 500;
+    font-style: normal;
+  }
+  @font-face {
+    font-family: "NVIDIA";
+    src: url("https://images.nvidia.com/etc/designs/nvidiaGDC/clientlibs_base/fonts/nvidia-sans/GLOBAL/NVIDIASans_W_Bd.woff") format("woff"),
+         url("https://images.nvidia.com/etc/designs/nvidiaGDC/clientlibs_base/fonts/nvidia-sans/GLOBAL/NVIDIASans_W_Bd.woff2") format("woff2");
+    font-weight: 700;
+    font-style: normal;
+  }
+}
+
+/* Set up for modern browsers, all weights */
+@supports (font-variation-settings: normal) {
+  @font-face {
+    font-family: 'NVIDIA';
+    src: url('https://images.nvidia.com/etc/designs/nvidiaGDC/clientlibs_base/fonts/nvidia-sans/GLOBAL/var/NVIDIASansVF_W_Wght.woff2') format('woff2 supports variations'),
+         url('https://images.nvidia.com/etc/designs/nvidiaGDC/clientlibs_base/fonts/nvidia-sans/GLOBAL/var/NVIDIASansVF_W_Wght.woff2') format('woff2-variations');
+    font-weight: 100 1000;
+    font-stretch: 25% 151%;
+    font-style: normal;
+  }
+  @font-face{
+    font-family:'NVIDIA';
+    src:url('https://images.nvidia.com/etc/designs/nvidiaGDC/clientlibs_base/fonts/nvidia-sans/GLOBAL/var/NVIDIASansVF_Wght_W_Italic.woff2') format('woff2 supports variations'),
+        url('https://images.nvidia.com/etc/designs/nvidiaGDC/clientlibs_base/fonts/nvidia-sans/GLOBAL/var/NVIDIASansVF_Wght_W_Italic.woff2') format('woff2-variations');
+    font-weight:100 1000;
+    font-stretch:25% 151%;
+    font-style:italic;
+  }
+}
+
+:root
+{
+    /* nv branding */
+    --nv-green: #76b900;
+    --nv-green-illuminate: #76d300; /* button state - hover */
+    --nv-black: #000000;
+    --nv-white: #ffffff;
+    --nv-green-2: #004831;
+
+    --nv-success: var(--nv-green);
+    --nv-error: #f44336;
+
+    --nv-font-face: NVIDIA,Arial,Helvetica,Sans-Serif;
+    --nv-font-face-mono: SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;
+
+    /* nv branding: light theme */
+    --text: #1a1a1a;
+    --background-default: #ffffff;
+    --background-alternate: #eeeeee;
+    --ui-and-graphics: #999999;
+
+    --white: #ffffff;
+    --gray-1: #f7f7f7;
+    --gray-2: #eeeeee;
+    --gray-3: #dddddd;
+    --gray-4: #cccccc;
+
+    /* nv branding: light theme mobile (closely matches our old font sizes) */
+    --h1-color: var(--nv-green);
+    --h1-font-weight: 100;
+    --h1-letter-spacing: -0.02em;
+    --h1-font-size: 36px;
+    --h1-line-height: 1em;
+    --h1-text-transform: uppercase;
+
+    --h2-color: var(--nv-green);
+    --h2-font-weight: 100;
+    --h2-letter-spacing: -0.02em;
+    --h2-font-size: 24px;
+    --h2-line-height: 1em;
+    --h2-text-transform: uppercase;
+
+    --h3-color: var(--nv-green);
+    --h3-font-weight: 100;
+    --h3-letter-spacing: -0.02em;
+    --h3-font-size: 21px;
+    --h3-line-height: 1em;
+    --h3-text-transform: uppercase;
+
+    --h4-color: var(--nv-green);
+    --h4-font-weight: 100;
+    --h4-letter-spacing: -0.02em;
+    --h4-font-size: 18px;
+    --h4-line-height: 1em;
+    --h4-text-transform: uppercase;
+
+    --h5-color: var(--nv-green);
+    --h5-font-size: var(--body-font-size);
+
+    --h6-color: var(--nv-green);
+    --h6-font-weight: 400;
+
+    --body-font-color: var(--text);
+    --body-font-weight: normal;
+    --body-font-size: 16px;
+    --body-line-height: 1.5em;
+
+    --small-font-color: var(--ui-and-graphics);
+    --small-font-weight: normal;
+    --small-font-size: 12px;
+    --small-line-height: 1.25em;
+
+    --ul-font-color: var(--text);
+    --ul-font-weight: normal;
+    --ul-font-size: 16px;
+    --ul-line-height: 2em;
+    --ul-marker-font-face: FontAwesome;
+    --ul-marker-content: '\f105 \00a0 \00a0';
+
+    --ol-font-color: var(--text);
+    --ol-font-weight: normal;
+    --ol-font-size: 16px;
+    --ol-line-height: 2em;
+    --ol-list-style-type: decimal;
+    --ol-ol-list-style-type: upper-alpha;
+    --ol-ol-ol-list-style-type: decimal; /* not specified in style guide */
+
+    --disabled-font-color: var(--gray-4);
+    --disabled-font-weight: normal;
+    --disabled-font-size: 16px;
+    --disabled-line-height: 1em; /* style guide says 16px */
+
+    --error-font-color: var(--nv-error);
+    --error-font-weight: normal;
+    --error-font-size: 16px;
+    --error-line-height: 1em; /* style guide says 16px */
+
+    --success-font-color: var(--nv-success);
+    --success-font-weight: normal;
+    --success-font-size: 16px;
+    --success-line-height: 1em; /* style guide says 16px */
+
+    /* omni-style */
+    --sidebar-color: #000000;
+    --sidebar-alt-color: #333333;
+    --sidebar-headline-color: var(--nv-green);
+    --sidebar-text-color: #cccccc;
+
+    --table-background-header: var(--nv-black);
+    --table-background-alternate: var(--background-alternate); /* for alternating rows */
+    --table-text: var(--text);
+    --table-border: var(--ui-and-graphics);
+    --table-border-header: var(--gray-3);
+
+    /* this is off-brand, but `uppercase` makes headings with source code look bad. */
+    --h1-text-transform: none;
+    --h2-text-transform: none;
+    --h3-text-transform: none;
+    --h4-text-transform: none;
+
+    --h3-font-weight: normal; /* this is off-brand and overrides the above definition */
+
+    --note-background-color: var(--nv-green);
+    --note-background-alt-color: #cccccc;
+
+    --important-background-color: #f44336;
+    --important-background-alt-color: #cccccc;
+
+    --link-color: var(--nv-green);
+    --link-visited-color: var(--nv-green);
+    --link-hover-color: var(--nv-green-illuminate);
+
+    --background-color: var(--background-default);
+
+    /* template T* tryAcquireInterface(const void* pluginInterface) */
+    --api-member-header-background-color: var(--gray-2);
+    --api-member-header-border-color: var(--sidebar-headline-color);
+    --api-member-header-text-color: var(--text);
+    --api-member-header-link-color: var(--link-color);
+
+    --api-member-background-color: var(--gray-1);
+
+    /* struct carb::Framework */
+    --api-header-text-color: var(--nv-green);
+    --api-header-border-color: var(--ui-and-graphics);
+
+    /* sphinx-design color modifications */
+    --sd-color-tabs-label-active: var(--nv-green);
+    --sd-color-tabs-underline-active: var(--nv-green);
+
+    --sd-color-tabs-label-hover: var(--nv-green-illuminate);
+    --sd-color-tabs-underline-hover: var(--nv-green-illuminate);
+}
+
+/* Custom Styles */
+:root {
+    --pst-font-size-base: none;
+    --pst-color-admonition-note: var(--pst-color-primary);
+    --pst-color-admonition-default: var(--pst-color-primary);
+    --pst-color-info: 255, 193, 7;
+    --pst-color-admonition-tip: var(--pst-color-info);
+    --pst-color-admonition-hint: var(--pst-color-info);
+    --pst-color-admonition-important: var(--pst-color-info);
+    --pst-color-warning: 245, 162, 82;
+    --pst-color-danger: 230, 101, 129;
+    --pst-color-admonition-warning: var(--pst-color-danger);
+    --pst-color-link: 118, 185, 0;
+    --pst-color-inline-code: 92, 22, 130;
+    --font-family-sans-serif: NVIDIA Sans, Helvetica, Arial, var(--pst-font-family-base-system);
+    --pst-font-family-heading: NVIDIA Sans, Helvetica, Arial, var(--pst-font-family-base-system);
+    --pst-font-family-monospace: Roboto Mono, var(--pst-font-family-monospace-system);
+    font-family: NVIDIA Sans, Helvetica, Arial,Sans-serif;
+}
+
+
+html[data-theme="light"] {
+    --pst-color-primary: var(--nv-green);
+}
+html[data-theme="dark"] {
+    --pst-color-primary: var(--nv-green);
+}
+
+/**********************************************************************************************************************/
+/* Standard Text Formatting */
+/**********************************************************************************************************************/
+
+/* Headline Formatting */
+.bd-container h1
+{
+    color: var(--h1-color);
+
+    font-weight: var(--h1-font-weight);
+    font-size: var(--h1-font-size);
+    font-style: normal;
+
+    line-height: var(--h1-line-height);
+    margin-top: 0.75em;
+    margin-bottom: 0.75em !important; /* override RTD theme */
+
+    text-transform: var(--h1-text-transform);
+}
+
+.bd-container h2
+{
+    color: var(--h2-color);
+
+    font-weight: var(--h2-font-weight);
+    font-size: var(--h2-font-size);
+    font-style: normal;
+
+    line-height: var(--h2-line-height);
+    margin-top: 1.25em;
+    margin-bottom: 0.5em !important; /* override RTD theme */
+
+    text-transform: var(--h2-text-transform);
+}
+
+.bd-container h3
+{
+    color: var(--h3-color);
+
+    font-weight: var(--h3-font-weight);
+    font-size: var(--h3-font-size);
+    font-style: normal;
+
+    line-height: var(--h3-line-height);
+    margin-top: 1.25em;
+    margin-bottom: 0.5em !important; /* override RTD theme */
+
+    text-transform: var(--h3-text-transform);
+}
+
+.bd-container h4
+{
+    color: var(--h4-color);
+
+    font-weight: var(--h4-font-weight);
+    font-size: var(--h4-font-size);
+    font-style: normal;
+
+    line-height: var(--h4-line-height);
+    margin-top: 1.25em;
+    margin-bottom: 0.5em !important; /* override RTD theme */
+
+    text-transform: var(--h4-text-transform);
+}
+
+.bd-container h5
+{
+    color: var(--h5-color);
+
+    font-size: var(--h5-font-size);
+}
+
+.bd-container h6
+{
+    color: var(--h6-color);
+
+    font-weight: var(--h6-font-weight);
+}
+
+/* Math should inherit its color */
+span[id*=MathJax-Span]
+{
+    color: inherit;
+}
+
+/* text highlighted by search */
+.rst-content .highlighted
+{
+    background: #f1c40f3b;
+    box-shadow: 0 0 0 1px #f1c40f;
+    display: inline;
+    font-weight: inherit;
+}
+
+/* a local table-of-contents messes with heading colors. make sure to use the regular heading colors */
+.rst-content .toc-backref
+{
+    color: inherit;
+}
+
+/* make links to function looks like other literals */
+.rst-content code.xref,
+.rst-content tt.xref,
+a .rst-content code,
+a .rst-content tt
+{
+    color: #e74c3c;
+    font-weight: inherit;
+}
+
+/* Link Colors */
+a
+{
+    color: var(--link-color);
+}
+
+a:visited
+{
+    color: var(--link-visited-color);
+}
+
+a:hover
+{
+    color: var(--link-hover-color);
+}
+
+/* follow branding guide for small footer text */
+footer p
+{
+    color: var(--small-font-color);
+    font-weight: var(--small-font-weight);
+    font-size: var(--small-font-size);
+    line-height: var(--small-line-height);
+}
+
+/* add nvidia logo (like www.nvidia.com) */
+html[data-theme="light"] footer.bd-footer-content p.copyright::before
+{
+    content: url(../NVIDIA-LogoBlack.svg);
+    display: block;
+    width: 110px;
+    margin: 0px;
+    position: relative;
+    left: -9px;
+}
+
+/* add nvidia logo (like www.nvidia.com) */
+html[data-theme="dark"] footer.bd-footer-content p.copyright::before
+{
+    content: url(../NVIDIA-LogoWhite.svg);
     display: block;
-    width: 300px;
-    padding: .809em;
-    padding-top: 0.809em;
-    margin-bottom: .809em;
-    z-index: 200;
-    background-color: #2980b9;
-    text-align: center;
-    color: #fcfcfc;
-    padding-top: 40px;
-}
-
-div.banner {
-    position: fixed;
-    top: 10px;
-    left: 20px;
-    margin: 0;
-    z-index: 1000;
-    width: 1050px;
-    text-align: center;
-}
-
-p.banner {
-    border-radius: 4px;
-    color: #004831;
-    background: #76b900;
-}
\ No newline at end of file
+    width: 110px;
+    margin: 0px;
+    position: relative;
+    left: -9px;
+}
+
+
+/**********************************************************************************************************************/
+/* Lists */
+/**********************************************************************************************************************/
+
+/* unordered list should have a nv-green > */
+.rst-content section ul:not(.treeView):not(.collapsibleList) li:not(.collapsibleListClosed):not(.collapsibleListOpen):not(.lastChild)::marker,
+.rst-content .toctree-wrapper ul li::marker,
+.wy-plain-list-disc li::marker,
+article ul li::marker
+{
+    font-family: var(--ul-marker-font-face);
+    content: var(--ul-marker-content);
+    color: var(--nv-green);
+    font-weight: 600;
+}
+
+/* top-level ordered list should have a nv-green number */
+.rst-content section ol li::marker,
+.rst-content ol.arabic li::marker,
+.wy-plain-list-decimal li::marker,
+article ol li::marker
+{
+    color: var(--nv-green);
+    font-weight: 600;
+    list-style: var(--ol-list-style-type);
+}
+
+/* second-level ordered list should have a nv-green uppercase letter */
+.rst-content section ol ol li,
+.rst-content ol.arabic ol.arabic li,
+.wy-plain-list-decimal ol ol li,
+article ol ol li
+{
+    list-style: var(--ol-ol-list-style-type);
+}
+
+/* third-level ordered lists aren't in the branding guide. let's use numbers. */
+.rst-content section ol ol ol li,
+.rst-content ol.arabic ol.arabic ol li,
+.wy-plain-list-decimal ol ol ol li,
+article ol ol ol li
+{
+    list-style: var(--ol-ol-ol-list-style-type);
+}
+
+/* start the first paragraph immediately (don't add space at the top) */
+dd p:first-child
+{
+    margin-top: 0px;
+}
diff --git a/docs/source/_static/css/versions.css b/docs/source/_static/css/versions.css
new file mode 100644
index 0000000000..cafebc54ba
--- /dev/null
+++ b/docs/source/_static/css/versions.css
@@ -0,0 +1,140 @@
+/* Version Switcher */
+
+.rst-versions {
+    flex-align: bottom;
+    bottom: 0;
+    left: 0;
+    z-index: 400
+}
+
+.rst-versions a {
+    color: var(--nv-green);
+    text-decoration: none
+}
+
+.rst-versions .rst-badge-small {
+    display: none
+}
+
+.rst-versions .rst-current-version {
+    padding: 12px;
+    display: block;
+    text-align: right;
+    font-size: 90%;
+    cursor: pointer;
+    border-top: 1px solid rgba(0,0,0,.1);
+    *zoom:1
+}
+
+.rst-versions .rst-current-version:before,.rst-versions .rst-current-version:after {
+    display: table;
+    content: ""
+}
+
+.rst-versions .rst-current-version:after {
+    clear: both
+}
+
+.rst-versions .rst-current-version .fa-book {
+    float: left
+}
+
+.rst-versions .rst-current-version .icon-book {
+    float: left
+}
+
+.rst-versions .rst-current-version.rst-out-of-date {
+    background-color: #E74C3C;
+    color: #fff
+}
+
+.rst-versions .rst-current-version.rst-active-old-version {
+    background-color: #F1C40F;
+    color: #000
+}
+
+.rst-versions.shift-up {
+    height: auto;
+    max-height: 100%
+}
+
+.rst-versions.shift-up .rst-other-versions {
+    display: block
+}
+
+.rst-versions .rst-other-versions {
+    font-size: 90%;
+    padding: 12px;
+    color: gray;
+    display: none
+}
+
+.rst-versions .rst-other-versions hr {
+    display: block;
+    height: 1px;
+    border: 0;
+    margin: 20px 0;
+    padding: 0;
+    border-top: solid 1px #413d3d
+}
+
+.rst-versions .rst-other-versions dd {
+    display: inline-block;
+    margin: 0
+}
+
+.rst-versions .rst-other-versions dd a {
+    display: inline-block;
+    padding: 6px;
+    color: var(--nv-green);
+    font-weight: 500;
+}
+
+.rst-versions.rst-badge {
+    width: auto;
+    bottom: 20px;
+    right: 20px;
+    left: auto;
+    border: none;
+    max-width: 300px
+}
+
+.rst-versions.rst-badge .icon-book {
+    float: none
+}
+
+.rst-versions.rst-badge .fa-book {
+    float: none
+}
+
+.rst-versions.rst-badge.shift-up .rst-current-version {
+    text-align: right
+}
+
+.rst-versions.rst-badge.shift-up .rst-current-version .fa-book {
+    float: left
+}
+
+.rst-versions.rst-badge.shift-up .rst-current-version .icon-book {
+    float: left
+}
+
+.rst-versions.rst-badge .rst-current-version {
+    width: auto;
+    height: 30px;
+    line-height: 30px;
+    padding: 0 6px;
+    display: block;
+    text-align: center
+}
+
+@media screen and (max-width: 768px) {
+    .rst-versions {
+        width:85%;
+        display: none
+    }
+
+    .rst-versions.shift {
+        display: block
+    }
+}
diff --git a/docs/source/_static/favicon.png b/docs/source/_static/favicon.png
new file mode 100755
index 0000000000000000000000000000000000000000..a00f862ecfbbe5cf3799ac2b550438a706e804a0
GIT binary patch
literal 2197
zcmcImX;_n27QWw?6v8G(S*?KpmZeZgKqo>m36)L4DvBG0;Fb;y7^4P6LK3VYiWZs2
z7L-k$5>OCmMNk2Wq*fLMeWHUU4M}i{A|&QvjUkZv=r6ncm><(0-*fIg@4e@|=iGbF
z{Z7ZnMp&7zF$VxxZHWxs3IK!*A%HPKL^7-HAR^3|k-Lrn@ac->4|>#kLhgPK{Y4I1&t3f4>FR7sl}&oXV_k>Z-YGFz#}Hv-2m zuAKb`YJC_Vug$M!2wpn&k5RPqSR-^u+L{UZFG|>j$uPK&1_fpSy&7QL!OCcWF9TEp z#H0VEz>92J*SL%$^;M>eujLPC4!75goT!4{bNtCtj2Qct=2gVmcPetI=0a&kyDGy< zEYFgT_$YJ5o$7mccI^Wl=>IFxjL#ik52^3HI|dD*k{$=VmyzZT9cDT^K-KZze^;sIsQAujlyUc-p%jsMVW~TT``x0?`1T=dV< z&!lr#Grw8ZRc?aKB&nZ~KiHUowlqGd_yqQnq!y4(f@#as7$^R>D!()Zs%$+h;1b=7 zsKeUQGbrt=9!1vU7O;Tmxn;(eRdCKBuUy zAiYE+rRD+`r}OZLPzqJXClt~3!~=j`-daC9`wY90rK``o>~kME^;jI=t7p^53?Ae0ieftDm z>p0d*;XJykPwR)=N3-CtXWG&hR6(=@B>1c0U?pI+E zlRe%g(SoQz2PYZN-6yffZ}hVjM;CH`c3c`nZp`$mU(0tPYn;@ns9xZ3 zqi-)rv!qTzG>$*G34NC?#;x5 z&7rEZ=@#OZ2|LR?Oz1+-=yfj3x&n6$qT3jf4g|vP=m8s^%T=uvQisMUFJpyjG)!d~ z4r{L&r+@Beo$hu+0GV9WIbJ-mq+9>+kZnSas04HC=;lB81wAuvlKnwCyR2i>}KA#`mHKtX-vNyFc%Ovt|cu zc`g>hZuLL2LsAU)z>zmxH}2e?;{wl)$vDc~#B<^mxoB}`)H|K-UZsn>}g){^wibnNSFyH)WxOSpy{qxKBei8N(vuP zenh=oa%|f9M5mn8Ha?#SLzs_X8>q(}#!xq6d)$E#=^n}|%>Gqi$sIn9<_!v|c97EB z_55bwunu-a^AcR+DUcG6tt+qOCw;O1?>A^~T;nIHE#xYQU^|FTiUeg_?fJd-nZM+r zd0xlrM}x|G&R{^|xWJg(OG}&Ha0qUcsvpr}v->YD<|$tZxHV|{DSh$S!KY0Ja$Y+W z2)GYA(DZE-L$+yB*9L>P2e9-vAdZ_Hme#^zh9wu_b4Wlb2FyV%V->LE5+DpichihY zkQ9S2{S8Po`*p|ZsXPMGoi=;b$QZ`rl073TQmzy#yTQf*j|jcV~b`gvQrtO+{(dirpyfT}tKhsDEqUTSmt-U7GV~ zB9NrGN%FsKLAK}}<`{K>h%vH&v-cJnG4`-|kbVv#OwJM~?Y|d*j)2MM#7T>%Et%!{ q&Qza5^vS>WasAT={QUI`Q}&~6