From 94f74b55613531e8f0bf2472d5666d7f78b8f90f Mon Sep 17 00:00:00 2001 From: Marc Romeyn Date: Fri, 30 Jun 2023 10:12:05 +0200 Subject: [PATCH 1/6] Initialize default metrics/loss inside ModelOutput instead (#1164) * Initialize default metrics/loss inside ModelOutput instead * Trying to change py38 -> py39 in tox.ini to see if that fixes CI * Trying to change py38 -> py39 in tox.ini to see if that fixes CI * Trying to change py39 -> py310 in tox.ini to see if that fixes CI * Update LD_LIBRARY_PATH --- .github/workflows/gpu.yml | 4 ++-- merlin/models/torch/outputs/classification.py | 16 +++++++--------- merlin/models/torch/outputs/regression.py | 11 +++++++---- tests/unit/torch/outputs/test_classification.py | 4 ++-- tests/unit/torch/outputs/test_regression.py | 2 +- tox.ini | 16 ++++++++-------- 6 files changed, 27 insertions(+), 26 deletions(-) diff --git a/.github/workflows/gpu.yml b/.github/workflows/gpu.yml index 8999056146..db4b63275e 100644 --- a/.github/workflows/gpu.yml +++ b/.github/workflows/gpu.yml @@ -34,7 +34,7 @@ jobs: if [[ "${{ github.ref }}" != 'refs/heads/main' ]]; then extra_pytest_markers="and changed" fi - cd ${{ github.workspace }}; PYTEST_MARKERS="unit and not (examples or integration or notebook) $extra_pytest_markers" MERLIN_BRANCH=$branch COMPARE_BRANCH=${{ github.base_ref }} tox -e py38-gpu + cd ${{ github.workspace }}; PYTEST_MARKERS="unit and not (examples or integration or notebook) $extra_pytest_markers" MERLIN_BRANCH=$branch COMPARE_BRANCH=${{ github.base_ref }} tox -e py310-gpu tests-examples: runs-on: 1GPU @@ -55,4 +55,4 @@ jobs: if [[ "${{ github.ref }}" != 'refs/heads/main' ]]; then extra_pytest_markers="and changed" fi - cd ${{ github.workspace }}; PYTEST_MARKERS="(examples or notebook) $extra_pytest_markers" MERLIN_BRANCH=$branch COMPARE_BRANCH=${{ github.base_ref }} tox -e py38-gpu + cd ${{ github.workspace }}; PYTEST_MARKERS="(examples or notebook) $extra_pytest_markers" MERLIN_BRANCH=$branch COMPARE_BRANCH=${{ github.base_ref }} tox -e py310-gpu diff --git a/merlin/models/torch/outputs/classification.py b/merlin/models/torch/outputs/classification.py index e201953fd0..2ca36143f7 100644 --- a/merlin/models/torch/outputs/classification.py +++ b/merlin/models/torch/outputs/classification.py @@ -36,24 +36,22 @@ class BinaryOutput(ModelOutput): The metrics used for evaluation. Default includes Accuracy, AUROC, Precision, and Recall. """ + DEFAULT_LOSS_CLS = nn.BCEWithLogitsLoss + DEFAULT_METRICS_CLS = (Accuracy, AUROC, Precision, Recall) + def __init__( self, schema: Optional[ColumnSchema] = None, - loss: nn.Module = nn.BCEWithLogitsLoss(), - metrics: Sequence[Metric] = ( - Accuracy(task="binary"), - AUROC(task="binary"), - Precision(task="binary"), - Recall(task="binary"), - ), + loss: Optional[nn.Module] = None, + metrics: Sequence[Metric] = (), ): """Initializes a BinaryOutput object.""" super().__init__( nn.LazyLinear(1), nn.Sigmoid(), schema=schema, - loss=loss, - metrics=metrics, + loss=loss or self.DEFAULT_LOSS_CLS(), + metrics=metrics or [m(task="binary") for m in self.DEFAULT_METRICS_CLS], ) def setup_schema(self, target: Optional[Union[ColumnSchema, Schema]]): diff --git a/merlin/models/torch/outputs/regression.py b/merlin/models/torch/outputs/regression.py index 0f9f9ad318..e3b2f97b09 100644 --- a/merlin/models/torch/outputs/regression.py +++ b/merlin/models/torch/outputs/regression.py @@ -36,18 +36,21 @@ class RegressionOutput(ModelOutput): The metrics used for evaluation. Default is MeanSquaredError. 
""" + DEFAULT_LOSS_CLS = nn.MSELoss + DEFAULT_METRICS_CLS = (MeanSquaredError,) + def __init__( self, schema: Optional[ColumnSchema] = None, - loss: nn.Module = nn.MSELoss(), - metrics: Sequence[Metric] = (MeanSquaredError(),), + loss: Optional[nn.Module] = None, + metrics: Sequence[Metric] = (), ): """Initializes a RegressionOutput object.""" super().__init__( nn.LazyLinear(1), schema=schema, - loss=loss, - metrics=metrics, + loss=loss or self.DEFAULT_LOSS_CLS(), + metrics=metrics or [m() for m in self.DEFAULT_METRICS_CLS], ) def setup_schema(self, target: Optional[Union[ColumnSchema, Schema]]): diff --git a/tests/unit/torch/outputs/test_classification.py b/tests/unit/torch/outputs/test_classification.py index ea6643c740..755d465350 100644 --- a/tests/unit/torch/outputs/test_classification.py +++ b/tests/unit/torch/outputs/test_classification.py @@ -31,12 +31,12 @@ def test_init(self): assert isinstance(binary_output, mm.BinaryOutput) assert isinstance(binary_output.loss, nn.BCEWithLogitsLoss) - assert binary_output.metrics == ( + assert binary_output.metrics == [ Accuracy(task="binary"), AUROC(task="binary"), Precision(task="binary"), Recall(task="binary"), - ) + ] assert binary_output.output_schema == Schema() def test_identity(self): diff --git a/tests/unit/torch/outputs/test_regression.py b/tests/unit/torch/outputs/test_regression.py index 17541c5081..f8537bca51 100644 --- a/tests/unit/torch/outputs/test_regression.py +++ b/tests/unit/torch/outputs/test_regression.py @@ -30,7 +30,7 @@ def test_init(self): assert isinstance(reg_output, mm.RegressionOutput) assert isinstance(reg_output.loss, nn.MSELoss) - assert reg_output.metrics == (MeanSquaredError,) + assert reg_output.metrics == [MeanSquaredError()] assert reg_output.output_schema == Schema() def test_identity(self): diff --git a/tox.ini b/tox.ini index 2446672a8f..67477b0fb9 100644 --- a/tox.ini +++ b/tox.ini @@ -2,14 +2,14 @@ ; .github/workflows/cpu-ci.yml for the workflow definition. [tox] -envlist = py38-gpu,py38-multi-gpu +envlist = py310-gpu,py310-multi-gpu [testenv] commands = pip install --upgrade pip pip install -e .[all] -[testenv:py38-gpu] +[testenv:py310-gpu] ; Runs in: Github Actions ; Runs GPU-based tests. allowlist_externals = @@ -28,7 +28,7 @@ commands = python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/systems.git@{env:MERLIN_BRANCH:main} bash -c 'python -m pytest --cov-report term --cov merlin -m "{env:PYTEST_MARKERS}" -rxs tests/ || ([ $? = 5 ] && exit 0 || exit $?)' -[testenv:py38-multi-gpu] +[testenv:py310-multi-gpu] ; Runs in: Github Actions ; Runs GPU-based tests. allowlist_externals = @@ -40,7 +40,7 @@ passenv = setenv = TF_GPU_ALLOCATOR=cuda_malloc_async CPATH={env:CPATH}{:}{envdir}/hugectr/include - LD_LIBRARY_PATH=${envdir}/hugectr/include/lib{:}/usr/local/lib/python3.8/dist-packages/tensorflow{:}{env:LD_LIBRARY_PATH} + LD_LIBRARY_PATH=${envdir}/hugectr/include/lib{:}/usr/local/lib/python3.10/dist-packages/tensorflow{:}{env:LD_LIBRARY_PATH} LIBRARY_PATH=${envdir}/hugectr/lib{:}{env:LIBRARY_PATH} sitepackages=true commands = @@ -50,7 +50,7 @@ commands = sh examples/usecases/multi-gpu/install_sparse_operation_kit.sh {envdir} bash -c 'horovodrun -np 2 sh examples/usecases/multi-gpu/hvd_wrapper.sh python -m pytest -m "horovod {env:EXTRA_PYTEST_MARKERS}" -rxs tests/unit || ([ $? 
= 5 ] && exit 0 || exit $?)' -[testenv:py38-horovod-cpu] +[testenv:py310-horovod-cpu] setenv = HOROVOD_WITH_MPI=1 HOROVOD_WITH_TENSORFLOW=1 @@ -66,7 +66,7 @@ commands = {envdir}/env/bin/python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/nvtabular.git@{env:MERLIN_BRANCH:main} {envdir}/env/bin/horovodrun -np 2 sh examples/usecases/multi-gpu/hvd_wrapper.sh pytest -m "horovod {env:EXTRA_PYTEST_MARKERS}" -rxs tests/unit -[testenv:py38-nvtabular-cpu] +[testenv:py310-nvtabular-cpu] passenv=GIT_COMMIT allowlist_externals = git deps = @@ -82,7 +82,7 @@ commands = python -m pip install . python -m pytest nvtabular-{env:GIT_COMMIT}/tests/unit -[testenv:py38-systems-cpu] +[testenv:py310-systems-cpu] passenv=GIT_COMMIT allowlist_externals = git deps = @@ -99,7 +99,7 @@ commands = python -m pip install . python -m pytest -m "not notebook" systems-{env:GIT_COMMIT}/tests/unit -[testenv:py38-transformers4rec-cpu] +[testenv:py310-transformers4rec-cpu] passenv=GIT_COMMIT allowlist_externals = git commands = From b671c8e58231f0a39a4ac6e527dc28506633073b Mon Sep 17 00:00:00 2001 From: tien-nguyen Date: Fri, 30 Jun 2023 08:32:55 -0700 Subject: [PATCH 2/6] We need to remove README after the hyper link. (#1163) Co-authored-by: Marc Romeyn --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bca6283a66..94a80bd7be 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,7 @@ You can find more details and information about a low-level API in our overview ### Notebook Examples and Tutorials -View the example notebooks in the [documentation](https://nvidia-merlin.github.io/models/stable/examples/README.html) to help you become familiar with Merlin Models. +View the example notebooks in the [documentation](https://nvidia-merlin.github.io/models/stable/examples/) to help you become familiar with Merlin Models. The same notebooks are available in the `examples` directory from the [Merlin Models](https://github.com/NVIDIA-Merlin/models) GitHub repository. 
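
The motivation for the default-initialization change in PATCH 1/6 is that torchmetrics objects carry accumulated state, so instantiated defaults in a function signature end up shared across every output built without arguments. A minimal standalone sketch of the pattern, using illustrative names rather than the actual ModelOutput class:

from typing import Optional, Sequence

import torch.nn as nn
from torchmetrics import Accuracy, Metric


class Output:
    """Illustrative stand-in for ModelOutput; not the Merlin class itself."""

    DEFAULT_LOSS_CLS = nn.BCEWithLogitsLoss
    DEFAULT_METRICS_CLS = (Accuracy,)

    def __init__(self, loss: Optional[nn.Module] = None, metrics: Sequence[Metric] = ()):
        # Construct defaults per instance: metrics accumulate state in update(),
        # so a signature default like `metrics=(Accuracy(task="binary"),)` would
        # be the same object in every Output created without arguments.
        self.loss = loss or self.DEFAULT_LOSS_CLS()
        self.metrics = list(metrics) or [m(task="binary") for m in self.DEFAULT_METRICS_CLS]


a, b = Output(), Output()
assert a.metrics[0] is not b.metrics[0]  # each instance gets fresh metric state
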
From 1783951391ae800925e05333f4fc82039c12e102 Mon Sep 17 00:00:00 2001 From: Oliver Holworthy Date: Sat, 1 Jul 2023 01:57:21 +0100 Subject: [PATCH 3/6] Rename and update tox environments (#1168) * Remove python prefix from tox environment names * Move Merlin dependencies to deps configuation of tox environment * Use posargs for tests path in tox environments --- .github/workflows/cpu-horovod.yml | 2 +- .github/workflows/cpu-nvtabular.yml | 2 +- .github/workflows/cpu-systems.yml | 2 +- .github/workflows/cpu-t4r.yml | 2 +- .github/workflows/gpu-multi.yml | 2 +- .github/workflows/gpu.yml | 4 +- tox.ini | 57 +++++++++++++++-------------- 7 files changed, 37 insertions(+), 34 deletions(-) diff --git a/.github/workflows/cpu-horovod.yml b/.github/workflows/cpu-horovod.yml index 5f08ae3cc1..13c93d2588 100644 --- a/.github/workflows/cpu-horovod.yml +++ b/.github/workflows/cpu-horovod.yml @@ -72,4 +72,4 @@ jobs: if [[ "${{ github.ref }}" != 'refs/heads/main' ]]; then extra_pytest_markers="and changed" fi - EXTRA_PYTEST_MARKERS="$extra_pytest_markers" MERLIN_BRANCH="$merlin_branch" COMPARE_BRANCH=${{ github.base_ref }} tox -e py38-horovod-cpu + EXTRA_PYTEST_MARKERS="$extra_pytest_markers" MERLIN_BRANCH="$merlin_branch" COMPARE_BRANCH=${{ github.base_ref }} tox -e horovod-cpu diff --git a/.github/workflows/cpu-nvtabular.yml b/.github/workflows/cpu-nvtabular.yml index ab095e5dd9..21143cc2f5 100644 --- a/.github/workflows/cpu-nvtabular.yml +++ b/.github/workflows/cpu-nvtabular.yml @@ -64,4 +64,4 @@ jobs: - name: Run tests run: | merlin_branch="${{ steps.get-branch-name.outputs.branch }}" - MERLIN_BRANCH="$merlin_branch" GIT_COMMIT=$(git rev-parse HEAD) tox -e py38-nvtabular-cpu + MERLIN_BRANCH="$merlin_branch" GIT_COMMIT=$(git rev-parse HEAD) tox -e nvtabular-cpu diff --git a/.github/workflows/cpu-systems.yml b/.github/workflows/cpu-systems.yml index 01ef47b35f..0106791290 100644 --- a/.github/workflows/cpu-systems.yml +++ b/.github/workflows/cpu-systems.yml @@ -64,4 +64,4 @@ jobs: - name: Run tests run: | merlin_branch="${{ steps.get-branch-name.outputs.branch }}" - MERLIN_BRANCH="$merlin_branch" GIT_COMMIT=$(git rev-parse HEAD) tox -e py38-systems-cpu + MERLIN_BRANCH="$merlin_branch" GIT_COMMIT=$(git rev-parse HEAD) tox -e systems-cpu diff --git a/.github/workflows/cpu-t4r.yml b/.github/workflows/cpu-t4r.yml index cdda721ec4..2879fb9614 100644 --- a/.github/workflows/cpu-t4r.yml +++ b/.github/workflows/cpu-t4r.yml @@ -60,4 +60,4 @@ jobs: - name: Run tests run: | merlin_branch="${{ steps.get-branch-name.outputs.branch }}" - MERLIN_BRANCH="$merlin_branch" GIT_COMMIT=$(git rev-parse HEAD) tox -e py38-transformers4rec-cpu + MERLIN_BRANCH="$merlin_branch" GIT_COMMIT=$(git rev-parse HEAD) tox -e transformers4rec-cpu diff --git a/.github/workflows/gpu-multi.yml b/.github/workflows/gpu-multi.yml index 62e26961b9..3d47558932 100644 --- a/.github/workflows/gpu-multi.yml +++ b/.github/workflows/gpu-multi.yml @@ -56,4 +56,4 @@ jobs: if [[ "${{ github.ref }}" != 'refs/heads/main' ]]; then extra_pytest_markers="and changed" fi - cd ${{ github.workspace }}; EXTRA_PYTEST_MARKERS=$extra_pytest_markers MERLIN_BRANCH=$branch COMPARE_BRANCH=${{ github.base_ref }} tox -e py38-multi-gpu + cd ${{ github.workspace }}; EXTRA_PYTEST_MARKERS=$extra_pytest_markers MERLIN_BRANCH=$branch COMPARE_BRANCH=${{ github.base_ref }} tox -e multi-gpu diff --git a/.github/workflows/gpu.yml b/.github/workflows/gpu.yml index db4b63275e..90478661c9 100644 --- a/.github/workflows/gpu.yml +++ b/.github/workflows/gpu.yml @@ -34,7 +34,7 @@ jobs: 
if [[ "${{ github.ref }}" != 'refs/heads/main' ]]; then extra_pytest_markers="and changed" fi - cd ${{ github.workspace }}; PYTEST_MARKERS="unit and not (examples or integration or notebook) $extra_pytest_markers" MERLIN_BRANCH=$branch COMPARE_BRANCH=${{ github.base_ref }} tox -e py310-gpu + cd ${{ github.workspace }}; PYTEST_MARKERS="unit and not (examples or integration or notebook) $extra_pytest_markers" MERLIN_BRANCH=$branch COMPARE_BRANCH=${{ github.base_ref }} tox -e gpu tests-examples: runs-on: 1GPU @@ -55,4 +55,4 @@ jobs: if [[ "${{ github.ref }}" != 'refs/heads/main' ]]; then extra_pytest_markers="and changed" fi - cd ${{ github.workspace }}; PYTEST_MARKERS="(examples or notebook) $extra_pytest_markers" MERLIN_BRANCH=$branch COMPARE_BRANCH=${{ github.base_ref }} tox -e py310-gpu + cd ${{ github.workspace }}; PYTEST_MARKERS="(examples or notebook) $extra_pytest_markers" MERLIN_BRANCH=$branch COMPARE_BRANCH=${{ github.base_ref }} tox -e gpu diff --git a/tox.ini b/tox.ini index 67477b0fb9..dc516cde78 100644 --- a/tox.ini +++ b/tox.ini @@ -2,33 +2,34 @@ ; .github/workflows/cpu-ci.yml for the workflow definition. [tox] -envlist = py310-gpu,py310-multi-gpu +envlist = gpu,multi-gpu,horovod-cpu,nvtabular-cpu,systems-cpu,transformers4rec-cpu,docs,docs-multi [testenv] commands = pip install --upgrade pip pip install -e .[all] -[testenv:py310-gpu] +[testenv:gpu] ; Runs in: Github Actions ; Runs GPU-based tests. allowlist_externals = bash deps = - --no-deps -rrequirements/test.txt + -rrequirements/test.txt + git+https://github.com/NVIDIA-Merlin/core.git@{env:MERLIN_BRANCH} + git+https://github.com/NVIDIA-Merlin/dataloader.git@{env:MERLIN_BRANCH} + git+https://github.com/NVIDIA-Merlin/NVTabular.git@{env:MERLIN_BRANCH} + git+https://github.com/NVIDIA-Merlin/systems.git@{env:MERLIN_BRANCH} passenv = OPAL_PREFIX setenv = TF_GPU_ALLOCATOR=cuda_malloc_async sitepackages=true commands = - python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/core.git@{env:MERLIN_BRANCH:main} - python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/dataloader.git@{env:MERLIN_BRANCH:main} - python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/nvtabular.git@{env:MERLIN_BRANCH:main} - python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/systems.git@{env:MERLIN_BRANCH:main} - bash -c 'python -m pytest --cov-report term --cov merlin -m "{env:PYTEST_MARKERS}" -rxs tests/ || ([ $? = 5 ] && exit 0 || exit $?)' + bash -c 'python -m pytest --cov-report term --cov merlin -m "{env:PYTEST_MARKERS}" -rxs {posargs:tests} || ([ $? = 5 ] && exit 0 || exit $?)' + -[testenv:py310-multi-gpu] +[testenv:multi-gpu] ; Runs in: Github Actions ; Runs GPU-based tests. 
allowlist_externals = @@ -43,30 +44,32 @@ setenv = LD_LIBRARY_PATH=${envdir}/hugectr/include/lib{:}/usr/local/lib/python3.10/dist-packages/tensorflow{:}{env:LD_LIBRARY_PATH} LIBRARY_PATH=${envdir}/hugectr/lib{:}{env:LIBRARY_PATH} sitepackages=true +deps = + git+https://github.com/NVIDIA-Merlin/core.git@{env:MERLIN_BRANCH} + git+https://github.com/NVIDIA-Merlin/dataloader.git@{env:MERLIN_BRANCH} + git+https://github.com/NVIDIA-Merlin/NVTabular.git@{env:MERLIN_BRANCH} commands = - python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/core.git@{env:MERLIN_BRANCH:main} - python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/dataloader.git@{env:MERLIN_BRANCH:main} - python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/nvtabular.git@{env:MERLIN_BRANCH:main} sh examples/usecases/multi-gpu/install_sparse_operation_kit.sh {envdir} - bash -c 'horovodrun -np 2 sh examples/usecases/multi-gpu/hvd_wrapper.sh python -m pytest -m "horovod {env:EXTRA_PYTEST_MARKERS}" -rxs tests/unit || ([ $? = 5 ] && exit 0 || exit $?)' + bash -c 'horovodrun -np 2 sh examples/usecases/multi-gpu/hvd_wrapper.sh python -m pytest -m "unit and horovod {env:EXTRA_PYTEST_MARKERS}" -rxs {posargs:tests} || ([ $? = 5 ] && exit 0 || exit $?)' -[testenv:py310-horovod-cpu] +[testenv:horovod-cpu] setenv = HOROVOD_WITH_MPI=1 HOROVOD_WITH_TENSORFLOW=1 PATH={env:PATH}{:}{envdir}/env/bin LD_LIBRARY_PATH={env:LD_LIBRARY_PATH}{:}{envdir}/env/lib +deps = + git+https://github.com/NVIDIA-Merlin/core.git@{env:MERLIN_BRANCH} + git+https://github.com/NVIDIA-Merlin/dataloader.git@{env:MERLIN_BRANCH} + git+https://github.com/NVIDIA-Merlin/NVTabular.git@{env:MERLIN_BRANCH} commands = conda update --yes --name base --channel defaults conda conda env create --prefix {envdir}/env --file requirements/horovod-cpu-environment.yml --force {envdir}/env/bin/python -m pip install 'horovod==0.27.0' --no-cache-dir {envdir}/env/bin/horovodrun --check-build - {envdir}/env/bin/python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/core.git@{env:MERLIN_BRANCH:main} - {envdir}/env/bin/python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/dataloader.git@{env:MERLIN_BRANCH:main} - {envdir}/env/bin/python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/nvtabular.git@{env:MERLIN_BRANCH:main} - {envdir}/env/bin/horovodrun -np 2 sh examples/usecases/multi-gpu/hvd_wrapper.sh pytest -m "horovod {env:EXTRA_PYTEST_MARKERS}" -rxs tests/unit + {envdir}/env/bin/horovodrun -np 2 sh examples/usecases/multi-gpu/hvd_wrapper.sh pytest -m "unit and horovod {env:EXTRA_PYTEST_MARKERS}" -rxs {posargs:tests} -[testenv:py310-nvtabular-cpu] +[testenv:nvtabular-cpu] passenv=GIT_COMMIT allowlist_externals = git deps = @@ -82,7 +85,7 @@ commands = python -m pip install . python -m pytest nvtabular-{env:GIT_COMMIT}/tests/unit -[testenv:py310-systems-cpu] +[testenv:systems-cpu] passenv=GIT_COMMIT allowlist_externals = git deps = @@ -99,7 +102,7 @@ commands = python -m pip install . 
python -m pytest -m "not notebook" systems-{env:GIT_COMMIT}/tests/unit -[testenv:py310-transformers4rec-cpu] +[testenv:transformers4rec-cpu] passenv=GIT_COMMIT allowlist_externals = git commands = @@ -120,10 +123,10 @@ changedir = {toxinidir} deps = -rrequirements/docs.txt -rrequirements/test.txt + git+https://github.com/NVIDIA-Merlin/core.git + git+https://github.com/NVIDIA-Merlin/dataloader.git + git+https://github.com/NVIDIA-Merlin/NVTabular.git commands = - python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/core.git - python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/dataloader.git - python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/nvtabular.git python -m sphinx.cmd.build -E -P -b html docs/source docs/build/html [testenv:docs-multi] @@ -132,9 +135,9 @@ changedir = {toxinidir} deps = -rrequirements/docs.txt -rrequirements/test.txt + git+https://github.com/NVIDIA-Merlin/core.git + git+https://github.com/NVIDIA-Merlin/dataloader.git + git+https://github.com/NVIDIA-Merlin/NVTabular.git commands = - python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/core.git - python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/dataloader.git - python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/nvtabular.git sphinx-multiversion --dump-metadata docs/source docs/build/html | jq "keys" sphinx-multiversion docs/source docs/build/html From b89f66e13dbce55dac4cf724f74b7a34cfe7b1c3 Mon Sep 17 00:00:00 2001 From: Oliver Holworthy Date: Sat, 1 Jul 2023 02:37:48 +0100 Subject: [PATCH 4/6] Add fixture to cleanup dataloader (#1167) Co-authored-by: edknv <109497216+edknv@users.noreply.github.com> --- tests/conftest.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 6e5daeb58b..386ea8884f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,6 +20,7 @@ import platform import warnings from pathlib import Path +from unittest.mock import patch import distributed import psutil @@ -27,6 +28,7 @@ from asvdb import BenchmarkInfo, utils from merlin.core.utils import Distributed +from merlin.dataloader.loader_base import LoaderBase from merlin.datasets.synthetic import generate_data from merlin.io import Dataset from merlin.models.utils import ci_utils @@ -145,3 +147,17 @@ def get_benchmark_info(): arch=uname.machine, ram="%d" % psutil.virtual_memory().total, ) + + +@pytest.fixture(scope="function", autouse=True) +def cleanup_dataloader(): + """After each test runs. Call .stop() on any dataloaders created during the test. + The avoids issues with background threads hanging around and interfering with subsequent tests. + This happens when a dataloader is partially consumed (not all batches are iterated through). 
+ """ + with patch.object( + LoaderBase, "__iter__", side_effect=LoaderBase.__iter__, autospec=True + ) as patched: + yield + for call in patched.call_args_list: + call.args[0].stop() From 25f98f1566eb05ad8df3788e1dee550963a78fed Mon Sep 17 00:00:00 2001 From: Oliver Holworthy Date: Sat, 1 Jul 2023 04:20:25 +0100 Subject: [PATCH 5/6] Replace import of collections.Sequence with collections.abc.Sequence (#1166) Co-authored-by: edknv <109497216+edknv@users.noreply.github.com> --- merlin/models/tf/core/tabular.py | 4 ++-- merlin/models/tf/inputs/embedding.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/merlin/models/tf/core/tabular.py b/merlin/models/tf/core/tabular.py index 33b1ed5b42..3f2bb8e574 100644 --- a/merlin/models/tf/core/tabular.py +++ b/merlin/models/tf/core/tabular.py @@ -1,5 +1,5 @@ import abc -import collections +import collections.abc import copy from typing import Dict, List, Optional, Sequence, Union, overload @@ -600,7 +600,7 @@ def get_config(self): def select_by_tag(self, tags: Tags) -> Optional["Filter"]: if isinstance(self.feature_names, Tags): schema = self.schema.select_by_tag(self.feature_names).select_by_tag(tags) - elif isinstance(self.feature_names, collections.Sequence): + elif isinstance(self.feature_names, collections.abc.Sequence): schema = self.schema.select_by_name(self.feature_names).select_by_tag(tags) else: raise RuntimeError( diff --git a/merlin/models/tf/inputs/embedding.py b/merlin/models/tf/inputs/embedding.py index aff30184a6..156ef974e8 100644 --- a/merlin/models/tf/inputs/embedding.py +++ b/merlin/models/tf/inputs/embedding.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import collections +import collections.abc import inspect from copy import deepcopy from dataclasses import dataclass @@ -268,7 +268,7 @@ def select_by_tag(self, tags: Union[Tags, Sequence[Tags]]) -> Optional["Embeddin ------- An EmbeddingTable if the tags match. If no features match, it returns None. 
""" - if not isinstance(tags, collections.Sequence): + if not isinstance(tags, collections.abc.Sequence): tags = [tags] selected_schema = self.schema.select_by_tag(tags) From 86d0a34bf5902fe1510678eafccd5841810f4f5e Mon Sep 17 00:00:00 2001 From: edknv <109497216+edknv@users.noreply.github.com> Date: Sat, 1 Jul 2023 19:04:12 +0900 Subject: [PATCH 6/6] Add DLRM block (#1162) * First pass over DLRM-related blocks * add a test for dlrm block and fixes to make test pass * Add unit tests * Initialize default metrics/loss inside ModelOutput instead * Update merlin/models/torch/blocks/dlrm.py Co-authored-by: Radek Osmulski * fixes and changes for failing tests * pass batch to model test --------- Co-authored-by: Marc Romeyn Co-authored-by: Radek Osmulski --- merlin/models/torch/__init__.py | 2 + merlin/models/torch/blocks/dlrm.py | 141 +++++++++++++++++++++++ merlin/models/torch/router.py | 3 + merlin/models/torch/transforms/agg.py | 3 + tests/unit/torch/blocks/test_dlrm.py | 112 ++++++++++++++++++ tests/unit/torch/models/test_base.py | 6 +- tests/unit/torch/outputs/test_tabular.py | 6 +- 7 files changed, 268 insertions(+), 5 deletions(-) create mode 100644 merlin/models/torch/blocks/dlrm.py create mode 100644 tests/unit/torch/blocks/test_dlrm.py diff --git a/merlin/models/torch/__init__.py b/merlin/models/torch/__init__.py index eef6f9ece6..d2326af5e9 100644 --- a/merlin/models/torch/__init__.py +++ b/merlin/models/torch/__init__.py @@ -17,6 +17,7 @@ from merlin.models.torch import schema from merlin.models.torch.batch import Batch, Sequence from merlin.models.torch.block import Block, ParallelBlock +from merlin.models.torch.blocks.dlrm import DLRMBlock from merlin.models.torch.blocks.mlp import MLPBlock from merlin.models.torch.inputs.embedding import EmbeddingTable, EmbeddingTables from merlin.models.torch.inputs.select import SelectFeatures, SelectKeys @@ -51,4 +52,5 @@ "Concat", "Stack", "schema", + "DLRMBlock", ] diff --git a/merlin/models/torch/blocks/dlrm.py b/merlin/models/torch/blocks/dlrm.py new file mode 100644 index 0000000000..a24e4d1f71 --- /dev/null +++ b/merlin/models/torch/blocks/dlrm.py @@ -0,0 +1,141 @@ +from typing import Dict, Optional + +import torch +from torch import nn + +from merlin.models.torch.block import Block +from merlin.models.torch.inputs.embedding import EmbeddingTables +from merlin.models.torch.inputs.tabular import TabularInputBlock +from merlin.models.torch.link import Link +from merlin.models.torch.transforms.agg import MaybeAgg, Stack +from merlin.models.utils.doc_utils import docstring_parameter +from merlin.schema import Schema, Tags + +_DLRM_REF = """ + References + ---------- + .. [1] Naumov, Maxim, et al. "Deep learning recommendation model for + personalization and recommendation systems." arXiv preprint arXiv:1906.00091 (2019). +""" + + +@docstring_parameter(dlrm_reference=_DLRM_REF) +class DLRMInputBlock(TabularInputBlock): + """Input block for DLRM model. + + Parameters + ---------- + schema : Schema, optional + The schema to use for selection. Default is None. + dim : int + The dimensionality of the output vectors. + bottom_block : Block + Block to pass the continuous features to. + Note that, the output dimensionality of this block must be equal to ``dim``. + + {dlrm_reference} + + Raises + ------ + ValueError + If no categorical input is provided in the schema. 
+ + """ + + def __init__(self, schema: Schema, dim: int, bottom_block: Block): + super().__init__(schema) + self.add_route(Tags.CATEGORICAL, EmbeddingTables(dim, seq_combiner="mean")) + self.add_route(Tags.CONTINUOUS, bottom_block) + + if "categorical" not in self: + raise ValueError("DLRMInputBlock must have a categorical input") + + +@docstring_parameter(dlrm_reference=_DLRM_REF) +class DLRMInteraction(nn.Module): + """ + This class defines the forward interaction operation as proposed + in the DLRM + `paper https://arxiv.org/pdf/1906.00091.pdf`_ [1]_. + + This forward operation performs elementwise multiplication + followed by a reduction sum (equivalent to a dot product) of all embedding pairs. + + {dlrm_reference} + + """ + + def forward(self, inputs: torch.Tensor) -> torch.Tensor: + if not hasattr(self, "triu_indices"): + self.register_buffer( + "triu_indices", torch.triu_indices(inputs.shape[1], inputs.shape[1], offset=1) + ) + + interactions = torch.bmm(inputs, torch.transpose(inputs, 1, 2)) + interactions_flat = interactions[:, self.triu_indices[0], self.triu_indices[1]] + + return interactions_flat + + +class ShortcutConcatContinuous(Link): + """ + A shortcut connection that concatenates + continuous input features and intermediate outputs. + + When there's no continuous input, the intermediate output is returned. + """ + + def forward(self, inputs: Dict[str, torch.Tensor]) -> torch.Tensor: + intermediate_output = self.output(inputs) + + if "continuous" in inputs: + return torch.cat((inputs["continuous"], intermediate_output), dim=1) + + return intermediate_output + + +@docstring_parameter(dlrm_reference=_DLRM_REF) +class DLRMBlock(Block): + """Builds the DLRM architecture, as proposed in the following + `paper https://arxiv.org/pdf/1906.00091.pdf`_ [1]_. + + Parameters + ---------- + schema : Schema, optional + The schema to use for selection. Default is None. + dim : int + The dimensionality of the output vectors. + bottom_block : Block + Block to pass the continuous features to. + Note that, the output dimensionality of this block must be equal to ``dim``. + top_block : Block, optional + An optional upper-level block of the model. + interaction : nn.Module, optional + Interaction module for DLRM. + If not provided, DLRMInteraction will be used by default. + + {dlrm_reference} + + Raises + ------ + ValueError + If no categorical input is provided in the schema. 
+ """ + + def __init__( + self, + schema: Schema, + dim: int, + bottom_block: Block, + top_block: Optional[Block] = None, + interaction: Optional[nn.Module] = None, + ): + super().__init__(DLRMInputBlock(schema, dim, bottom_block)) + + self.append( + Block(MaybeAgg(Stack(dim=1)), interaction or DLRMInteraction()), + link=ShortcutConcatContinuous(), + ) + + if top_block: + self.append(top_block) diff --git a/merlin/models/torch/router.py b/merlin/models/torch/router.py index 29126e0f91..326064c68c 100644 --- a/merlin/models/torch/router.py +++ b/merlin/models/torch/router.py @@ -88,6 +88,9 @@ def add_route( """ routing_module = schema.select(self.selectable, selection) + if not routing_module: + return self + if module is not None: schema.setup_schema(module, routing_module.schema) diff --git a/merlin/models/torch/transforms/agg.py b/merlin/models/torch/transforms/agg.py index f6dcf457d6..552fcf1d32 100644 --- a/merlin/models/torch/transforms/agg.py +++ b/merlin/models/torch/transforms/agg.py @@ -104,6 +104,9 @@ def forward(self, inputs: Dict[str, torch.Tensor]) -> torch.Tensor: if self.align_dims: max_dims = max(tensor.dim() for tensor in sorted_tensors) + max_dims = max( + max_dims, 2 + ) # assume first dimension is batch size + at least one feature _sorted_tensors = [] for tensor in sorted_tensors: if tensor.dim() < max_dims: diff --git a/tests/unit/torch/blocks/test_dlrm.py b/tests/unit/torch/blocks/test_dlrm.py new file mode 100644 index 0000000000..21d65e8561 --- /dev/null +++ b/tests/unit/torch/blocks/test_dlrm.py @@ -0,0 +1,112 @@ +import math + +import pytest +import torch + +import merlin.models.torch as mm +from merlin.models.torch.batch import sample_batch +from merlin.models.torch.blocks.dlrm import DLRMInputBlock, DLRMInteraction +from merlin.models.torch.utils import module_utils +from merlin.schema import Tags + + +class TestDLRMInputBlock: + def test_routes_and_output_shapes(self, testing_data): + schema = testing_data.schema + embedding_dim = 64 + block = DLRMInputBlock(schema, embedding_dim, mm.MLPBlock([embedding_dim])) + + assert isinstance(block["categorical"], mm.EmbeddingTables) + assert len(block["categorical"]) == len(schema.select_by_tag(Tags.CATEGORICAL)) + + assert isinstance(block["continuous"][0], mm.SelectKeys) + assert isinstance(block["continuous"][1], mm.MLPBlock) + + batch_size = 16 + batch = sample_batch(testing_data, batch_size=batch_size) + + outputs = module_utils.module_test(block, batch) + + for col in schema.select_by_tag(Tags.CATEGORICAL): + assert outputs[col.name].shape == (batch_size, embedding_dim) + assert outputs["continuous"].shape == (batch_size, embedding_dim) + + +class TestDLRMInteraction: + @pytest.mark.parametrize( + "batch_size,num_features,dim", + [(16, 3, 3), (32, 5, 8), (64, 5, 4)], + ) + def test_output_shape(self, batch_size, num_features, dim): + module = DLRMInteraction() + inputs = torch.rand((batch_size, num_features, dim)) + outputs = module_utils.module_test(module, inputs) + + assert outputs.shape == (batch_size, num_features - 1 + math.comb(num_features - 1, 2)) + + +class TestDLRMBlock: + @pytest.fixture(autouse=True) + def setup_method(self, testing_data): + self.schema = testing_data.schema + self.batch_size = 16 + self.batch = sample_batch(testing_data, batch_size=self.batch_size) + + def test_dlrm_output_shape(self): + embedding_dim = 64 + block = mm.DLRMBlock( + self.schema, + dim=embedding_dim, + bottom_block=mm.MLPBlock([embedding_dim]), + ) + + outputs = module_utils.module_test(block, self.batch) + + 
num_features = len(self.schema.select_by_tag(Tags.CATEGORICAL)) + 1 + dot_product_dim = (num_features - 1) * num_features // 2 + assert list(outputs.shape) == [self.batch_size, dot_product_dim + embedding_dim] + + def test_dlrm_with_top_block(self): + embedding_dim = 32 + top_block_dim = 8 + block = mm.DLRMBlock( + self.schema, + dim=embedding_dim, + bottom_block=mm.MLPBlock([embedding_dim]), + top_block=mm.MLPBlock([top_block_dim]), + ) + + outputs = module_utils.module_test(block, self.batch) + + assert list(outputs.shape) == [self.batch_size, top_block_dim] + + def test_dlrm_block_no_categorical_features(self): + schema = self.schema.remove_by_tag(Tags.CATEGORICAL) + embedding_dim = 32 + + with pytest.raises(ValueError, match="must have a categorical input"): + _ = mm.DLRMBlock( + schema, + dim=embedding_dim, + bottom_block=mm.MLPBlock([embedding_dim]), + ) + + def test_dlrm_block_no_continuous_features(self, testing_data): + schema = testing_data.schema.remove_by_tag(Tags.CONTINUOUS) + testing_data.schema = schema + + embedding_dim = 32 + block = mm.DLRMBlock( + schema, + dim=embedding_dim, + bottom_block=mm.MLPBlock([embedding_dim]), + ) + + batch_size = 16 + batch = sample_batch(testing_data, batch_size=batch_size) + + outputs = module_utils.module_test(block, batch) + + num_features = len(schema.select_by_tag(Tags.CATEGORICAL)) + dot_product_dim = (num_features - 1) * num_features // 2 + assert list(outputs.shape) == [batch_size, dot_product_dim] diff --git a/tests/unit/torch/models/test_base.py b/tests/unit/torch/models/test_base.py index cedd3e6ff6..ab329b8ca1 100644 --- a/tests/unit/torch/models/test_base.py +++ b/tests/unit/torch/models/test_base.py @@ -133,11 +133,11 @@ def test_training_step_with_dataloader(self): mm.BinaryOutput(ColumnSchema("target")), ) - feature = [[1.0, 2.0], [3.0, 4.0]] - target = [[0.0], [1.0]] + feature = [2.0, 3.0] + target = [0.0, 1.0] dataset = Dataset(pd.DataFrame({"feature": feature, "target": target})) - with Loader(dataset, batch_size=1) as loader: + with Loader(dataset, batch_size=2) as loader: model.initialize(loader) batch = loader.peek() diff --git a/tests/unit/torch/outputs/test_tabular.py b/tests/unit/torch/outputs/test_tabular.py index b3dd2abe4b..22ae132735 100644 --- a/tests/unit/torch/outputs/test_tabular.py +++ b/tests/unit/torch/outputs/test_tabular.py @@ -29,6 +29,8 @@ def test_exceptions(self): with pytest.raises(ValueError, match="not found"): mm.TabularOutputBlock(self.schema, init="not_found") + def test_no_route_for_non_existent_tag(self): outputs = mm.TabularOutputBlock(self.schema) - with pytest.raises(ValueError): - outputs.add_route(Tags.CATEGORICAL) + outputs.add_route(Tags.CATEGORICAL) + + assert not outputs
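
The unit tests added in PATCH 6/6 double as usage documentation for the new block. A condensed sketch of the same call pattern, assuming `testing_data` is a schema-tagged merlin.io.Dataset as in the test fixtures above (dimensions are illustrative):

import merlin.models.torch as mm
from merlin.models.torch.batch import sample_batch
from merlin.models.torch.utils import module_utils

embedding_dim = 64

# Categorical columns are embedded to `dim`, continuous columns are projected
# to the same width by `bottom_block`, and all feature pairs are then
# dot-product interacted; the optional `top_block` consumes the concatenation.
block = mm.DLRMBlock(
    testing_data.schema,
    dim=embedding_dim,
    bottom_block=mm.MLPBlock([embedding_dim]),
    top_block=mm.MLPBlock([8]),
)

batch = sample_batch(testing_data, batch_size=16)
outputs = module_utils.module_test(block, batch)  # -> tensor of shape (16, 8)

Without a top block, the output width is the pairwise-interaction count plus the continuous projection width, which grows quickly with the number of features — the reason a top MLP is typically stacked on the result.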