From 84df0c2bdff651d50141080ff826b3289fa99875 Mon Sep 17 00:00:00 2001
From: Radek Osmulski
Date: Wed, 8 Mar 2023 10:44:56 +1000
Subject: [PATCH 01/15] initial commit

---
 rees46_schema_modified.pbtxt                |  227 ++++
 train_and_save_model_for_benchmarking.ipynb | 1200 +++++++++++++++++++
 2 files changed, 1427 insertions(+)
 create mode 100644 rees46_schema_modified.pbtxt
 create mode 100644 train_and_save_model_for_benchmarking.ipynb

diff --git a/rees46_schema_modified.pbtxt b/rees46_schema_modified.pbtxt
new file mode 100644
index 0000000000..96960d23e6
--- /dev/null
+++ b/rees46_schema_modified.pbtxt
@@ -0,0 +1,227 @@
+feature {
+  name: "sess_pid_seq"
+  value_count {
+    min: 2
+  }
+  type: INT
+  int_domain {
+    name: "sess_pid_seq"
+    min: 1
+    max: 390000
+    is_categorical: true
+  }
+  annotation {
+    tag: "item_id"
+    tag: "list"
+    tag: "categorical"
+    tag: "item"
+  }
+}
+
+feature {
+  name: "sess_ccid_seq"
+  value_count {
+    min: 2
+    max: 20
+  }
+  type: INT
+  int_domain {
+    name: "sess_ccid_seq"
+    min: 1
+    max: 150
+    is_categorical: true
+  }
+  annotation {
+    tag: "list"
+    tag: "categorical"
+    tag: "item"
+  }
+}
+
+feature {
+  name: "sess_csid_seq"
+  value_count {
+    min: 2
+    max: 20
+  }
+  type: INT
+  int_domain {
+    name: "sess_csid_seq"
+    min: 1
+    max: 1400
+    is_categorical: true
+  }
+  annotation {
+    tag: "list"
+    tag: "categorical"
+    tag: "item"
+  }
+}
+
+
+feature {
+  name: "sess_bid_seq"
+  value_count {
+    min: 2
+    max: 20
+  }
+  type: INT
+  int_domain {
+    name: "sess_bid_seq"
+    min: 1
+    max: 7000
+    is_categorical: true
+  }
+  annotation {
+    tag: "list"
+    tag: "categorical"
+    tag: "item"
+  }
+}
+
+feature {
+  name: "sess_price_log_norm_seq"
+  value_count {
+    min: 2
+    max: 20
+  }
+  type: FLOAT
+  float_domain {
+    name: "sess_price_log_norm_seq"
+    min: 0.0
+    max: 10000.0
+  }
+  annotation {
+    tag: "item"
+    tag: "list"
+    tag: "continuous"
+  }
+}
+
+feature {
+  name: "sess_relative_price_to_avg_category_seq"
+  value_count {
+    min: 2
+    max: 20
+  }
+  type: FLOAT
+  float_domain {
+    name: "sess_relative_price_to_avg_category_seq"
+    min: -10000.0
+    max: 10000.0
+  }
+  annotation {
+    tag: "item"
+    tag: "list"
+    tag: "continuous"
+  }
+}
+
+feature {
+  name: "sess_prod_recency_days_log_norm_seq"
+  value_count {
+    min: 2
+    max: 20
+  }
+  type: FLOAT
+  float_domain {
+    name: "sess_prod_recency_days_log_norm_seq"
+    min: -10000.0
+    max: 10000.0
+  }
+  annotation {
+    tag: "item"
+    tag: "list"
+    tag: "continuous"
+  }
+}
+
+feature {
+  name: "sess_et_hour_sin_seq"
+  value_count {
+    min: 2
+    max: 20
+  }
+  type: FLOAT
+  float_domain {
+    name: "sess_et_hour_sin_seq"
+    min: -1.0
+    max: 1.0
+  }
+  annotation {
+    tag: "list"
+    tag: "continuous"
+  }
+}
+
+feature {
+  name: "sess_et_hour_cos_seq"
+  value_count {
+    min: 2
+    max: 20
+  }
+  type: FLOAT
+  float_domain {
+    name: "sess_et_hour_cos_seq"
+    min: -1.0
+    max: 1.0
+  }
+  annotation {
+    tag: "list"
+    tag: "continuous"
+  }
+}
+
+feature {
+  name: "sess_et_dayofweek_sin_seq"
+  value_count {
+    min: 2
+    max: 20
+  }
+  type: FLOAT
+  float_domain {
+    name: "sess_et_dayofweek_sin_seq"
+    min: -1.0
+    max: 1.0
+  }
+  annotation {
+    tag: "list"
+    tag: "continuous"
+  }
+}
+
+feature {
+  name: "sess_et_dayofweek_cos_seq"
+  value_count {
+    min: 2
+    max: 20
+  }
+  type: FLOAT
+  float_domain {
+    name: "sess_et_dayofweek_cos_seq"
+    min: -1.0
+    max: 1.0
+  }
+  annotation {
+    tag: "list"
+    tag: "continuous"
+  }
+}
+
+feature {
+  name: "sess_etime_seq"
+  value_count {
+    min: 2
+    max: 20
+  }
+  type: FLOAT
+  float_domain {
+    name: "sess_etime_seq"
+    min: 0
+    max: 0
+  }
+  annotation {
+    tag: "time"
+    tag: "list"
+ } +} diff --git a/train_and_save_model_for_benchmarking.ipynb b/train_and_save_model_for_benchmarking.ipynb new file mode 100644 index 0000000000..98a6460224 --- /dev/null +++ b/train_and_save_model_for_benchmarking.ipynb @@ -0,0 +1,1200 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "54d6ef61", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/Models\n", + " + 20a40d72...a92bdc24 tf/transformer-api -> origin/tf/transformer-api (forced update)\n", + "Warning: you are leaving 5 commits behind, not connected to\n", + "any of your branches:\n", + "\n", + " 20a40d72 fix masking of sequence-predict-next transform\n", + " dbd2d9c8 include PR comments\n", + " 1e642e87 update example notebook with the new API\n", + " e99e7985 add support of ragged tensor to weight tying\n", + " e87913d1 implement new design of the Transformer API on top of the release-23.02 branch\n", + "\n", + "If you want to keep them by creating a new branch, this may be a good time\n", + "to do so with:\n", + "\n", + " git branch 20a40d72\n", + "\n", + "HEAD is now at a92bdc24 adjust sample_weights to targets shape\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing /models\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from merlin-models==0.9.0+116.ga92bdc24) (0.0.2+41.gdbf8816)\n", + "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-models==0.9.0+116.ga92bdc24) (0.9.0+54.g29c7587a)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.3.5)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (0.56.4)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (2022.7.1)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (2022.5.0)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (2022.7.1)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (22.0)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (4.64.1)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.12.0)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (3.19.6)\n", + "Requirement already satisfied: 
betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.2.5)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (8.0.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (2.8.2)\n", + "Requirement already satisfied: numpy>=1.17.3; platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.22.4)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (2022.7)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (45.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (0.39.1)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (5.2.0)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.3.0)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (0.12.0)\n", + "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (2.2.0)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (6.0)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (2.4.0)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.7.0)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (6.1)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.0.4)\n", + "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (8.1.3)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (2.2.0)\n", + "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from 
distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.26.13)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (5.9.4)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (3.1.2)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.0.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.57.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.3.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (0.4.3)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.2.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.14.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (3.11.0)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.0.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (2.1.1)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (4.1.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (6.0.4)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (4.0.0)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (6.0.1)\n", + "Building wheels for collected packages: merlin-models\n", + " Building wheel for merlin-models (PEP 517): started\n", + " Building wheel for merlin-models (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-models: filename=merlin_models-0.9.0+116.ga92bdc24-py3-none-any.whl size=374626 sha256=0b09335e9fef4f6221003e7ba9eb2e1e24b4bfdfd433c8211c5ea32aa2856aed\n", + " Stored in directory: 
/tmp/pip-ephem-wheel-cache-168j85q4/wheels/4d/e8/98/0493db55fff90dc9af123f55a9455b96f7f8166c912a02c8a6\n", + "Successfully built merlin-models\n", + "Installing collected packages: merlin-models\n", + " Attempting uninstall: merlin-models\n", + " Found existing installation: merlin-models 0.9.0+114.g20a40d72\n", + " Uninstalling merlin-models-0.9.0+114.g20a40d72:\n", + " Successfully uninstalled merlin-models-0.9.0+114.g20a40d72\n", + "Successfully installed merlin-models-0.9.0+116.ga92bdc24\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Already on 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your branch is up to date with 'origin/main'.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/core\n", + " * branch main -> FETCH_HEAD\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Already up to date.\n", + "Processing /core\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+54.g29c7587a) (1.12.0)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+54.g29c7587a) (3.19.6)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+54.g29c7587a) (22.0)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+54.g29c7587a) (0.56.4)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+54.g29c7587a) (2022.7.1)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+54.g29c7587a) (1.3.5)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+54.g29c7587a) (2022.7.1)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+54.g29c7587a) (4.64.1)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+54.g29c7587a) (8.0.0)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+54.g29c7587a) (2022.5.0)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+54.g29c7587a) (1.2.5)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+54.g29c7587a) (1.57.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+54.g29c7587a) (1.3.0)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core==0.9.0+54.g29c7587a) (45.2.0)\n", + "Requirement already satisfied: numpy<1.24,>=1.18 in /usr/local/lib/python3.8/dist-packages (from 
numba>=0.54->merlin-core==0.9.0+54.g29c7587a) (1.22.4)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+54.g29c7587a) (0.39.1)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+54.g29c7587a) (5.2.0)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (5.9.4)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (0.12.0)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (2.2.0)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (6.0)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (6.1)\n", + "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (8.1.3)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (1.7.0)\n", + "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (1.26.13)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (1.0.0)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (2.4.0)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (3.1.2)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (2.2.0)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (1.0.4)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+54.g29c7587a) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+54.g29c7587a) (2022.7)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (1.3.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+54.g29c7587a) (0.4.3)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+54.g29c7587a) (1.2.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core==0.9.0+54.g29c7587a) (3.11.0)\n", + 
"Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (1.0.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (2.1.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+54.g29c7587a) (1.14.0)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+54.g29c7587a) (4.1.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+54.g29c7587a) (6.0.4)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+54.g29c7587a) (4.0.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+54.g29c7587a) (6.0.1)\n", + "Building wheels for collected packages: merlin-core\n", + " Building wheel for merlin-core (PEP 517): started\n", + " Building wheel for merlin-core (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-core: filename=merlin_core-0.9.0+54.g29c7587a-py3-none-any.whl size=152409 sha256=cf0f970219f2ae5dcae772911442f0366c3b3400aaac27967ba709e9c9ac1a22\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-zn63nwq_/wheels/8f/da/8c/c779661788874afaa32fd10abeac6016635956e3bad9940584\n", + "Successfully built merlin-core\n", + "Installing collected packages: merlin-core\n", + " Attempting uninstall: merlin-core\n", + " Found existing installation: merlin-core 0.9.0+54.g29c7587a\n", + " Uninstalling merlin-core-0.9.0+54.g29c7587a:\n", + " Successfully uninstalled merlin-core-0.9.0+54.g29c7587a\n", + "Successfully installed merlin-core-0.9.0+54.g29c7587a\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Already on 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your branch is up to date with 'origin/main'.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/NVTabular\n", + " * branch main -> FETCH_HEAD\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Already up to date.\n", + "Processing /nvtabular\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) (0.9.0+54.g29c7587a)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) (1.9.3)\n", + "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) (0.0.2+41.gdbf8816)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from 
merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.56.4)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (8.0.0)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.19.6)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.2.5)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7.1)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.5)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (22.0)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.5.0)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.12.0)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7.1)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (4.64.1)\n", + "Requirement already satisfied: numpy<1.26.0,>=1.18.5 in /usr/local/lib/python3.8/dist-packages (from scipy->nvtabular==1.6.0+42.g9b186ee9) (1.22.4)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (45.2.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (5.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.39.1)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.4.3)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.2.0)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.2.0)\n", + "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.26.13)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.1.2)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.2.0)\n", + "Requirement already satisfied: pyyaml in 
/usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.12.0)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.0.0)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.1)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.0.4)\n", + "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (8.1.3)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (5.9.4)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.4.0)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.7.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.57.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.0)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.11.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0.4)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (4.1.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.1.1)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.0.1)\n", + "Requirement already 
satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.14.0)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (4.0.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0.1)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Building wheels for collected packages: nvtabular\n", + " Building wheel for nvtabular (PEP 517): started\n", + " Building wheel for nvtabular (PEP 517): finished with status 'done'\n", + " Created wheel for nvtabular: filename=nvtabular-1.6.0+42.g9b186ee9-cp38-cp38-linux_x86_64.whl size=258506 sha256=20845f4d83c616304250353b73943fa82e251b9514cbd62b7387b83a6d21efe8\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-dt3f85gj/wheels/df/bf/c2/9cc2a62fe6da42038c26a9c0c4e25f9767093528b102fa30a2\n", + "Successfully built nvtabular\n", + "Installing collected packages: nvtabular\n", + " Attempting uninstall: nvtabular\n", + " Found existing installation: nvtabular 1.6.0+42.g9b186ee9\n", + " Uninstalling nvtabular-1.6.0+42.g9b186ee9:\n", + " Successfully uninstalled nvtabular-1.6.0+42.g9b186ee9\n", + "Successfully installed nvtabular-1.6.0+42.g9b186ee9\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Already on 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your branch is up to date with 'origin/main'.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/systems\n", + " * branch main -> FETCH_HEAD\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Already up to date.\n", + "Processing /systems\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: treelite-runtime==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.4.0)\n", + "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (0.9.0+54.g29c7587a)\n", + "Requirement already satisfied: requests<3,>=2.10 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.28.1)\n", + "Requirement already satisfied: treelite==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.4.0)\n", + "Requirement already satisfied: nvtabular>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (1.6.0+42.g9b186ee9)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from treelite-runtime==2.4.0->merlin-systems==0.7.0+61.g329cba4) (1.9.3)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.8/dist-packages (from treelite-runtime==2.4.0->merlin-systems==0.7.0+61.g329cba4) (1.22.4)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from 
merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.56.4)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7.1)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7.1)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.3.5)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (22.0)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.2.5)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.12.0)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.64.1)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.5.0)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (8.0.0)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.19.6)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (1.26.13)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2019.11.28)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2.8)\n", + "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2.1.1)\n", + "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from nvtabular>=1.0.0->merlin-systems==0.7.0+61.g329cba4) (0.0.2+41.gdbf8816)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.39.1)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (5.2.0)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (45.2.0)\n", + "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.2.0)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.3.0)\n", + "Requirement already satisfied: 
toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.12.0)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0)\n", + "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (8.1.3)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.4.0)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.2.0)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.0)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.1.2)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.4)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.7.0)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (5.9.4)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.1)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.8.2)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.2.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.4.3)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.57.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.3.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.11.0)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from 
zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.1)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.1.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.14.0)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.1.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0.4)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.0.0)\n", + "Building wheels for collected packages: merlin-systems\n", + " Building wheel for merlin-systems (PEP 517): started\n", + " Building wheel for merlin-systems (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-systems: filename=merlin_systems-0.7.0+61.g329cba4-py3-none-any.whl size=99480 sha256=c9ed3baf0f65ac381e50f14a63222abcbac99f78a39f4f04bd7e6828a7ed9c16\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-zfooq_xi/wheels/1f/e9/71/1b0c6295aa7f4b37cb70292d96d87d9f38204674e6531bdda6\n", + "Successfully built merlin-systems\n", + "Installing collected packages: merlin-systems\n", + " Attempting uninstall: merlin-systems\n", + " Found existing installation: merlin-systems 0.7.0+61.g329cba4\n", + " Uninstalling merlin-systems-0.7.0+61.g329cba4:\n", + " Successfully uninstalled merlin-systems-0.7.0+61.g329cba4\n", + "Successfully installed merlin-systems-0.7.0+61.g329cba4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Already on 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your branch is up to date with 'origin/main'.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/dataloader\n", + " * branch main -> FETCH_HEAD\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Already up to date.\n", + "Processing /dataloader\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: merlin-core>=0.8.0 in /usr/local/lib/python3.8/dist-packages (from merlin-dataloader==0.0.2+41.gdbf8816) (0.9.0+54.g29c7587a)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (8.0.0)\n", + "Requirement already satisfied: betterproto<2.0.0 in 
/usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.2.5)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2022.7.1)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (0.56.4)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2022.5.0)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (4.64.1)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.3.5)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2022.7.1)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (22.0)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (3.19.6)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.12.0)\n", + "Requirement already satisfied: numpy>=1.16.6 in /usr/local/lib/python3.8/dist-packages (from pyarrow>=5.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.22.4)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (0.4.3)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.2.0)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (6.0)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.7.0)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (5.9.4)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.4.0)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (0.12.0)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.2.0)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (6.1)\n", + "Requirement already 
satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.26.13)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.0.4)\n", + "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (8.1.3)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (3.1.2)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.0.0)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.2.0)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (45.2.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (5.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (0.39.1)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2022.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.8.2)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.3.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.57.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (6.0.4)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (4.1.0)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.0.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.1.1)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from 
importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (3.11.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.14.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (6.0.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (4.0.0)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Building wheels for collected packages: merlin-dataloader\n", + " Building wheel for merlin-dataloader (PEP 517): started\n", + " Building wheel for merlin-dataloader (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-dataloader: filename=merlin_dataloader-0.0.2+41.gdbf8816-py3-none-any.whl size=40852 sha256=60948b9af68c37dfacd1e48a9fdaaad2f9c78225e14116de0d4b643853d839bb\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-fwvmtvqd/wheels/8c/19/5b/15dc04f5a977f6a7f73ed66c91996a687b1d9e3154a4765536\n", + "Successfully built merlin-dataloader\n", + "Installing collected packages: merlin-dataloader\n", + " Attempting uninstall: merlin-dataloader\n", + " Found existing installation: merlin-dataloader 0.0.2+41.gdbf8816\n", + " Uninstalling merlin-dataloader-0.0.2+41.gdbf8816:\n", + " Successfully uninstalled merlin-dataloader-0.0.2+41.gdbf8816\n", + "Successfully installed merlin-dataloader-0.0.2+41.gdbf8816\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.8/dist-packages (3.7.1)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (22.0)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (0.11.0)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (4.39.0)\n", + "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (9.4.0)\n", + "Requirement already satisfied: importlib-resources>=3.2.0; python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from matplotlib) (5.10.2)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (1.4.4)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (1.0.7)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (2.8.2)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (3.0.9)\n", + "Requirement already satisfied: numpy>=1.20 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (1.22.4)\n", + "Requirement already satisfied: zipp>=3.1.0; python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from importlib-resources>=3.2.0; python_version < \"3.10\"->matplotlib) (3.11.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7->matplotlib) (1.14.0)\n" + ] + } + ], + "source": [ + "%%bash\n", + "\n", + "cd /models && git fetch origin && 
git checkout origin/tf/transformer-api && pip install .\n", + "cd /core && git checkout main && git pull origin main && pip install .\n", + "cd /nvtabular && git checkout main && git pull origin main && pip install .\n", + "cd /systems && git checkout main && git pull origin main && pip install .\n", + "cd /dataloader && git checkout main && git pull origin main && pip install .\n", + "pip install matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "152aee86", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: gdown in /usr/local/lib/python3.8/dist-packages (4.6.4)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from gdown) (4.64.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from gdown) (3.9.0)\n", + "Requirement already satisfied: requests[socks] in /usr/local/lib/python3.8/dist-packages (from gdown) (2.28.1)\n", + "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.8/dist-packages (from gdown) (4.11.1)\n", + "Requirement already satisfied: six in /usr/lib/python3/dist-packages (from gdown) (1.14.0)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.26.13)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2.8)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2019.11.28)\n", + "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (2.1.1)\n", + "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6; extra == \"socks\" in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.7.1)\n", + "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.8/dist-packages (from beautifulsoup4->gdown) (2.3.2.post1)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Downloading...\n", + "From: https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", + "To: /workspace/rees46_ecom_dataset_small_for_ci.zip\n", + "100%|██████████| 43.4M/43.4M [00:08<00:00, 5.42MB/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 InRelease\n", + "Get:2 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]\n", + "Hit:3 http://archive.ubuntu.com/ubuntu focal InRelease\n", + "Get:4 http://archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]\n", + "Get:5 http://archive.ubuntu.com/ubuntu focal-backports InRelease [108 kB]\n", + "Fetched 336 kB in 3s (129 kB/s)\n", + "Reading package lists...\n", + "Reading package lists...\n", + "Building dependency tree...\n", + "Reading state information...\n", + "unzip is already the newest version (6.0-25ubuntu1.1).\n", + "0 upgraded, 0 newly installed, 0 to remove and 83 not upgraded.\n", + "Archive: rees46_ecom_dataset_small_for_ci.zip\n", + " creating: ecom_dataset/0001/\n", + " inflating: ecom_dataset/0001/valid.parquet \n", + " extracting: ecom_dataset/0001/.zip \n", + " inflating: ecom_dataset/0001/train.parquet \n", + " inflating: ecom_dataset/0001/test.parquet \n", + " creating: ecom_dataset/0002/\n", + " inflating: ecom_dataset/0002/valid.parquet \n", + " inflating: 
ecom_dataset/0002/train.parquet \n", + " inflating: ecom_dataset/0002/test.parquet \n" + ] + } + ], + "source": [ + "%%bash\n", + "\n", + "rm -rf ecom_dataset\n", + "mkdir -p ecom_dataset\n", + "\n", + "pip install gdown\n", + "# gdown https://drive.google.com/uc?id=1BvCHc4eXComuNK93bKhRM6cbg9y5p350 # <-- full dataset\n", + "gdown https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", + "apt-get update -y\n", + "apt-get install unzip -y\n", + "unzip -d ecom_dataset \"rees46_ecom_dataset_small_for_ci.zip\"" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "ceb3ae93", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-08 00:23:08.749959: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-08 00:23:11.232785: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-08 00:23:11.233226: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-08 00:23:11.233386: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2023-03-08 00:23:11.674938: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-03-08 00:23:11.675977: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-08 00:23:11.676191: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-08 00:23:11.676346: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-08 00:23:12.417852: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-08 00:23:12.418073: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-08 00:23:12.418234: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-08 00:23:12.418351: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", + "2023-03-08 00:23:12.418418: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n" + ] + } + ], + "source": [ + "import os\n", + "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", + "import gc\n", + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "\n", + "from merlin.schema.tags import Tags\n", + "from merlin.io.dataset import Dataset\n", + "import merlin.models.tf as mm" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "81e7f635", + "metadata": {}, + "outputs": [], + "source": [ + "# this is only temporary, we can align the functionality with the CI script later on\n", + "\n", + "DATA_FOLDER = os.environ.get(\n", + " \"DATA_FOLDER\", \n", + " 'ecom_dataset/0002'\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "11647dd3", + "metadata": {}, + "outputs": [], + "source": [ + "train = Dataset(os.path.join(DATA_FOLDER, \"train.parquet\"))\n", + "valid = Dataset(os.path.join(DATA_FOLDER, \"valid.parquet\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4ab4e0fb", + "metadata": {}, + "outputs": [], + "source": [ + "target = 'sess_pid_seq'" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "792daa9d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nametagsdtypeis_listis_raggedproperties.value_count.minproperties.value_count.max
0sess_pid_seq()DType(name='int32', element_type=<ElementType....TrueTrue0None
\n", + "
" + ], + "text/plain": [ + "[{'name': 'sess_pid_seq', 'tags': set(), 'properties': {'value_count': {'min': 0, 'max': None}}, 'dtype': DType(name='int32', element_type=, element_size=32, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=0, max=None)))), 'is_list': True, 'is_ragged': True}]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.schema.select_by_name('sess_pid_seq')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "8d9903e6", + "metadata": {}, + "outputs": [], + "source": [ + "# a couple of hyperparams I took from the CI script in T4Rec\n", + "\n", + "d_model = 192\n", + "n_layer = 3\n", + "n_head = 16\n", + "batch_size = 128\n", + "learning_rate = 0.0006667377132554976" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a6ade14a", + "metadata": {}, + "outputs": [], + "source": [ + "mlp_block = mm.MLPBlock(\n", + " [128,d_model],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7f15a0a0", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:148: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n", + "\n", + "schema = TensorflowMetadata.from_proto_text_file(\n", + " './',\n", + " file_name='rees46_schema_modified.pbtxt'\n", + ").to_merlin_schema()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "74ccc9a9", + "metadata": {}, + "outputs": [], + "source": [ + "train.schema = schema" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "b2aa0beb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'sess_pid_seq': (, ), 'sess_ccid_seq': , 'sess_csid_seq': , 'sess_bid_seq': , 'sess_price_log_norm_seq': , 'sess_relative_price_to_avg_category_seq': , 'sess_prod_recency_days_log_norm_seq': , 'sess_et_hour_sin_seq': , 'sess_et_hour_cos_seq': , 'sess_et_dayofweek_sin_seq': , 'sess_et_dayofweek_cos_seq': , 'sess_etime_seq': }\n" + ] + } + ], + "source": [ + "from merlin.loader.tensorflow import Loader\n", + "\n", + "data = train\n", + "dataloader = Loader(data, batch_size=5)\n", + "batch = next(dataloader)\n", + "print(batch[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e64a9c0d", + "metadata": {}, + "outputs": [], + "source": [ + "# import nvtabular as nvt\n", + "\n", + "# ops = ['sess_pid_seq'] >> nvt.ops.Categorify()\n", + "\n", + "# wf = nvt.Workflow(ops)\n", + "# train = wf.fit_transform(train)\n", + "# valid = wf.transform(valid)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "292ef9ba", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:148: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. 
Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(128, None, 192)\n" + ] + } + ], + "source": [ + "batch = mm.sample_batch(train, batch_size=batch_size, include_targets=False, to_ragged=True)\n", + "print(input_block(batch).shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "34c739b3", + "metadata": {}, + "outputs": [], + "source": [ + "train.schema = train.schema.select_by_name('sess_pid_seq')" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "5a4c7ca3", + "metadata": {}, + "outputs": [], + "source": [ + "input_block = mm.InputBlockV2(\n", + " train.schema.select_by_name('sess_pid_seq'), \n", + " embeddings=mm.Embeddings(\n", + " train.schema.select_by_name('sess_pid_seq'), \n", + " sequence_combiner=None,\n", + " dim=d_model\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "14c35b2a", + "metadata": {}, + "outputs": [], + "source": [ + "xlnet_block = mm.XLNetBlock(d_model=d_model, n_head=n_head, n_layer=n_layer)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "866f3249", + "metadata": {}, + "outputs": [], + "source": [ + "dense_block = mm.SequentialBlock(\n", + " input_block,\n", + " mlp_block,\n", + " xlnet_block\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "288d08df", + "metadata": {}, + "outputs": [], + "source": [ + "mlp_block2 = mm.MLPBlock(\n", + " [128,d_model],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "064ea5ec", + "metadata": {}, + "outputs": [], + "source": [ + "prediction_task = mm.CategoricalOutput(\n", + " to_call=input_block[\"categorical\"][target],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "6c008e16", + "metadata": {}, + "outputs": [], + "source": [ + "model_transformer = mm.Model(dense_block, mlp_block2, prediction_task)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "49b12d31", + "metadata": {}, + "outputs": [], + "source": [ + "optimizer = tf.keras.optimizers.Adam(\n", + " learning_rate=learning_rate,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "502ef8a3", + "metadata": {}, + "outputs": [], + "source": [ + "n_epoch = 1" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "d84a30d3", + "metadata": {}, + "outputs": [], + "source": [ + "model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", + " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[4])\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a9611ab", + "metadata": {}, + "outputs": [], + "source": [ + "# model_transformer.fit(\n", + "# train,\n", + "# batch_size=batch_size,\n", + "# epochs=n_epoch,\n", + "# pre=mm.SequencePredictRandom(schema=train.schema, target=target, transformer=xlnet_block)\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "e7474131", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_2/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 
'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_2/sequential_block_12/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_2/sequential_block_12/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_2/sequential_block_12/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "665/665 [==============================] - 74s 107ms/step - loss: 8.9015 - recall_at_4: 0.0224 - mrr_at_4: 0.0129 - ndcg_at_4: 0.0153 - map_at_4: 0.0129 - precision_at_4: 0.0056 - regularization_loss: 0.0000e+00 - loss_batch: 8.8957\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "7bf839e3", + "metadata": {}, + "outputs": [], + "source": [ + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "15ccc448", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "84/84 [==============================] - 8s 40ms/step - loss: 8.8326 - recall_at_4: 0.0502 - mrr_at_4: 0.0319 - ndcg_at_4: 0.0365 - map_at_4: 0.0319 - precision_at_4: 0.0126 - regularization_loss: 0.0000e+00 - loss_batch: 8.8396\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.832579612731934,\n", + " 'recall_at_4': 0.05087455362081528,\n", + " 'mrr_at_4': 0.030891483649611473,\n", + " 'ndcg_at_4': 0.0359138660132885,\n", + " 'map_at_4': 0.030891483649611473,\n", + " 'precision_at_4': 0.01271863840520382,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 9.142295837402344}" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 6560270f6dfcb12ecae74108e6195f37ec5ecde3 Mon Sep 17 00:00:00 2001 From: Radek Osmulski Date: Thu, 9 Mar 2023 16:17:38 +1000 
Subject: [PATCH 02/15] update --- reproducing_T4Rec_results.ipynb | 1538 +++++++++++++++++++ train_and_save_model_for_benchmarking.ipynb | 2 +- 2 files changed, 1539 insertions(+), 1 deletion(-) create mode 100644 reproducing_T4Rec_results.ipynb diff --git a/reproducing_T4Rec_results.ipynb b/reproducing_T4Rec_results.ipynb new file mode 100644 index 0000000000..7b066f2f65 --- /dev/null +++ b/reproducing_T4Rec_results.ipynb @@ -0,0 +1,1538 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "7f851659", + "metadata": {}, + "source": [ + "These are logs from training the following model from the CI script from T4Rec (the trianing was for 5 epochs):\n", + "\n", + "`### XLNet (MLM) - Item Id feature\n", + "python3 transf_exp_main_modified.py --output_dir ./tmp/ --overwrite_output_dir --do_train --do_eval --validate_every 10 --logging_steps 20 --save_steps 0 --data_path $DATA_PATH --features_schema_path $FEATURE_SCHEMA_PATH --fp16 --data_loader_engine merlin --start_time_window_index 1 --final_time_window_index 2 --time_window_folder_pad_digits 4 --model_type xlnet --loss_type cross_entropy --per_device_eval_batch_size 128 --similarity_type concat_mlp --tf_out_activation tanh --inp_merge mlp --learning_rate_warmup_steps 0 --learning_rate_schedule linear_with_warmup --hidden_act gelu --num_train_epochs $NUM_EPOCHS --dataloader_drop_last --compute_metrics_each_n_steps 1 --session_seq_length_max 20 --eval_on_last_item_seq_only --mf_constrained_embeddings --layer_norm_featurewise --attn_type bi --mlm --per_device_train_batch_size 128 --learning_rate 0.0006667377132554976 --dropout 0.0 --input_dropout 0.1 --weight_decay 3.910060265627374e-05 --d_model 192 --item_embedding_dim 448 --n_layer 3 --n_head 16 --label_smoothing 0.0 --stochastic_shared_embeddings_replacement_prob 0.1 --item_id_embeddings_init_std 0.11 --other_embeddings_init_std 0.02 --mlm_probability 0.30000000000000004 --eval_on_test_set --seed 100 --report_to none\n", + "`" + ] + }, + { + "cell_type": "markdown", + "id": "c0369401", + "metadata": {}, + "source": [ + "And here are the logs and the results, maybe reproducing that is something that we could work towards (the XLNet with MLM is what I used for benchmarking T4Rec, starting with it would be great)" + ] + }, + { + "cell_type": "markdown", + "id": "e26066be", + "metadata": {}, + "source": [ + "03/09/2023 04:21:44 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1, distributed training: False, 16-bits training: True\n", + "03/09/20`23 04:21:45 - WARNING - transformers4rec - Projecting inputs of NextItemPredictionTask to'448' As weight tying requires the input dimension '192' to be equal to the item-id embedding dimension '448'\n", + "[INFO|trainer.py:434] 2023-03-09 04:21:45,787 >> Using amp fp16 backend\n", + "03/09/2023 04:21:45 - INFO - examples.t4rec_paper_experiments.t4r_paper_repro.exp_outputs - Training, Model and Data parameters {'data_path': '/transformers4rec/examples/t4rec_paper_experiments/t4r_paper_repro/', 'features_schema_path': '/workspace/examples/t4rec_paper_experiments/datasets_configs/ecom_rees46/rees46_schema.pbtxt', 'start_time_window_index': 1, 'final_time_window_index': 2, 'time_window_folder_pad_digits': 4, 'no_incremental_training': False, 'training_time_window_size': 0, 'use_side_information_features': False, 'input_features_aggregation': 'concat', 'model_type': 'xlnet', 'tf_out_activation': 'tanh', 'mlm': True, 'mlm_probability': 0.30000000000000004, 'plm': False, 'plm_probability': 0.25, 'plm_max_span_length': 5, 'plm_mask_input': 
False, 'plm_permute_all': False, 'rtd': False, 'rtd_sample_from_batch': False, 'rtd_use_batch_interaction': False, 'rtd_discriminator_loss_weight': 50, 'rtd_generator_loss_weight': 1, 'rtd_tied_generator': False, 'd_model': 192, 'n_layer': 3, 'n_head': 16, 'layer_norm_eps': 1e-12, 'initializer_range': 0.02, 'hidden_act': 'gelu', 'dropout': 0.0, 'summary_type': 'last', 'num_hidden_groups': 1, 'inner_group_num': 1, 'eval_on_last_item_seq_only': True, 'train_on_last_item_seq_only': False, 'mf_constrained_embeddings': True, 'item_embedding_dim': 448, 'numeric_features_project_to_embedding_dim': 0, 'numeric_features_soft_one_hot_encoding_num_embeddings': 0, 'stochastic_shared_embeddings_replacement_prob': 0.1, 'softmax_temperature': 1.0, 'label_smoothing': 0.0, 'embedding_dim_from_cardinality_multiplier': 2.0, 'item_id_embeddings_init_std': 0.11, 'other_embeddings_init_std': 0.02, 'layer_norm_featurewise': True, 'attn_type': 'bi', 'input_dropout': 0.1, 'loss_type': 'cross_entropy', 'similarity_type': 'concat_mlp', 'inp_merge': 'mlp', 'learning_rate_warmup_steps': 0, 'avg_session_length': None, 'output_dir': './tmp/', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'prediction_loss_only': False, 'per_device_train_batch_size': 128, 'per_device_eval_batch_size': 128, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 0.0006667377132554976, 'weight_decay': 3.910060265627374e-05, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './tmp/runs/Mar09_04-21-42_206f0524dae0', 'logging_first_step': False, 'logging_steps': 20, 'logging_nan_inf_filter': True, 'save_steps': 0, 'save_total_limit': None, 'save_on_each_node': False, 'no_cuda': False, 'seed': 100, 'fp16': True, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'xpu_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': True, 'eval_steps': None, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': None, 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': [], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_token': None, 'gradient_checkpointing': False, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': None, '_n_gpu': 1, 'mp_parameters': '', 'max_sequence_length': 20, 'shuffle_buffer_size': 0, 'data_loader_engine': 'merlin', 'eval_on_test_set': True, 'eval_steps_on_train_set': 20, 'predict_top_k': 0, 'learning_rate_num_cosine_cycles_by_epoch': 1.25, 'log_predictions': False, 'compute_metrics_each_n_steps': 1, 'experiments_group': 'default', 'session_seq_length_max': 20, 'learning_rate_schedule': 'linear_with_warmup', 'validate_every': 10}\n", + "[INFO|trainer.py:1196] 2023-03-09 04:21:46,506 >> ***** 
Running training *****\n", + "[INFO|trainer.py:1197] 2023-03-09 04:21:46,506 >> Num examples = 86528\n", + "[INFO|trainer.py:1198] 2023-03-09 04:21:46,506 >> Num Epochs = 5\n", + "[INFO|trainer.py:1199] 2023-03-09 04:21:46,506 >> Instantaneous batch size per device = 128\n", + "[INFO|trainer.py:1200] 2023-03-09 04:21:46,506 >> Total train batch size (w. parallel, distributed & accumulation) = 128\n", + "[INFO|trainer.py:1201] 2023-03-09 04:21:46,506 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1202] 2023-03-09 04:21:46,506 >> Total optimization steps = 3380\n", + "DLL 2023-03-09 04:21:45.788371 - PARAMETER data_path : /transformers4rec/examples/t4rec_paper_experiments/t4r_paper_repro/ features_schema_path : /workspace/examples/t4rec_paper_experiments/datasets_configs/ecom_rees46/rees46_schema.pbtxt start_time_window_index : 1 final_time_window_index : 2 time_window_folder_pad_digits : 4 no_incremental_training : False training_time_window_size : 0 use_side_information_features : False input_features_aggregation : concat model_type : xlnet tf_out_activation : tanh mlm : True mlm_probability : 0.30000000000000004 plm : False plm_probability : 0.25 plm_max_span_length : 5 plm_mask_input : False plm_permute_all : False rtd : False rtd_sample_from_batch : False rtd_use_batch_interaction : False rtd_discriminator_loss_weight : 50 rtd_generator_loss_weight : 1 rtd_tied_generator : False d_model : 192 n_layer : 3 n_head : 16 layer_norm_eps : 1e-12 initializer_range : 0.02 hidden_act : gelu dropout : 0.0 summary_type : last num_hidden_groups : 1 inner_group_num : 1 eval_on_last_item_seq_only : True train_on_last_item_seq_only : False mf_constrained_embeddings : True item_embedding_dim : 448 numeric_features_project_to_embedding_dim : 0 numeric_features_soft_one_hot_encoding_num_embeddings : 0 stochastic_shared_embeddings_replacement_prob : 0.1 softmax_temperature : 1.0 label_smoothing : 0.0 embedding_dim_from_cardinality_multiplier : 2.0 item_id_embeddings_init_std : 0.11 other_embeddings_init_std : 0.02 layer_norm_featurewise : True attn_type : bi input_dropout : 0.1 loss_type : cross_entropy similarity_type : concat_mlp inp_merge : mlp learning_rate_warmup_steps : 0 avg_session_length : None output_dir : ./tmp/ overwrite_output_dir : True do_train : True do_eval : True do_predict : False prediction_loss_only : False per_device_train_batch_size : 128 per_device_eval_batch_size : 128 per_gpu_train_batch_size : None per_gpu_eval_batch_size : None gradient_accumulation_steps : 1 eval_accumulation_steps : None learning_rate : 0.0006667377132554976 weight_decay : 3.910060265627374e-05 adam_beta1 : 0.9 adam_beta2 : 0.999 adam_epsilon : 1e-08 max_grad_norm : 1.0 num_train_epochs : 5.0 max_steps : -1 lr_scheduler_type : linear warmup_ratio : 0.0 warmup_steps : 0 log_level : -1 log_level_replica : -1 log_on_each_node : True logging_dir : ./tmp/runs/Mar09_04-21-42_206f0524dae0 logging_first_step : False logging_steps : 20 logging_nan_inf_filter : True save_steps : 0 save_total_limit : None save_on_each_node : False no_cuda : False seed : 100 fp16 : True fp16_opt_level : O1 fp16_backend : auto fp16_full_eval : False local_rank : -1 xpu_backend : None tpu_num_cores : None tpu_metrics_debug : False debug : [] dataloader_drop_last : True eval_steps : None dataloader_num_workers : 0 past_index : -1 run_name : None disable_tqdm : False remove_unused_columns : True label_names : None load_best_model_at_end : False metric_for_best_model : None greater_is_better : None ignore_data_skip : False 
sharded_ddp : [] deepspeed : None label_smoothing_factor : 0.0 adafactor : False group_by_length : False length_column_name : length report_to : [] ddp_find_unused_parameters : None dataloader_pin_memory : True skip_memory_metrics : True use_legacy_prediction_loop : False push_to_hub : False resume_from_checkpoint : None hub_model_id : None hub_token : None gradient_checkpointing : False push_to_hub_model_id : None push_to_hub_organization : None push_to_hub_token : None _n_gpu : 1 mp_parameters : max_sequence_length : 20 shuffle_buffer_size : 0 data_loader_engine : merlin eval_on_test_set : True eval_steps_on_train_set : 20 predict_top_k : 0 learning_rate_num_cosine_cycles_by_epoch : 1.25 log_predictions : False compute_metrics_each_n_steps : 1 experiments_group : default session_seq_length_max : 20 learning_rate_schedule : linear_with_warmup validate_every : 10 \n", + "\n", + "***** Launch training for day 1: *****\n", + "{'loss': 12.9123, 'learning_rate': 0.0006627925196859384, 'epoch': 0.03}\n", + "{'loss': 12.4709, 'learning_rate': 0.0006588473261163793, 'epoch': 0.06}\n", + "{'loss': 11.5016, 'learning_rate': 0.0006549021325468202, 'epoch': 0.09}\n", + "{'loss': 10.9435, 'learning_rate': 0.0006509569389772609, 'epoch': 0.12}\n", + "{'loss': 10.4956, 'learning_rate': 0.0006470117454077018, 'epoch': 0.15}\n", + "{'loss': 10.3446, 'learning_rate': 0.0006430665518381426, 'epoch': 0.18}\n", + "{'loss': 10.1993, 'learning_rate': 0.0006391213582685835, 'epoch': 0.21}\n", + "{'loss': 10.0643, 'learning_rate': 0.0006351761646990243, 'epoch': 0.24}\n", + "{'loss': 10.0089, 'learning_rate': 0.0006312309711294651, 'epoch': 0.27}\n", + "{'loss': 9.8635, 'learning_rate': 0.000627285777559906, 'epoch': 0.3}\n", + "{'loss': 9.9116, 'learning_rate': 0.0006233405839903469, 'epoch': 0.33}\n", + "{'loss': 9.8111, 'learning_rate': 0.0006193953904207876, 'epoch': 0.36}\n", + "{'loss': 9.9284, 'learning_rate': 0.0006154501968512286, 'epoch': 0.38}\n", + "{'loss': 9.8935, 'learning_rate': 0.0006115050032816694, 'epoch': 0.41}\n", + "{'loss': 9.8119, 'learning_rate': 0.0006075598097121102, 'epoch': 0.44}\n", + "{'loss': 9.7587, 'learning_rate': 0.000603614616142551, 'epoch': 0.47}\n", + "{'loss': 9.6956, 'learning_rate': 0.000599669422572992, 'epoch': 0.5}\n", + "{'loss': 9.7389, 'learning_rate': 0.0005957242290034327, 'epoch': 0.53}\n", + "{'loss': 9.6166, 'learning_rate': 0.0005917790354338736, 'epoch': 0.56}\n", + "{'loss': 9.5585, 'learning_rate': 0.0005878338418643144, 'epoch': 0.59}\n", + "{'loss': 9.3571, 'learning_rate': 0.0005838886482947553, 'epoch': 0.62}\n", + "{'loss': 9.5001, 'learning_rate': 0.0005799434547251961, 'epoch': 0.65}\n", + "{'loss': 9.532, 'learning_rate': 0.0005759982611556369, 'epoch': 0.68}\n", + "{'loss': 9.5373, 'learning_rate': 0.0005720530675860778, 'epoch': 0.71}\n", + "{'loss': 9.4494, 'learning_rate': 0.0005681078740165187, 'epoch': 0.74}\n", + "{'loss': 9.465, 'learning_rate': 0.0005641626804469595, 'epoch': 0.77}\n", + "{'loss': 9.5593, 'learning_rate': 0.0005602174868774003, 'epoch': 0.8}\n", + "{'loss': 9.3824, 'learning_rate': 0.0005562722933078411, 'epoch': 0.83}\n", + "{'loss': 9.3634, 'learning_rate': 0.000552327099738282, 'epoch': 0.86}\n", + "{'loss': 9.3981, 'learning_rate': 0.0005483819061687229, 'epoch': 0.89}\n", + "{'loss': 9.419, 'learning_rate': 0.0005444367125991636, 'epoch': 0.92}\n", + "{'loss': 9.3024, 'learning_rate': 0.0005404915190296046, 'epoch': 0.95}\n", + "{'loss': 9.375, 'learning_rate': 0.0005365463254600454, 'epoch': 0.98}\n", + "{'loss': 
9.4292, 'learning_rate': 0.0005326011318904862, 'epoch': 1.01}\n", + "{'loss': 9.018, 'learning_rate': 0.0005286559383209271, 'epoch': 1.04}\n", + "{'loss': 9.2277, 'learning_rate': 0.000524710744751368, 'epoch': 1.07}\n", + "{'loss': 9.1066, 'learning_rate': 0.0005207655511818087, 'epoch': 1.09}\n", + "{'loss': 9.1126, 'learning_rate': 0.0005168203576122496, 'epoch': 1.12}\n", + "{'loss': 9.0821, 'learning_rate': 0.0005128751640426904, 'epoch': 1.15}\n", + "{'loss': 9.0789, 'learning_rate': 0.0005089299704731313, 'epoch': 1.18}\n", + "{'loss': 9.0374, 'learning_rate': 0.0005049847769035721, 'epoch': 1.21}\n", + "{'loss': 9.1187, 'learning_rate': 0.0005010395833340129, 'epoch': 1.24}\n", + "{'loss': 9.1388, 'learning_rate': 0.0004970943897644538, 'epoch': 1.27}\n", + "{'loss': 9.0866, 'learning_rate': 0.0004931491961948947, 'epoch': 1.3}\n", + "{'loss': 9.112, 'learning_rate': 0.0004892040026253355, 'epoch': 1.33}\n", + "{'loss': 9.0176, 'learning_rate': 0.0004852588090557764, 'epoch': 1.36}\n", + "{'loss': 9.0055, 'learning_rate': 0.0004813136154862172, 'epoch': 1.39}\n", + "{'loss': 9.0298, 'learning_rate': 0.000477368421916658, 'epoch': 1.42}\n", + "{'loss': 9.0415, 'learning_rate': 0.0004734232283470988, 'epoch': 1.45}\n", + "{'loss': 9.0309, 'learning_rate': 0.00046947803477753974, 'epoch': 1.48}\n", + "{'loss': 8.9989, 'learning_rate': 0.00046553284120798055, 'epoch': 1.51}\n", + "{'loss': 8.9873, 'learning_rate': 0.00046158764763842136, 'epoch': 1.54}\n", + "{'loss': 9.01, 'learning_rate': 0.0004576424540688622, 'epoch': 1.57}\n", + "{'loss': 8.904, 'learning_rate': 0.0004536972604993031, 'epoch': 1.6}\n", + "{'loss': 8.8774, 'learning_rate': 0.0004497520669297439, 'epoch': 1.63}\n", + "{'loss': 9.0187, 'learning_rate': 0.0004458068733601847, 'epoch': 1.66}\n", + "{'loss': 9.0117, 'learning_rate': 0.00044186167979062564, 'epoch': 1.69}\n", + "{'loss': 9.0067, 'learning_rate': 0.00043791648622106645, 'epoch': 1.72}\n", + "{'loss': 8.9619, 'learning_rate': 0.00043397129265150726, 'epoch': 1.75}\n", + "{'loss': 9.0377, 'learning_rate': 0.0004300260990819481, 'epoch': 1.78}\n", + "{'loss': 8.8861, 'learning_rate': 0.000426080905512389, 'epoch': 1.8}\n", + "{'loss': 8.9635, 'learning_rate': 0.0004221357119428298, 'epoch': 1.83}\n", + "{'loss': 8.9096, 'learning_rate': 0.00041819051837327067, 'epoch': 1.86}\n", + "{'loss': 8.981, 'learning_rate': 0.0004142453248037115, 'epoch': 1.89}\n", + "{'loss': 8.925, 'learning_rate': 0.00041030013123415234, 'epoch': 1.92}\n", + "{'loss': 8.877, 'learning_rate': 0.0004063549376645932, 'epoch': 1.95}\n", + "{'loss': 8.9213, 'learning_rate': 0.000402409744095034, 'epoch': 1.98}\n", + "{'loss': 8.9171, 'learning_rate': 0.00039846455052547494, 'epoch': 2.01}\n", + "{'loss': 8.7143, 'learning_rate': 0.00039451935695591575, 'epoch': 2.04}\n", + "{'loss': 8.7358, 'learning_rate': 0.00039057416338635656, 'epoch': 2.07}\n", + "{'loss': 8.7629, 'learning_rate': 0.00038662896981679737, 'epoch': 2.1}\n", + "{'loss': 8.7493, 'learning_rate': 0.0003826837762472383, 'epoch': 2.13}\n", + "{'loss': 8.8725, 'learning_rate': 0.0003787385826776791, 'epoch': 2.16}\n", + "{'loss': 8.5959, 'learning_rate': 0.0003747933891081199, 'epoch': 2.19}\n", + "{'loss': 8.7501, 'learning_rate': 0.0003708481955385607, 'epoch': 2.22}\n", + "{'loss': 8.758, 'learning_rate': 0.00036690300196900164, 'epoch': 2.25}\n", + "{'loss': 8.908, 'learning_rate': 0.00036295780839944245, 'epoch': 2.28}\n", + "{'loss': 8.7367, 'learning_rate': 0.00035901261482988326, 'epoch': 2.31}\n", + "{'loss': 
8.7486, 'learning_rate': 0.0003550674212603242, 'epoch': 2.34}\n", + "{'loss': 8.7116, 'learning_rate': 0.000351122227690765, 'epoch': 2.37}\n", + "{'loss': 8.6919, 'learning_rate': 0.0003471770341212058, 'epoch': 2.4}\n", + "{'loss': 8.7932, 'learning_rate': 0.00034323184055164667, 'epoch': 2.43}\n", + "{'loss': 8.7448, 'learning_rate': 0.00033928664698208754, 'epoch': 2.46}\n", + "{'loss': 8.8504, 'learning_rate': 0.00033534145341252835, 'epoch': 2.49}\n", + "{'loss': 8.6369, 'learning_rate': 0.0003313962598429692, 'epoch': 2.51}\n", + "{'loss': 8.7453, 'learning_rate': 0.0003274510662734101, 'epoch': 2.54}\n", + "{'loss': 8.7315, 'learning_rate': 0.0003235058727038509, 'epoch': 2.57}\n", + "{'loss': 8.6411, 'learning_rate': 0.00031956067913429176, 'epoch': 2.6}\n", + "{'loss': 8.5762, 'learning_rate': 0.00031561548556473257, 'epoch': 2.63}\n", + "{'loss': 8.642, 'learning_rate': 0.00031167029199517343, 'epoch': 2.66}\n", + "{'loss': 8.7194, 'learning_rate': 0.0003077250984256143, 'epoch': 2.69}\n", + "{'loss': 8.627, 'learning_rate': 0.0003037799048560551, 'epoch': 2.72}\n", + "{'loss': 8.7215, 'learning_rate': 0.000299834711286496, 'epoch': 2.75}\n", + "{'loss': 8.707, 'learning_rate': 0.0002958895177169368, 'epoch': 2.78}\n", + "{'loss': 8.5898, 'learning_rate': 0.00029194432414737765, 'epoch': 2.81}\n", + "{'loss': 8.7476, 'learning_rate': 0.00028799913057781846, 'epoch': 2.84}\n", + "{'loss': 8.6637, 'learning_rate': 0.0002840539370082593, 'epoch': 2.87}\n", + "{'loss': 8.5057, 'learning_rate': 0.00028010874343870014, 'epoch': 2.9}\n", + "{'loss': 8.6891, 'learning_rate': 0.000276163549869141, 'epoch': 2.93}\n", + "{'loss': 8.696, 'learning_rate': 0.0002722183562995818, 'epoch': 2.96}\n", + "{'loss': 8.5416, 'learning_rate': 0.0002682731627300227, 'epoch': 2.99}\n", + "{'loss': 8.592, 'learning_rate': 0.00026432796916046354, 'epoch': 3.02}\n", + "{'loss': 8.5271, 'learning_rate': 0.00026038277559090435, 'epoch': 3.05}\n", + "{'loss': 8.4965, 'learning_rate': 0.0002564375820213452, 'epoch': 3.08}\n", + "{'loss': 8.5365, 'learning_rate': 0.00025249238845178603, 'epoch': 3.11}\n", + "{'loss': 8.5022, 'learning_rate': 0.0002485471948822269, 'epoch': 3.14}\n", + "{'loss': 8.4691, 'learning_rate': 0.00024460200131266776, 'epoch': 3.17}\n", + "{'loss': 8.5848, 'learning_rate': 0.0002406568077431086, 'epoch': 3.2}\n", + "{'loss': 8.5176, 'learning_rate': 0.0002367116141735494, 'epoch': 3.22}\n", + "{'loss': 8.6456, 'learning_rate': 0.00023276642060399028, 'epoch': 3.25}\n", + "{'loss': 8.6207, 'learning_rate': 0.0002288212270344311, 'epoch': 3.28}\n", + "{'loss': 8.5979, 'learning_rate': 0.00022487603346487195, 'epoch': 3.31}\n", + "{'loss': 8.4435, 'learning_rate': 0.00022093083989531282, 'epoch': 3.34}\n", + "{'loss': 8.4809, 'learning_rate': 0.00021698564632575363, 'epoch': 3.37}\n", + "{'loss': 8.58, 'learning_rate': 0.0002130404527561945, 'epoch': 3.4}\n", + "{'loss': 8.4629, 'learning_rate': 0.00020909525918663533, 'epoch': 3.43}\n", + "{'loss': 8.4935, 'learning_rate': 0.00020515006561707617, 'epoch': 3.46}\n", + "{'loss': 8.4126, 'learning_rate': 0.000201204872047517, 'epoch': 3.49}\n", + "{'loss': 8.4416, 'learning_rate': 0.00019725967847795787, 'epoch': 3.52}\n", + "{'loss': 8.4937, 'learning_rate': 0.00019331448490839869, 'epoch': 3.55}\n", + "{'loss': 8.5058, 'learning_rate': 0.00018936929133883955, 'epoch': 3.58}\n", + "{'loss': 8.5741, 'learning_rate': 0.00018542409776928036, 'epoch': 3.61}\n", + "{'loss': 8.4768, 'learning_rate': 0.00018147890419972123, 'epoch': 3.64}\n", + 
"{'loss': 8.4054, 'learning_rate': 0.0001775337106301621, 'epoch': 3.67}\n", + "{'loss': 8.5333, 'learning_rate': 0.0001735885170606029, 'epoch': 3.7}\n", + "{'loss': 8.3779, 'learning_rate': 0.00016964332349104377, 'epoch': 3.73}\n", + "{'loss': 8.3714, 'learning_rate': 0.0001656981299214846, 'epoch': 3.76}\n", + "{'loss': 8.4416, 'learning_rate': 0.00016175293635192544, 'epoch': 3.79}\n", + "{'loss': 8.502, 'learning_rate': 0.00015780774278236628, 'epoch': 3.82}\n", + "{'loss': 8.4547, 'learning_rate': 0.00015386254921280715, 'epoch': 3.85}\n", + "{'loss': 8.4987, 'learning_rate': 0.000149917355643248, 'epoch': 3.88}\n", + "{'loss': 8.4498, 'learning_rate': 0.00014597216207368882, 'epoch': 3.91}\n", + "{'loss': 8.4753, 'learning_rate': 0.00014202696850412966, 'epoch': 3.93}\n", + "{'loss': 8.4321, 'learning_rate': 0.0001380817749345705, 'epoch': 3.96}\n", + "{'loss': 8.4252, 'learning_rate': 0.00013413658136501134, 'epoch': 3.99}\n", + "{'loss': 8.3991, 'learning_rate': 0.00013019138779545218, 'epoch': 4.02}\n", + "{'loss': 8.3454, 'learning_rate': 0.00012624619422589302, 'epoch': 4.05}\n", + "{'loss': 8.3294, 'learning_rate': 0.00012230100065633388, 'epoch': 4.08}\n", + "{'loss': 8.3815, 'learning_rate': 0.0001183558070867747, 'epoch': 4.11}\n", + "{'loss': 8.331, 'learning_rate': 0.00011441061351721554, 'epoch': 4.14}\n", + "{'loss': 8.3407, 'learning_rate': 0.00011046541994765641, 'epoch': 4.17}\n", + "{'loss': 8.4289, 'learning_rate': 0.00010652022637809725, 'epoch': 4.2}\n", + "{'loss': 8.405, 'learning_rate': 0.00010257503280853809, 'epoch': 4.23}\n", + "{'loss': 8.4328, 'learning_rate': 9.862983923897894e-05, 'epoch': 4.26}\n", + "{'loss': 8.4265, 'learning_rate': 9.468464566941978e-05, 'epoch': 4.29}\n", + "{'loss': 8.2568, 'learning_rate': 9.073945209986061e-05, 'epoch': 4.32}\n", + "{'loss': 8.4031, 'learning_rate': 8.679425853030145e-05, 'epoch': 4.35}\n", + "{'loss': 8.3285, 'learning_rate': 8.28490649607423e-05, 'epoch': 4.38}\n", + "{'loss': 8.3277, 'learning_rate': 7.890387139118314e-05, 'epoch': 4.41}\n", + "{'loss': 8.2869, 'learning_rate': 7.4958677821624e-05, 'epoch': 4.44}\n", + "{'loss': 8.4278, 'learning_rate': 7.101348425206483e-05, 'epoch': 4.47}\n", + "{'loss': 8.3403, 'learning_rate': 6.706829068250567e-05, 'epoch': 4.5}\n", + "{'loss': 8.4259, 'learning_rate': 6.312309711294651e-05, 'epoch': 4.53}\n", + "{'loss': 8.3813, 'learning_rate': 5.917790354338735e-05, 'epoch': 4.56}\n", + "{'loss': 8.2961, 'learning_rate': 5.5232709973828204e-05, 'epoch': 4.59}\n", + "{'loss': 8.3352, 'learning_rate': 5.128751640426904e-05, 'epoch': 4.62}\n", + "{'loss': 8.3326, 'learning_rate': 4.734232283470989e-05, 'epoch': 4.64}\n", + "{'loss': 8.3014, 'learning_rate': 4.3397129265150726e-05, 'epoch': 4.67}\n", + "{'loss': 8.358, 'learning_rate': 3.945193569559157e-05, 'epoch': 4.7}\n", + "{'loss': 8.4064, 'learning_rate': 3.5506742126032416e-05, 'epoch': 4.73}\n", + "{'loss': 8.2876, 'learning_rate': 3.1561548556473254e-05, 'epoch': 4.76}\n", + "{'loss': 8.3134, 'learning_rate': 2.7616354986914102e-05, 'epoch': 4.79}\n", + "{'loss': 8.1968, 'learning_rate': 2.3671161417354944e-05, 'epoch': 4.82}\n", + "{'loss': 8.3942, 'learning_rate': 1.9725967847795785e-05, 'epoch': 4.85}\n", + "{'loss': 8.3894, 'learning_rate': 1.5780774278236627e-05, 'epoch': 4.88}\n", + "{'loss': 8.3288, 'learning_rate': 1.1835580708677472e-05, 'epoch': 4.91}\n", + "{'loss': 8.3074, 'learning_rate': 7.890387139118313e-06, 'epoch': 4.94}\n", + "{'loss': 8.3924, 'learning_rate': 3.945193569559157e-06, 'epoch': 
4.97}\n", + "{'loss': 8.3114, 'learning_rate': 0.0, 'epoch': 5.0}\n", + "{'train_runtime': 268.0183, 'train_samples_per_second': 0.019, 'train_steps_per_second': 12.611, 'train_loss': 8.906013858953171, 'epoch': 5.0}\n", + "\n", + "***** Evaluation results for day 2 (train set):*****\n", + "\n", + "{'train_/next-item/ndcg_at_10': 0.08422642946243286, 'train_/next-item/ndcg_at_20': 0.10001382976770401, 'train_/next-item/recall_at_10': 0.15468750894069672, 'train_/next-item/recall_at_20': 0.21757812798023224, 'train_/loss': 7.968885898590088, 'train_runtime': 0.6484, 'train_samples_per_second': 3948.003, 'train_steps_per_second': 30.844}\n", + "\n", + "***** Evaluation results for day 2 (eval set):*****\n", + "\n", + "{'eval_/next-item/ndcg_at_10': 0.08305524289608002, 'eval_/next-item/ndcg_at_20': 0.09936655312776566, 'eval_/next-item/recall_at_10': 0.15436746180057526, 'eval_/next-item/recall_at_20': 0.2190323770046234, 'eval_/loss': 8.334789276123047, 'eval_runtime': 2.2443, 'eval_samples_per_second': 4733.773, 'eval_steps_per_second': 36.983}" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "54d6ef61", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/Models\n", + " * [new branch] ci/horovod -> origin/ci/horovod\n", + " * [new branch] codespell_fix -> origin/codespell_fix\n", + " 16fb4149..b1c10317 fea-sok-integration-wj -> origin/fea-sok-integration-wj\n", + " * [new branch] fea-sok-load-dump -> origin/fea-sok-load-dump\n", + " 95462360..a69adf75 gh-pages -> origin/gh-pages\n", + " * [new branch] inference_benchmarking_transformers -> origin/inference_benchmarking_transformers\n", + " 835ad186..e7fe759c main -> origin/main\n", + " * [new branch] mtl_example -> origin/mtl_example\n", + " cb431a8a..b90e9a1b release-22.12 -> origin/release-22.12\n", + " * [new branch] release-23.02 -> origin/release-23.02\n", + " * [new branch] tf/column_sampling_serialization_fix -> origin/tf/column_sampling_serialization_fix\n", + " * [new branch] tf/continuous_seq_feats_fix -> origin/tf/continuous_seq_feats_fix\n", + " * [new branch] tf/dataloader_changes -> origin/tf/dataloader_changes\n", + " * [new branch] tf/fix_broadcast_to_sequence -> origin/tf/fix_broadcast_to_sequence\n", + " * [new branch] tf/fix_training_smaller_accuracy -> origin/tf/fix_training_smaller_accuracy\n", + " * [new branch] tf/mtl_example_updates_v2 -> origin/tf/mtl_example_updates_v2\n", + " + 169f3df5...06eecddd tf/output-block -> origin/tf/output-block (forced update)\n", + " * [new branch] tf/process_list_to_prepare_features -> origin/tf/process_list_to_prepare_features\n", + " * [new branch] tf/quick_start_ranking -> origin/tf/quick_start_ranking\n", + " * [new branch] tf/transformer-api -> origin/tf/transformer-api\n", + " + 0a65d603...9f53e8ff update_07 -> origin/update_07 (forced update)\n", + " * [new tag] v23.02.00 -> v23.02.00\n", + "Previous HEAD position was cb431a8a Fix the serialization of `SequenceSummary` block (#927)\n", + "HEAD is now at a92bdc24 adjust sample_weights to targets shape\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing /models\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with 
status 'done'\n", + "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-models==23.2.0+6.ga92bdc24) (0.10.0)\n", + "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from merlin-models==23.2.0+6.ga92bdc24) (0.0.4)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2022.7.1)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (0.56.4)\n", + "Requirement already satisfied: pandas<1.4.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.3.5)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2022.7.1)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (3.19.6)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (22.0)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2022.5.0)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (4.64.1)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (8.0.0)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.2.5)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.12.0)\n", + "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2.2.0)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (0.12.0)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.3.0)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (6.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (0.39.1)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (45.2.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (5.2.0)\n", + "Requirement already satisfied: numpy<1.24,>=1.18 in /usr/local/lib/python3.8/dist-packages (from 
numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.22.4)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2022.7)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (6.1)\n", + "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (8.1.3)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.0.0)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (3.1.2)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2.4.0)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.7.0)\n", + "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.26.13)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2.2.0)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.0.4)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (5.9.4)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (0.4.3)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.2.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.57.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (3.11.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.14.0)\n", + "Requirement already satisfied: 
MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2.1.1)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.0.1)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (4.1.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (6.0.4)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (4.0.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (6.0.1)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Building wheels for collected packages: merlin-models\n", + " Building wheel for merlin-models (PEP 517): started\n", + " Building wheel for merlin-models (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-models: filename=merlin_models-23.2.0+6.ga92bdc24-py3-none-any.whl size=374609 sha256=2aa872a5f1575151273bcc94d5c4b0205a1f22af84ab44d48d9f75d74f9daa93\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-l8ge0dm1/wheels/4d/e8/98/0493db55fff90dc9af123f55a9455b96f7f8166c912a02c8a6\n", + "Successfully built merlin-models\n", + "Installing collected packages: merlin-models\n", + " Attempting uninstall: merlin-models\n", + " Found existing installation: merlin-models 0.11.0\n", + " Uninstalling merlin-models-0.11.0:\n", + " Successfully uninstalled merlin-models-0.11.0\n", + "Successfully installed merlin-models-23.2.0+6.ga92bdc24\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Previous HEAD position was 2fc6889 add schema parameter to the `repartition` method (#192)\n", + "Switched to branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your branch is up to date with 'origin/main'.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/core\n", + " * branch main -> FETCH_HEAD\n", + " cd96ca5f..aad0c874 main -> origin/main\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Updating cd96ca5f..aad0c874\n", + "Fast-forward\n", + " .github/release-drafter.yml | 44 +--\n", + " .github/workflows/ISSUE_TEMPLATE/bug-report.md | 17 +-\n", + " .../ISSUE_TEMPLATE/documentation-request.md | 12 +-\n", + " .../workflows/ISSUE_TEMPLATE/feature-request.md | 5 +-\n", + " .../workflows/ISSUE_TEMPLATE/submit-question.md | 3 +-\n", + " .github/workflows/ISSUE_TEMPLATE/task.md | 5 +-\n", + " .github/workflows/cpu-ci.yml | 145 +++-------\n", + " .github/workflows/cpu-models.yml | 52 ++--\n", + " .github/workflows/cpu-nvtabular.yml | 52 ++--\n", + " .github/workflows/cpu-packages.yml | 126 +++++++++\n", + " .github/workflows/cpu-systems.yml | 52 ++--\n", + " .github/workflows/docs-preview-pr.yaml | 2 +-\n", + " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", + " .github/workflows/gpu-ci.yml | 30 +-\n", + " 
.github/workflows/release-drafter.yaml | 2 +-\n", + " .pre-commit-config.yaml | 55 ++--\n", + " .prettierignore | 2 +\n", + " CLA.md | 9 +-\n", + " CONTRIBUTING.md | 28 +-\n", + " README.md | 68 ++---\n", + " ci/pr.gpu.Jenkinsfile | 2 +-\n", + " docs/README.md | 49 ++--\n", + " merlin/core/compat.py | 59 +++-\n", + " merlin/core/dispatch.py | 51 +++-\n", + " merlin/dag/__init__.py | 1 +\n", + " merlin/dag/base_operator.py | 30 +-\n", + " merlin/dag/dictarray.py | 3 +-\n", + " merlin/dag/executors.py | 107 ++++---\n", + " merlin/dag/graph.py | 20 ++\n", + " merlin/dag/node.py | 2 +-\n", + " merlin/dag/utils.py | 69 +++++\n", + " merlin/dispatch/lazy.py | 152 ++++++++++\n", + " merlin/dtypes/__init__.py | 60 ++++\n", + " merlin/dtypes/aliases.py | 52 ++++\n", + " merlin/dtypes/base.py | 178 ++++++++++++\n", + " merlin/dtypes/mapping.py | 173 ++++++++++++\n", + " merlin/dtypes/mappings/__init__.py | 18 ++\n", + " merlin/dtypes/mappings/cudf.py | 57 ++++\n", + " merlin/dtypes/mappings/numpy.py | 52 ++++\n", + " merlin/dtypes/mappings/pandas.py | 38 +++\n", + " merlin/dtypes/mappings/python.py | 31 ++\n", + " merlin/dtypes/mappings/tf.py | 52 ++++\n", + " merlin/dtypes/mappings/torch.py | 43 +++\n", + " merlin/dtypes/mappings/triton.py | 53 ++++\n", + " merlin/dtypes/registry.py | 142 ++++++++++\n", + " merlin/dtypes/shape.py | 183 ++++++++++++\n", + " merlin/io/avro.py | 4 -\n", + " merlin/io/csv.py | 1 -\n", + " merlin/io/dask.py | 6 +-\n", + " merlin/io/dataset.py | 19 +-\n", + " merlin/io/fsspec_utils.py | 8 +-\n", + " merlin/io/parquet.py | 8 -\n", + " merlin/io/writer.py | 1 -\n", + " merlin/schema/io/tensorflow_metadata.py | 86 +++---\n", + " merlin/schema/schema.py | 298 +++++++++++---------\n", + " merlin/table/__init__.py | 24 ++\n", + " merlin/table/conversions.py | 135 +++++++++\n", + " merlin/table/cupy_column.py | 92 ++++++\n", + " merlin/table/numpy_column.py | 100 +++++++\n", + " merlin/table/tensor_column.py | 217 ++++++++++++++\n", + " merlin/table/tensor_table.py | 222 +++++++++++++++\n", + " merlin/table/tensorflow_column.py | 159 +++++++++++\n", + " merlin/table/torch_column.py | 124 ++++++++\n", + " requirements.txt | 5 +-\n", + " tests/conftest.py | 16 +-\n", + " tests/unit/core/test_dispatch.py | 19 ++\n", + " tests/unit/core/test_version.py | 4 +\n", + " tests/unit/dag/test_dag_utils.py | 31 ++\n", + " tests/unit/dispatch/test_lazy_dispatch.py | 61 ++++\n", + " tests/unit/dtypes/test_module.py | 48 ++++\n", + " tests/unit/dtypes/test_shape.py | 222 +++++++++++++++\n", + " tests/unit/io/test_io.py | 27 +-\n", + " tests/unit/schema/test_column_schemas.py | 142 ++++++----\n", + " tests/unit/schema/test_schema.py | 7 +-\n", + " tests/unit/schema/test_schema_io.py | 27 +-\n", + " tests/unit/table/test_convert_column.py | 75 +++++\n", + " tests/unit/table/test_tensor_column.py | 186 ++++++++++++\n", + " tests/unit/table/test_tensor_table.py | 311 +++++++++++++++++++++\n", + " tests/unit/utils/test_utils.py | 3 -\n", + " tox.ini | 4 +\n", + " 80 files changed, 4413 insertions(+), 672 deletions(-)\n", + " create mode 100644 .github/workflows/cpu-packages.yml\n", + " create mode 100644 .prettierignore\n", + " create mode 100644 merlin/dag/utils.py\n", + " create mode 100644 merlin/dispatch/lazy.py\n", + " create mode 100644 merlin/dtypes/__init__.py\n", + " create mode 100644 merlin/dtypes/aliases.py\n", + " create mode 100644 merlin/dtypes/base.py\n", + " create mode 100644 merlin/dtypes/mapping.py\n", + " create mode 100644 merlin/dtypes/mappings/__init__.py\n", + " create 
mode 100644 merlin/dtypes/mappings/cudf.py\n", + " create mode 100644 merlin/dtypes/mappings/numpy.py\n", + " create mode 100644 merlin/dtypes/mappings/pandas.py\n", + " create mode 100644 merlin/dtypes/mappings/python.py\n", + " create mode 100644 merlin/dtypes/mappings/tf.py\n", + " create mode 100644 merlin/dtypes/mappings/torch.py\n", + " create mode 100644 merlin/dtypes/mappings/triton.py\n", + " create mode 100644 merlin/dtypes/registry.py\n", + " create mode 100644 merlin/dtypes/shape.py\n", + " create mode 100644 merlin/table/__init__.py\n", + " create mode 100644 merlin/table/conversions.py\n", + " create mode 100644 merlin/table/cupy_column.py\n", + " create mode 100644 merlin/table/numpy_column.py\n", + " create mode 100644 merlin/table/tensor_column.py\n", + " create mode 100644 merlin/table/tensor_table.py\n", + " create mode 100644 merlin/table/tensorflow_column.py\n", + " create mode 100644 merlin/table/torch_column.py\n", + " create mode 100644 tests/unit/dag/test_dag_utils.py\n", + " create mode 100644 tests/unit/dispatch/test_lazy_dispatch.py\n", + " create mode 100644 tests/unit/dtypes/test_module.py\n", + " create mode 100644 tests/unit/dtypes/test_shape.py\n", + " create mode 100644 tests/unit/table/test_convert_column.py\n", + " create mode 100644 tests/unit/table/test_tensor_column.py\n", + " create mode 100644 tests/unit/table/test_tensor_table.py\n", + "Processing /core\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (2022.5.0)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (22.0)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (11.4.1)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (3.19.6)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (1.3.5)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (8.0.0)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (0.56.4)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (2022.7.1)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (1.2.5)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (2022.7.1)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (1.12.0)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from 
merlin-core==0.9.0+56.gaad0c874) (4.64.1)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+56.gaad0c874) (2022.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+56.gaad0c874) (2.8.2)\n", + "Requirement already satisfied: numpy>=1.17.3; platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+56.gaad0c874) (1.22.4)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+56.gaad0c874) (5.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+56.gaad0c874) (0.39.1)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core==0.9.0+56.gaad0c874) (45.2.0)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (6.0)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (0.12.0)\n", + "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (2.2.0)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.3.0)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+56.gaad0c874) (1.2.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+56.gaad0c874) (0.4.3)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.7.0)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (6.1)\n", + "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.26.13)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.0.4)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (5.9.4)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.0.0)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (2.4.0)\n", + "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (8.1.3)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from 
distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (2.2.0)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (3.1.2)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+56.gaad0c874) (1.57.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+56.gaad0c874) (1.3.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+56.gaad0c874) (1.14.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core==0.9.0+56.gaad0c874) (3.11.0)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+56.gaad0c874) (4.1.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+56.gaad0c874) (6.0.4)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.0.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (2.1.1)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+56.gaad0c874) (6.0.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+56.gaad0c874) (4.0.0)\n", + "Building wheels for collected packages: merlin-core\n", + " Building wheel for merlin-core (PEP 517): started\n", + " Building wheel for merlin-core (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-core: filename=merlin_core-0.9.0+56.gaad0c874-py3-none-any.whl size=152601 sha256=e6e379a2bc1756cddf2a2ed74086c0071fd68f95bba9432dae3f8096116fbb8a\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-nvai80xu/wheels/8f/da/8c/c779661788874afaa32fd10abeac6016635956e3bad9940584\n", + "Successfully built merlin-core\n", + "Installing collected packages: merlin-core\n", + " Attempting uninstall: merlin-core\n", + " Found existing installation: merlin-core 0.10.0\n", + " Uninstalling merlin-core-0.10.0:\n", + " Successfully uninstalled merlin-core-0.10.0\n", + "Successfully installed merlin-core-0.9.0+56.gaad0c874\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Previous HEAD position was 020b24b7 Fix output error occurring due to check if it is a dict or not (#1742)\n", + "Switched to branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your branch is up to date with 'origin/main'.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/NVTabular\n", + " * branch main -> FETCH_HEAD\n", + " c5bc4098..9b186ee9 main -> origin/main\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Updating c5bc4098..9b186ee9\n", + "Fast-forward\n", + " 
.github/ISSUE_TEMPLATE/bug_report.md | 11 +-\n", + " .github/ISSUE_TEMPLATE/documentation-request.md | 3 +-\n", + " .github/ISSUE_TEMPLATE/feature_request.md | 3 +-\n", + " .github/ISSUE_TEMPLATE/operator_request.md | 14 +-\n", + " .github/ISSUE_TEMPLATE/research_question.md | 3 +-\n", + " .github/ISSUE_TEMPLATE/submit-question.md | 3 +-\n", + " .github/ISSUE_TEMPLATE/task.md | 4 +-\n", + " .github/release-drafter.yml | 44 ++--\n", + " .github/workflows/blossom-ci.yml | 230 ++++++++++-----------\n", + " .github/workflows/conda-env-create.yml | 30 +--\n", + " .github/workflows/cpu-ci.yml | 138 -------------\n", + " .github/workflows/cpu-packages.yml | 132 ++++++++++++\n", + " .github/workflows/cpu-tests.yml | 69 +++++++\n", + " .github/workflows/docs-preview-pr.yaml | 2 +-\n", + " .github/workflows/docs-sched-rebuild.yaml | 6 +-\n", + " .github/workflows/gpu-ci.yml | 30 ---\n", + " .github/workflows/gpu-tests.yml | 30 +++\n", + " .gitlab-ci.yml | 23 +--\n", + " .pre-commit-config.yaml | 47 +++--\n", + " .prettierignore | 2 +\n", + " CHANGELOG.md | 187 ++++++++---------\n", + " CONTRIBUTING.md | 30 +--\n", + " README.md | 48 ++---\n", + " bench/datasets/tools/train_tensorflow.py | 1 -\n", + " bench/examples/MultiGPUBench.md | 67 +++---\n", + " ci/pr.gpu.Jenkinsfile | 2 +-\n", + " conda/environments/nvtabular_aws_sagemaker.yml | 2 +-\n", + " docs/README.md | 18 +-\n", + " docs/source/core_features.md | 48 ++---\n", + " docs/source/resources/architecture.md | 17 +-\n", + " docs/source/resources/cloud_integration.md | 24 ++-\n", + " docs/source/resources/links.md | 40 ++--\n", + " docs/source/toc.yaml | 12 +-\n", + " examples/01-Getting-started.ipynb | 5 +-\n", + " examples/02-Advanced-NVTabular-workflow.ipynb | 5 +-\n", + " .../03-Running-on-multiple-GPUs-or-on-CPU.ipynb | 5 +-\n", + " examples/README.md | 1 +\n", + " nvtabular/inference/__init__.py | 4 +-\n", + " nvtabular/inference/triton/ensemble.py | 86 ++------\n", + " nvtabular/inference/triton/model/model_pt.py | 1 -\n", + " nvtabular/inference/workflow/hugectr.py | 2 +-\n", + " nvtabular/loader/backend.py | 31 +--\n", + " nvtabular/loader/tensorflow.py | 1 +\n", + " nvtabular/ops/categorify.py | 2 -\n", + " nvtabular/ops/groupby.py | 35 ++--\n", + " nvtabular/ops/join_external.py | 1 -\n", + " nvtabular/ops/join_groupby.py | 18 +-\n", + " nvtabular/ops/list_slice.py | 22 +-\n", + " nvtabular/ops/moments.py | 2 -\n", + " nvtabular/ops/reduce_dtype_size.py | 9 +-\n", + " nvtabular/ops/value_counts.py | 14 +-\n", + " nvtabular/workflow/workflow.py | 113 +++++++++-\n", + " requirements-test.txt | 2 -\n", + " requirements/test.txt | 3 +-\n", + " setup.py | 5 +\n", + " tests/conftest.py | 1 -\n", + " .../test_02-Advanced-NVTabular-workflow.py | 12 +-\n", + " tests/unit/ops/test_column_similarity.py | 1 -\n", + " tests/unit/ops/test_groupyby.py | 2 +-\n", + " tests/unit/ops/test_lambda.py | 28 ++-\n", + " tests/unit/ops/test_ops_schema.py | 25 ++-\n", + " tests/unit/ops/test_value_count.py | 2 +\n", + " tests/unit/workflow/test_workflow.py | 75 ++++++-\n", + " tox.ini | 9 +-\n", + " 64 files changed, 1056 insertions(+), 786 deletions(-)\n", + " delete mode 100644 .github/workflows/cpu-ci.yml\n", + " create mode 100644 .github/workflows/cpu-packages.yml\n", + " create mode 100644 .github/workflows/cpu-tests.yml\n", + " delete mode 100644 .github/workflows/gpu-ci.yml\n", + " create mode 100644 .github/workflows/gpu-tests.yml\n", + " create mode 100644 .prettierignore\n", + " delete mode 100644 requirements-test.txt\n", + "Processing 
/nvtabular\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) (0.9.0+56.gaad0c874)\n", + "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) (0.0.4)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) (1.9.3)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (4.64.1)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.2.5)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7.1)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.56.4)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.5.0)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (22.0)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (11.4.1)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.5)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.12.0)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (8.0.0)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.19.6)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7.1)\n", + "Requirement already satisfied: numpy<1.26.0,>=1.18.5 in /usr/local/lib/python3.8/dist-packages (from scipy->nvtabular==1.6.0+42.g9b186ee9) (1.22.4)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.4.3)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.2.0)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.2.0)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from 
distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.7.0)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.0.0)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.2.0)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.1)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.4.0)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (5.9.4)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.0.4)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.1.2)\n", + "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (8.1.3)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.12.0)\n", + "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.26.13)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (5.2.0)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (45.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.39.1)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.57.0)\n", + "Requirement already satisfied: partd>=0.3.10 in 
/usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.0)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (4.1.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0.4)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.0.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.1.1)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.11.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.14.0)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (4.0.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0.1)\n", + "Building wheels for collected packages: nvtabular\n", + " Building wheel for nvtabular (PEP 517): started\n", + " Building wheel for nvtabular (PEP 517): finished with status 'done'\n", + " Created wheel for nvtabular: filename=nvtabular-1.6.0+42.g9b186ee9-cp38-cp38-linux_x86_64.whl size=258506 sha256=33bd39a7ce6bd4d1b7e81ef0ecd16abcffc75944d1a9a8510902f42658baf22e\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-ws2h8usp/wheels/df/bf/c2/9cc2a62fe6da42038c26a9c0c4e25f9767093528b102fa30a2\n", + "Successfully built nvtabular\n", + "Installing collected packages: nvtabular\n", + " Attempting uninstall: nvtabular\n", + " Found existing installation: nvtabular 1.8.0\n", + " Uninstalling nvtabular-1.8.0:\n", + " Successfully uninstalled nvtabular-1.8.0\n", + "Successfully installed nvtabular-1.6.0+42.g9b186ee9\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Previous HEAD position was feaf748 adding async tf strategy for gpu memory (#264)\n", + "Switched to branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your branch is up to date with 'origin/main'.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/systems\n", + " * branch main -> FETCH_HEAD\n", + " 20bb231..329cba4 main -> origin/main\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Updating 20bb231..329cba4\n", + "Fast-forward\n", + " .github/ISSUE_TEMPLATE/bug-report.md | 17 +-\n", + " .github/ISSUE_TEMPLATE/documentation-request.md | 12 +-\n", + " .github/ISSUE_TEMPLATE/feature-request.md | 5 +-\n", + " .github/ISSUE_TEMPLATE/submit-question.md | 3 +-\n", + " .github/ISSUE_TEMPLATE/task.md | 5 +-\n", + " .github/release-drafter.yml | 44 +-\n", + " .github/workflows/cpu-ci.yml | 112 ++--\n", + " 
.github/workflows/docs-preview-pr.yaml | 2 +-\n", + " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", + " .github/workflows/gpu-ci.yml | 32 +-\n", + " .github/workflows/lint.yaml | 12 +-\n", + " .github/workflows/release-drafter.yml | 2 +-\n", + " .pre-commit-config.yaml | 71 +-\n", + " .prettierignore | 2 +\n", + " CLA.md | 9 +-\n", + " CONTRIBUTING.md | 2 +-\n", + " README.md | 2 +-\n", + " ci/pr.gpu.Jenkinsfile | 2 +-\n", + " docs/README.md | 53 +-\n", + " ...ing-An-Implicit-Model-With-Merlin-Systems.ipynb | 5 +-\n", + " ...ving-An-XGboost-Model-With-Merlin-Systems.ipynb | 5 +-\n", + " ...erving-Ranking-Models-With-Merlin-Systems.ipynb | 5 +-\n", + " merlin/systems/dag/dictarray.py | 4 +-\n", + " merlin/systems/dag/op_runner.py | 1 -\n", + " merlin/systems/dag/ops/__init__.py | 11 +-\n", + " merlin/systems/dag/ops/faiss.py | 4 +-\n", + " merlin/systems/dag/ops/feast.py | 80 +--\n", + " merlin/systems/dag/ops/fil.py | 4 +-\n", + " merlin/systems/dag/ops/implicit.py | 72 +-\n", + " merlin/systems/dag/ops/operator.py | 189 +-----\n", + " merlin/systems/dag/ops/pytorch.py | 4 +-\n", + " merlin/systems/dag/ops/session_filter.py | 4 +-\n", + " merlin/systems/dag/ops/softmax_sampling.py | 17 +-\n", + " merlin/systems/dag/ops/unroll_features.py | 4 +-\n", + " merlin/systems/dag/ops/workflow.py | 4 +-\n", + " merlin/systems/dag/runtimes/triton/ops/implicit.py | 185 ++++++\n", + " merlin/systems/dag/runtimes/triton/ops/operator.py | 169 ++++-\n", + " merlin/systems/dag/runtimes/triton/ops/pytorch.py | 2 +-\n", + " .../systems/dag/runtimes/triton/ops/tensorflow.py | 12 +-\n", + " merlin/systems/dag/runtimes/triton/ops/workflow.py | 141 +++-\n", + " merlin/systems/dag/runtimes/triton/runtime.py | 14 +-\n", + " merlin/systems/triton/__init__.py | 33 +-\n", + " merlin/systems/triton/export.py | 724 +--------------------\n", + " merlin/systems/triton/models/executor_model.py | 34 +-\n", + " merlin/systems/triton/models/oprunner_model.py | 32 +-\n", + " merlin/systems/triton/models/pytorch_model.py | 127 ++--\n", + " merlin/systems/triton/models/workflow_model.py | 50 +-\n", + " merlin/systems/triton/utils.py | 35 +-\n", + " tests/conftest.py | 4 +-\n", + " ...erving_an_implicit_model_with_merlin_systems.py | 4 +-\n", + " ...serving_an_xgboost_model_with_merlin_systems.py | 4 +-\n", + " tests/unit/systems/dag/ops/test_ops.py | 20 +-\n", + " .../runtimes/local/ops/nvtabular/test_ensemble.py | 2 +-\n", + " .../triton/ops/fil/test_lightgbm_triton.py | 4 +-\n", + " .../runtimes/triton/ops/fil/test_sklearn_triton.py | 4 +-\n", + " .../runtimes/triton/ops/fil/test_xgboost_triton.py | 4 +-\n", + " .../dag/runtimes/triton/ops/torch/test_op.py | 4 +-\n", + " .../runtimes/triton/ops/workflow/test_ensemble.py | 67 +-\n", + " .../systems/dag/runtimes/triton/test_triton.py | 4 +-\n", + " tests/unit/systems/dag/test_dict_array.py | 4 +-\n", + " tests/unit/systems/dag/test_executors.py | 4 +-\n", + " tests/unit/systems/ops/faiss/test_executor.py | 4 +-\n", + " tests/unit/systems/ops/feast/test_op.py | 46 +-\n", + " tests/unit/systems/ops/fil/test_ensemble.py | 4 +-\n", + " tests/unit/systems/ops/implicit/test_executor.py | 4 +-\n", + " tests/unit/systems/ops/implicit/test_op.py | 11 +-\n", + " tests/unit/systems/ops/tf/test_ensemble.py | 4 +-\n", + " tests/unit/systems/utils/ops.py | 7 +-\n", + " tests/unit/test_export.py | 77 ---\n", + " tox.ini | 1 -\n", + " 70 files changed, 1072 insertions(+), 1580 deletions(-)\n", + " create mode 100644 .prettierignore\n", + " create mode 100644 
merlin/systems/dag/runtimes/triton/ops/implicit.py\n", + " delete mode 100644 tests/unit/test_export.py\n", + "Processing /systems\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: requests<3,>=2.10 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.28.1)\n", + "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (0.9.0+56.gaad0c874)\n", + "Requirement already satisfied: treelite-runtime==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.4.0)\n", + "Requirement already satisfied: treelite==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.4.0)\n", + "Requirement already satisfied: nvtabular>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (1.6.0+42.g9b186ee9)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2019.11.28)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2.8)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (1.26.13)\n", + "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2.1.1)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.3.5)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.5.0)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7.1)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (11.4.1)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.2.5)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (22.0)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7.1)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (8.0.0)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.64.1)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from 
merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.12.0)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.19.6)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.56.4)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.8/dist-packages (from treelite-runtime==2.4.0->merlin-systems==0.7.0+61.g329cba4) (1.22.4)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from treelite-runtime==2.4.0->merlin-systems==0.7.0+61.g329cba4) (1.9.3)\n", + "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from nvtabular>=1.0.0->merlin-systems==0.7.0+61.g329cba4) (0.0.4)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.8.2)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.12.0)\n", + "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.2.0)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.3.0)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.2.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.4.3)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.4.0)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.2.0)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.1.2)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.1)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (5.9.4)\n", + "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (8.1.3)\n", + 
"Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.0)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.7.0)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.4)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.57.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.3.0)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (45.2.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (5.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.39.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.14.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0.4)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.1.0)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.1.1)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.11.0)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.0.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0.1)\n", + "Building wheels for collected packages: merlin-systems\n", + " Building wheel for merlin-systems (PEP 517): started\n", + " Building wheel for merlin-systems (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-systems: filename=merlin_systems-0.7.0+61.g329cba4-py3-none-any.whl size=99480 sha256=50ebea88cab88355f4a562867fa250a1754ad79ba82ab44a242f1451ff918f50\n", + " Stored in directory: 
/tmp/pip-ephem-wheel-cache-ig69oyt6/wheels/1f/e9/71/1b0c6295aa7f4b37cb70292d96d87d9f38204674e6531bdda6\n", + "Successfully built merlin-systems\n", + "Installing collected packages: merlin-systems\n", + " Attempting uninstall: merlin-systems\n", + " Found existing installation: merlin-systems 0.9.0\n", + " Uninstalling merlin-systems-0.9.0:\n", + " Successfully uninstalled merlin-systems-0.9.0\n", + "Successfully installed merlin-systems-0.7.0+61.g329cba4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Previous HEAD position was fd5d3fc Use tf.function for list column operations (#89)\n", + "Switched to branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your branch is up to date with 'origin/main'.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/dataloader\n", + " * branch main -> FETCH_HEAD\n", + " 5b3fe46..dbf8816 main -> origin/main\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Updating 5b3fe46..dbf8816\n", + "Fast-forward\n", + " .github/workflows/cpu-ci.yml | 81 -----\n", + " .github/workflows/cpu-packages.yml | 125 +++++++\n", + " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", + " .pre-commit-config.yaml | 14 +-\n", + " ci/pr.gpu.Jenkinsfile | 44 +++\n", + " docs/README.md | 28 +-\n", + " examples/01a-Getting-started-Tensorflow.ipynb | 5 +-\n", + " examples/01b-Getting-started-Pytorch.ipynb | 9 +-\n", + " .../02-Multi-GPU-Tensorflow-with-Horovod.ipynb | 371 +++++++++++++++++++++\n", + " merlin/dataloader/jax.py | 3 +\n", + " merlin/dataloader/loader_base.py | 221 ++++--------\n", + " .../ops/embeddings/torch_embedding_op.py | 4 +-\n", + " merlin/dataloader/tensorflow.py | 9 +-\n", + " merlin/dataloader/torch.py | 49 ++-\n", + " merlin/dataloader/utils/tf/tf_trainer.py | 2 +-\n", + " .../test_multi_GPU_with_horovod_and_tensorflow.py | 28 ++\n", + " tests/unit/dataloader/test_tf_dataloader.py | 20 +-\n", + " tests/unit/dataloader/test_torch_dataloader.py | 38 +++\n", + " tox.ini | 1 +\n", + " 19 files changed, 781 insertions(+), 278 deletions(-)\n", + " create mode 100644 .github/workflows/cpu-packages.yml\n", + " create mode 100644 ci/pr.gpu.Jenkinsfile\n", + " create mode 100644 examples/02-Multi-GPU-Tensorflow-with-Horovod.ipynb\n", + " create mode 100644 tests/examples/test_multi_GPU_with_horovod_and_tensorflow.py\n", + "Processing /dataloader\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: merlin-core>=0.8.0 in /usr/local/lib/python3.8/dist-packages (from merlin-dataloader==0.0.2+41.gdbf8816) (0.9.0+56.gaad0c874)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (22.0)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.3.5)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (8.0.0)\n", + "Requirement already satisfied: numba>=0.54 in 
/usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (0.56.4)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2022.7.1)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (4.64.1)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.12.0)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.2.5)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (3.19.6)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2022.5.0)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (11.4.1)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2022.7.1)\n", + "Requirement already satisfied: numpy>=1.17.3; platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.22.4)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2022.7)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (5.2.0)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (45.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (0.39.1)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (0.12.0)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (6.0)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.3.0)\n", + "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.2.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in 
/usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.57.0)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.2.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (0.4.3)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.0.4)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.4.0)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.0.0)\n", + "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.26.13)\n", + "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (8.1.3)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (6.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (3.1.2)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (5.9.4)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.2.0)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.7.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.14.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (3.11.0)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (4.1.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (6.0.4)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in 
/usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.1.1)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.0.1)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (6.0.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (4.0.0)\n", + "Building wheels for collected packages: merlin-dataloader\n", + " Building wheel for merlin-dataloader (PEP 517): started\n", + " Building wheel for merlin-dataloader (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-dataloader: filename=merlin_dataloader-0.0.2+41.gdbf8816-py3-none-any.whl size=40852 sha256=90d5b8cd5d1b74f242a2d155c11b3a4c34b029ef43f752c03f8f8b0a357be6b3\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-6c80kdug/wheels/8c/19/5b/15dc04f5a977f6a7f73ed66c91996a687b1d9e3154a4765536\n", + "Successfully built merlin-dataloader\n", + "Installing collected packages: merlin-dataloader\n", + " Attempting uninstall: merlin-dataloader\n", + " Found existing installation: merlin-dataloader 0.0.4\n", + " Uninstalling merlin-dataloader-0.0.4:\n", + " Successfully uninstalled merlin-dataloader-0.0.4\n", + "Successfully installed merlin-dataloader-0.0.2+41.gdbf8816\n", + "Collecting matplotlib\n", + " Downloading matplotlib-3.7.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (9.2 MB)\n", + "Requirement already satisfied: numpy>=1.20 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (1.22.4)\n", + "Requirement already satisfied: importlib-resources>=3.2.0; python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from matplotlib) (5.10.2)\n", + "Collecting fonttools>=4.22.0\n", + " Downloading fonttools-4.39.0-py3-none-any.whl (1.0 MB)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (22.0)\n", + "Collecting cycler>=0.10\n", + " Downloading cycler-0.11.0-py3-none-any.whl (6.4 kB)\n", + "Collecting contourpy>=1.0.1\n", + " Downloading contourpy-1.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (300 kB)\n", + "Collecting pillow>=6.2.0\n", + " Downloading Pillow-9.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (3.0.9)\n", + "Collecting kiwisolver>=1.0.1\n", + " Downloading kiwisolver-1.4.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.2 MB)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (2.8.2)\n", + "Requirement already satisfied: zipp>=3.1.0; python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from importlib-resources>=3.2.0; python_version < \"3.10\"->matplotlib) (3.11.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7->matplotlib) (1.14.0)\n", + "Installing collected packages: fonttools, cycler, contourpy, pillow, kiwisolver, matplotlib\n", + "Successfully installed contourpy-1.0.7 cycler-0.11.0 
fonttools-4.39.0 kiwisolver-1.4.4 matplotlib-3.7.1 pillow-9.4.0\n" + ] + } + ], + "source": [ + "%%bash\n", + "\n", + "cd /models && git fetch origin && git checkout origin/tf/transformer-api && pip install .\n", + "cd /core && git checkout main && git pull origin main && pip install .\n", + "cd /nvtabular && git checkout main && git pull origin main && pip install .\n", + "cd /systems && git checkout main && git pull origin main && pip install .\n", + "cd /dataloader && git checkout main && git pull origin main && pip install .\n", + "pip install matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "152aee86", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: gdown in /usr/local/lib/python3.8/dist-packages (4.6.4)\n", + "Requirement already satisfied: requests[socks] in /usr/local/lib/python3.8/dist-packages (from gdown) (2.28.1)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from gdown) (4.64.1)\n", + "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.8/dist-packages (from gdown) (4.11.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from gdown) (3.9.0)\n", + "Requirement already satisfied: six in /usr/lib/python3/dist-packages (from gdown) (1.14.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2019.11.28)\n", + "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (2.1.1)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.26.13)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2.8)\n", + "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6; extra == \"socks\" in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.7.1)\n", + "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.8/dist-packages (from beautifulsoup4->gdown) (2.3.2.post1)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Downloading...\n", + "From: https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", + "To: /workspace/rees46_ecom_dataset_small_for_ci.zip\n", + "100%|██████████| 43.4M/43.4M [00:08<00:00, 5.36MB/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 InRelease\n", + "Hit:2 http://archive.ubuntu.com/ubuntu focal InRelease\n", + "Get:3 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]\n", + "Get:4 http://archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]\n", + "Get:5 http://archive.ubuntu.com/ubuntu focal-backports InRelease [108 kB]\n", + "Fetched 336 kB in 2s (148 kB/s)\n", + "Reading package lists...\n", + "Reading package lists...\n", + "Building dependency tree...\n", + "Reading state information...\n", + "unzip is already the newest version (6.0-25ubuntu1.1).\n", + "0 upgraded, 0 newly installed, 0 to remove and 84 not upgraded.\n", + "Archive: rees46_ecom_dataset_small_for_ci.zip\n", + " creating: ecom_dataset/0001/\n", + " inflating: ecom_dataset/0001/valid.parquet \n", + " extracting: ecom_dataset/0001/.zip \n", + " inflating: ecom_dataset/0001/train.parquet \n", + " inflating: 
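The environment setup performed above checks out pinned development branches (origin/tf/transformer-api for merlin-models; main for core, NVTabular, systems, and the dataloader) and reinstalls each library from source, so the versions pip reports (for example merlin-models 23.2.0+6.ga92bdc24 and merlin-core 0.9.0+56.gaad0c874) are what the rest of the notebook runs against. A minimal sketch of how those installs could be confirmed from Python before continuing, assuming only the distribution names shown in the "Successfully installed ..." lines above:

# Sketch: confirm the source installs that pip reported above.
# Distribution names are taken from the "Successfully installed ..." lines.
from importlib.metadata import version

for dist in ["merlin-models", "merlin-core", "nvtabular", "merlin-systems", "merlin-dataloader"]:
    print(f"{dist}: {version(dist)}")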
ecom_dataset/0001/test.parquet \n", + " creating: ecom_dataset/0002/\n", + " inflating: ecom_dataset/0002/valid.parquet \n", + " inflating: ecom_dataset/0002/train.parquet \n", + " inflating: ecom_dataset/0002/test.parquet \n" + ] + } + ], + "source": [ + "%%bash\n", + "\n", + "rm -rf ecom_dataset\n", + "mkdir -p ecom_dataset\n", + "\n", + "pip install gdown\n", + "# gdown https://drive.google.com/uc?id=1BvCHc4eXComuNK93bKhRM6cbg9y5p350 # <-- full dataset\n", + "gdown https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", + "apt-get update -y\n", + "apt-get install unzip -y\n", + "unzip -d ecom_dataset \"rees46_ecom_dataset_small_for_ci.zip\"" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "ceb3ae93", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-09 06:10:25.833595: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-09 06:10:28.225812: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-09 06:10:28.226230: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-09 06:10:28.226389: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-09 06:10:28.434063: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-03-09 06:10:28.435067: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-09 06:10:28.435273: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-09 06:10:28.435435: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, 
so returning NUMA node zero\n", + "2023-03-09 06:10:29.175980: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-09 06:10:29.176211: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-09 06:10:29.176375: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-09 06:10:29.176489: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", + "2023-03-09 06:10:29.176551: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", + "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import os\n", + "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", + "import gc\n", + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "\n", + "from merlin.schema.tags import Tags\n", + "from merlin.io.dataset import Dataset\n", + "import merlin.models.tf as mm" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "11647dd3", + "metadata": {}, + "outputs": [], + "source": [ + "train = Dataset(\"ecom_dataset/0001/train.parquet\")\n", + "valid = Dataset(\"ecom_dataset/0002/valid.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4ab4e0fb", + "metadata": {}, + "outputs": [], + "source": [ + "target = 'sess_pid_seq'" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "8d9903e6", + "metadata": {}, + "outputs": [], + "source": [ + "# a couple of starter hyperparams\n", + "\n", + "d_model = 192\n", + "n_layer = 3\n", + "n_head = 16\n", + "batch_size = 128\n", + "learning_rate = 0.0006667377132554976\n", + "n_epoch = 5" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a6ade14a", + "metadata": {}, + "outputs": [], + "source": [ + "mlp_block = mm.MLPBlock(\n", + " [128,d_model],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "7f15a0a0", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:148: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. 
Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n", + "\n", + "schema = TensorflowMetadata.from_proto_text_file(\n", + " './',\n", + " file_name='rees46_schema_modified.pbtxt'\n", + ").to_merlin_schema()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "74ccc9a9", + "metadata": {}, + "outputs": [], + "source": [ + "train.schema = schema" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "5a4c7ca3", + "metadata": {}, + "outputs": [], + "source": [ + "input_block = mm.InputBlockV2(\n", + " train.schema.select_by_name('sess_pid_seq'), \n", + " embeddings=mm.Embeddings(\n", + " train.schema.select_by_name('sess_pid_seq'), \n", + " sequence_combiner=None,\n", + " dim=d_model\n", + " ),\n", + " pre=mm.StochasticSwapNoise()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "34c739b3", + "metadata": {}, + "outputs": [], + "source": [ + "train.schema = train.schema.select_by_name('sess_pid_seq')" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "14c35b2a", + "metadata": {}, + "outputs": [], + "source": [ + "xlnet_block = mm.XLNetBlock(d_model=d_model, n_head=n_head, n_layer=n_layer)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "866f3249", + "metadata": {}, + "outputs": [], + "source": [ + "dense_block = mm.SequentialBlock(\n", + " input_block,\n", + " mlp_block,\n", + " xlnet_block\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "288d08df", + "metadata": {}, + "outputs": [], + "source": [ + "mlp_block2 = mm.MLPBlock(\n", + " [128,d_model],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "064ea5ec", + "metadata": {}, + "outputs": [], + "source": [ + "prediction_task = mm.CategoricalOutput(\n", + " to_call=input_block[\"categorical\"][target],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "6c008e16", + "metadata": {}, + "outputs": [], + "source": [ + "model_transformer = mm.Model(dense_block, mlp_block2, prediction_task)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "49b12d31", + "metadata": {}, + "outputs": [], + "source": [ + "optimizer = tf.keras.optimizers.Adam(\n", + " learning_rate=learning_rate,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "d84a30d3", + "metadata": {}, + "outputs": [], + "source": [ + "model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", + " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[4])\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a9611ab", + "metadata": {}, + "outputs": [], + "source": [ + "# model_transformer.fit(\n", + "# train,\n", + "# batch_size=batch_size,\n", + "# epochs=n_epoch,\n", + "# pre=mm.SequencePredictRandom(schema=train.schema, target=target, transformer=xlnet_block)\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "e7474131", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_2/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 
'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_2/sequential_block_12/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_2/sequential_block_12/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_2/sequential_block_12/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "665/665 [==============================] - 74s 107ms/step - loss: 8.9015 - recall_at_4: 0.0224 - mrr_at_4: 0.0129 - ndcg_at_4: 0.0153 - map_at_4: 0.0129 - precision_at_4: 0.0056 - regularization_loss: 0.0000e+00 - loss_batch: 8.8957\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "7bf839e3", + "metadata": {}, + "outputs": [], + "source": [ + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "15ccc448", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "84/84 [==============================] - 8s 40ms/step - loss: 8.8326 - recall_at_4: 0.0502 - mrr_at_4: 0.0319 - ndcg_at_4: 0.0365 - map_at_4: 0.0319 - precision_at_4: 0.0126 - regularization_loss: 0.0000e+00 - loss_batch: 8.8396\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.832579612731934,\n", + " 'recall_at_4': 0.05087455362081528,\n", + " 'mrr_at_4': 0.030891483649611473,\n", + " 'ndcg_at_4': 0.0359138660132885,\n", + " 'map_at_4': 0.030891483649611473,\n", + " 'precision_at_4': 0.01271863840520382,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 9.142295837402344}" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/train_and_save_model_for_benchmarking.ipynb 
b/train_and_save_model_for_benchmarking.ipynb index 98a6460224..0f43a5dac9 100644 --- a/train_and_save_model_for_benchmarking.ipynb +++ b/train_and_save_model_for_benchmarking.ipynb @@ -763,7 +763,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 1, "id": "8d9903e6", "metadata": {}, "outputs": [], From 73d08d74070ed0302055969cfffe905e7f7210fa Mon Sep 17 00:00:00 2001 From: Radek Osmulski Date: Thu, 9 Mar 2023 16:20:22 +1000 Subject: [PATCH 03/15] update --- .../rees46_schema_modified.pbtxt | 0 .../reproducing_T4Rec_results.ipynb | 54 +++++++------------ ...rain_and_save_model_for_benchmarking.ipynb | 0 3 files changed, 20 insertions(+), 34 deletions(-) rename rees46_schema_modified.pbtxt => T4Rec_repro/rees46_schema_modified.pbtxt (100%) rename reproducing_T4Rec_results.ipynb => T4Rec_repro/reproducing_T4Rec_results.ipynb (92%) rename train_and_save_model_for_benchmarking.ipynb => T4Rec_repro/train_and_save_model_for_benchmarking.ipynb (100%) diff --git a/rees46_schema_modified.pbtxt b/T4Rec_repro/rees46_schema_modified.pbtxt similarity index 100% rename from rees46_schema_modified.pbtxt rename to T4Rec_repro/rees46_schema_modified.pbtxt diff --git a/reproducing_T4Rec_results.ipynb b/T4Rec_repro/reproducing_T4Rec_results.ipynb similarity index 92% rename from reproducing_T4Rec_results.ipynb rename to T4Rec_repro/reproducing_T4Rec_results.ipynb index 7b066f2f65..3191b7651f 100644 --- a/reproducing_T4Rec_results.ipynb +++ b/T4Rec_repro/reproducing_T4Rec_results.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "7f851659", + "id": "14beb6b6", "metadata": {}, "source": [ "These are logs from training the following model from the CI script from T4Rec (the training was for 5 epochs):\n", @@ -14,7 +14,7 @@ }, { "cell_type": "markdown", - "id": "c0369401", + "id": "7010a6a1", "metadata": {}, "source": [ "And here are the logs and the results; maybe reproducing them is something that we could work towards (the XLNet with MLM is what I used for benchmarking T4Rec, so starting with it would be great)" ] }, { "cell_type": "markdown", - "id": "e26066be", + "id": "d4955dd7", "metadata": {}, "source": [ "03/09/2023 04:21:44 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1, distributed training: False, 16-bits training: True\n", @@ -1417,41 +1417,27 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 27, "id": "e7474131", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model_2/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_2/sequential_block_12/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_2/sequential_block_12/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_2/sequential_block_12/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "665/665 [==============================] - 74s 107ms/step - loss: 8.9015 - recall_at_4: 0.0224 - mrr_at_4: 0.0129 - ndcg_at_4: 0.0153 - map_at_4: 0.0129 - precision_at_4: 0.0056 - regularization_loss: 0.0000e+00 - loss_batch: 8.8957\n" + "ename": "TypeError", + "evalue": "('Keyword argument not understood:', 'transformer')", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[27], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m model_transformer\u001b[38;5;241m.\u001b[39mfit(\n\u001b[1;32m 2\u001b[0m train,\n\u001b[1;32m 3\u001b[0m batch_size\u001b[38;5;241m=\u001b[39mbatch_size,\n\u001b[1;32m 4\u001b[0m epochs\u001b[38;5;241m=\u001b[39mn_epoch,\n\u001b[0;32m----> 5\u001b[0m pre\u001b[38;5;241m=\u001b[39m\u001b[43mmm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSequenceMaskRandom\u001b[49m\u001b[43m(\u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mschema\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtransformer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mxlnet_block\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m )\n", + "File \u001b[0;32m/workspace/merlin/models/tf/transforms/sequence.py:469\u001b[0m, in \u001b[0;36mSequenceMaskRandom.__init__\u001b[0;34m(self, schema, target, masking_prob, **kwargs)\u001b[0m\n\u001b[1;32m 461\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 462\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 463\u001b[0m schema: Schema,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 466\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 467\u001b[0m ):\n\u001b[1;32m 468\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmasking_prob \u001b[38;5;241m=\u001b[39m masking_prob\n\u001b[0;32m--> 469\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mschema\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/workspace/merlin/models/tf/transforms/sequence.py:103\u001b[0m, in \u001b[0;36mSequenceTransform.__init__\u001b[0;34m(self, schema, target, pre, **kwargs)\u001b[0m\n\u001b[1;32m 101\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pre:\n\u001b[1;32m 102\u001b[0m _pre \u001b[38;5;241m=\u001b[39m _pre\u001b[38;5;241m.\u001b[39mconnect(pre)\n\u001b[0;32m--> 103\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mpre\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_pre\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mschema\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 105\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtarget \u001b[38;5;241m=\u001b[39m target\n\u001b[1;32m 106\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtarget_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_target(target)\n", + "File \u001b[0;32m/workspace/merlin/models/tf/core/tabular.py:122\u001b[0m, in \u001b[0;36mTabularBlock.__init__\u001b[0;34m(self, pre, post, aggregation, schema, name, is_input, **kwargs)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 113\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 114\u001b[0m pre: Optional[BlockType] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 120\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 121\u001b[0m ):\n\u001b[0;32m--> 122\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minput_size \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mset_pre(pre)\n", + "File \u001b[0;32m/workspace/merlin/models/tf/core/base.py:166\u001b[0m, in \u001b[0;36mBlock.__init__\u001b[0;34m(self, context, **kwargs)\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, context: Optional[ModelContext] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 166\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mBlock\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 167\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m context:\n\u001b[1;32m 168\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_set_context(context)\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/tensorflow/python/trackable/base.py:205\u001b[0m, in \u001b[0;36mno_automatic_dependency_tracking.._method_wrapper\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 203\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_self_setattr_tracking \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m \u001b[38;5;66;03m# pylint: disable=protected-access\u001b[39;00m\n\u001b[1;32m 204\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 205\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 206\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 207\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_self_setattr_tracking \u001b[38;5;241m=\u001b[39m previous_value \u001b[38;5;66;03m# pylint: disable=protected-access\u001b[39;00m\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/keras/engine/base_layer.py:335\u001b[0m, in \u001b[0;36mLayer.__init__\u001b[0;34m(self, trainable, name, dtype, dynamic, **kwargs)\u001b[0m\n\u001b[1;32m 324\u001b[0m allowed_kwargs \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 325\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput_dim\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 326\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput_shape\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 332\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mimplementation\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 333\u001b[0m }\n\u001b[1;32m 334\u001b[0m \u001b[38;5;66;03m# Validate optional keyword arguments.\u001b[39;00m\n\u001b[0;32m--> 335\u001b[0m \u001b[43mgeneric_utils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate_kwargs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mallowed_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 337\u001b[0m \u001b[38;5;66;03m# Mutable properties\u001b[39;00m\n\u001b[1;32m 338\u001b[0m \u001b[38;5;66;03m# Indicates whether the layer's weights are updated during training\u001b[39;00m\n\u001b[1;32m 339\u001b[0m \u001b[38;5;66;03m# and whether the layer's updates are run during training.\u001b[39;00m\n\u001b[1;32m 340\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\n\u001b[1;32m 341\u001b[0m \u001b[38;5;28misinstance\u001b[39m(trainable, \u001b[38;5;28mbool\u001b[39m)\n\u001b[1;32m 342\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 345\u001b[0m )\n\u001b[1;32m 346\u001b[0m ):\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/keras/utils/generic_utils.py:1269\u001b[0m, in 
\u001b[0;36mvalidate_kwargs\u001b[0;34m(kwargs, allowed_kwargs, error_message)\u001b[0m\n\u001b[1;32m 1267\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m kwarg \u001b[38;5;129;01min\u001b[39;00m kwargs:\n\u001b[1;32m 1268\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m kwarg \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m allowed_kwargs:\n\u001b[0;32m-> 1269\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(error_message, kwarg)\n", + "\u001b[0;31mTypeError\u001b[0m: ('Keyword argument not understood:', 'transformer')" ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -1459,7 +1445,7 @@ " train,\n", " batch_size=batch_size,\n", " epochs=n_epoch,\n", - " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + " pre=mm.SequenceMaskRandom(schema=train.schema, target=target, transformer=xlnet_block)\n", ")" ] }, diff --git a/train_and_save_model_for_benchmarking.ipynb b/T4Rec_repro/train_and_save_model_for_benchmarking.ipynb similarity index 100% rename from train_and_save_model_for_benchmarking.ipynb rename to T4Rec_repro/train_and_save_model_for_benchmarking.ipynb From 60b457b057039a232a1166e6d2f220056044c249 Mon Sep 17 00:00:00 2001 From: Radek Osmulski Date: Thu, 9 Mar 2023 16:57:05 +1000 Subject: [PATCH 04/15] update --- T4Rec_repro/reproducing_T4Rec_results.ipynb | 522 +++++++++++--------- 1 file changed, 280 insertions(+), 242 deletions(-) diff --git a/T4Rec_repro/reproducing_T4Rec_results.ipynb b/T4Rec_repro/reproducing_T4Rec_results.ipynb index 3191b7651f..8788b157d2 100644 --- a/T4Rec_repro/reproducing_T4Rec_results.ipynb +++ b/T4Rec_repro/reproducing_T4Rec_results.ipynb @@ -269,49 +269,49 @@ " Preparing wheel metadata: finished with status 'done'\n", "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-models==23.2.0+6.ga92bdc24) (0.10.0)\n", "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from merlin-models==23.2.0+6.ga92bdc24) (0.0.4)\n", - "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2022.7.1)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (0.56.4)\n", - "Requirement already satisfied: pandas<1.4.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.3.5)\n", - "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2022.7.1)\n", "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (3.19.6)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (22.0)\n", - "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2022.5.0)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (4.64.1)\n", - "Requirement already satisfied: pyarrow>=5.0.0 
in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (8.0.0)\n", "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.2.5)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (8.0.0)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2022.7.1)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (0.56.4)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (4.64.1)\n", "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.12.0)\n", - "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2.2.0)\n", - "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (0.12.0)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2022.5.0)\n", + "Requirement already satisfied: pandas<1.4.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.3.5)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2022.7.1)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.2.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (0.4.3)\n", + "Requirement already satisfied: numpy>=1.16.6 in /usr/local/lib/python3.8/dist-packages (from pyarrow>=5.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.22.4)\n", "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.3.0)\n", + "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2.2.0)\n", "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (6.0)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (0.12.0)\n", "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (0.39.1)\n", "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from 
numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (45.2.0)\n", "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (5.2.0)\n", - "Requirement already satisfied: numpy<1.24,>=1.18 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.22.4)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2.8.2)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.57.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.3.0)\n", "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2022.7)\n", - "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (6.1)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2.8.2)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.0.4)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2.2.0)\n", "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (8.1.3)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (5.9.4)\n", "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.0.0)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (3.1.2)\n", - "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2.4.0)\n", "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.7.0)\n", "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.26.13)\n", - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2.2.0)\n", - "Requirement already satisfied: msgpack>=0.6.0 in 
/usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.0.4)\n", - "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (5.9.4)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (0.4.3)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.2.0)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.3.0)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.57.0)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (6.1)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2.4.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (6.0.4)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (4.1.0)\n", "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (3.11.0)\n", "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.14.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2.1.1)\n", "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.0.1)\n", - "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (4.1.0)\n", - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (6.0.4)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (4.0.0)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (6.0.1)\n" + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from 
jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2.1.1)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (6.0.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (4.0.0)\n" ] }, { @@ -321,8 +321,8 @@ "Building wheels for collected packages: merlin-models\n", " Building wheel for merlin-models (PEP 517): started\n", " Building wheel for merlin-models (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-models: filename=merlin_models-23.2.0+6.ga92bdc24-py3-none-any.whl size=374609 sha256=2aa872a5f1575151273bcc94d5c4b0205a1f22af84ab44d48d9f75d74f9daa93\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-l8ge0dm1/wheels/4d/e8/98/0493db55fff90dc9af123f55a9455b96f7f8166c912a02c8a6\n", + " Created wheel for merlin-models: filename=merlin_models-23.2.0+6.ga92bdc24-py3-none-any.whl size=374609 sha256=a5077403f59b4f6c38be0d098b696c96fde6e874ac02e12d04bba00c7dcb9ab2\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-rxmtwiq_/wheels/4d/e8/98/0493db55fff90dc9af123f55a9455b96f7f8166c912a02c8a6\n", "Successfully built merlin-models\n", "Installing collected packages: merlin-models\n", " Attempting uninstall: merlin-models\n", @@ -483,61 +483,61 @@ " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (4.64.1)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (8.0.0)\n", "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (2022.5.0)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (22.0)\n", - "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (11.4.1)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (2022.7.1)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (2022.7.1)\n", "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (3.19.6)\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (1.3.5)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (8.0.0)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (0.56.4)\n", - "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (2022.7.1)\n" + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) 
(1.2.5)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (11.4.1)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (1.2.5)\n", - "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (2022.7.1)\n", "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (1.12.0)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (4.64.1)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+56.gaad0c874) (2022.7)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+56.gaad0c874) (2.8.2)\n", - "Requirement already satisfied: numpy>=1.17.3; platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+56.gaad0c874) (1.22.4)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+56.gaad0c874) (5.2.0)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+56.gaad0c874) (0.39.1)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core==0.9.0+56.gaad0c874) (45.2.0)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (1.3.5)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (0.56.4)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (22.0)\n", + "Requirement already satisfied: numpy>=1.16.6 in /usr/local/lib/python3.8/dist-packages (from pyarrow>=5.0.0->merlin-core==0.9.0+56.gaad0c874) (1.22.4)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.3.0)\n", "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (6.0)\n", - "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (0.12.0)\n", "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (2.2.0)\n", - "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.3.0)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+56.gaad0c874) (1.2.0)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from 
betterproto<2.0.0->merlin-core==0.9.0+56.gaad0c874) (0.4.3)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.7.0)\n", - "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (6.1)\n", - "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.26.13)\n", - "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.0.4)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (0.12.0)\n", "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (5.9.4)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.0.0)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.0.4)\n", "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (2.4.0)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.0.0)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (3.1.2)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (6.1)\n", + "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.26.13)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.7.0)\n", "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (8.1.3)\n", "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (2.2.0)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (3.1.2)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+56.gaad0c874) (1.2.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+56.gaad0c874) (0.4.3)\n", "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+56.gaad0c874) (1.57.0)\n", "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+56.gaad0c874) (1.3.0)\n", - "Requirement already satisfied: six>=1.5 in 
/usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+56.gaad0c874) (1.14.0)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core==0.9.0+56.gaad0c874) (3.11.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+56.gaad0c874) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+56.gaad0c874) (2022.7)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+56.gaad0c874) (5.2.0)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core==0.9.0+56.gaad0c874) (45.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+56.gaad0c874) (0.39.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (2.1.1)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.0.1)\n", "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+56.gaad0c874) (4.1.0)\n", "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+56.gaad0c874) (6.0.4)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.0.1)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (2.1.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+56.gaad0c874) (1.14.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core==0.9.0+56.gaad0c874) (3.11.0)\n", "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+56.gaad0c874) (6.0.1)\n", "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+56.gaad0c874) (4.0.0)\n", "Building wheels for collected packages: merlin-core\n", " Building wheel for merlin-core (PEP 517): started\n", " Building wheel for merlin-core (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-core: filename=merlin_core-0.9.0+56.gaad0c874-py3-none-any.whl size=152601 sha256=e6e379a2bc1756cddf2a2ed74086c0071fd68f95bba9432dae3f8096116fbb8a\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-nvai80xu/wheels/8f/da/8c/c779661788874afaa32fd10abeac6016635956e3bad9940584\n", + " Created wheel for merlin-core: filename=merlin_core-0.9.0+56.gaad0c874-py3-none-any.whl size=152601 
sha256=dcee4602a77df64eb864c60e8cb155c6b8a165a9059ee943770248cef063bf37\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-hkriw5ee/wheels/8f/da/8c/c779661788874afaa32fd10abeac6016635956e3bad9940584\n", "Successfully built merlin-core\n", "Installing collected packages: merlin-core\n", " Attempting uninstall: merlin-core\n", @@ -656,64 +656,64 @@ " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) (0.9.0+56.gaad0c874)\n", - "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) (0.0.4)\n", "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) (1.9.3)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (4.64.1)\n", + "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) (0.0.4)\n", + "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) (0.9.0+56.gaad0c874)\n", + "Requirement already satisfied: numpy<1.26.0,>=1.18.5 in /usr/local/lib/python3.8/dist-packages (from scipy->nvtabular==1.6.0+42.g9b186ee9) (1.22.4)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.12.0)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (8.0.0)\n", "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.2.5)\n", - "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7.1)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.56.4)\n", - "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.5.0)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (4.64.1)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (22.0)\n", "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (11.4.1)\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.5)\n", - "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.12.0)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (8.0.0)\n", - 
"Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.19.6)\n", "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7.1)\n", - "Requirement already satisfied: numpy<1.26.0,>=1.18.5 in /usr/local/lib/python3.8/dist-packages (from scipy->nvtabular==1.6.0+42.g9b186ee9) (1.22.4)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.56.4)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7.1)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.19.6)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.5)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.5.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.57.0)\n", "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.4.3)\n", "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.2.0)\n", - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.2.0)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.7.0)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.0.0)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0)\n" + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.12.0)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.2.0)\n", - "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.1)\n", - 
"Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.4.0)\n", - "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (5.9.4)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.0)\n", + "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.2.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (5.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.39.1)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (45.2.0)\n", "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.0.4)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.1.2)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (5.9.4)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.7.0)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.4.0)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.1)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.2.0)\n", "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (8.1.3)\n", - "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.12.0)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.1.2)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.0.0)\n", "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.26.13)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in 
/usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (5.2.0)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (45.2.0)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.39.1)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.8.2)\n", "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.0)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.57.0)\n", - "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.0)\n", - "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (4.1.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.8.2)\n", "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0.4)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (4.1.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.11.0)\n", "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.0.1)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.1.1)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.11.0)\n", "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.14.0)\n", "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (4.0.0)\n", "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from 
h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0.1)\n", "Building wheels for collected packages: nvtabular\n", " Building wheel for nvtabular (PEP 517): started\n", " Building wheel for nvtabular (PEP 517): finished with status 'done'\n", - " Created wheel for nvtabular: filename=nvtabular-1.6.0+42.g9b186ee9-cp38-cp38-linux_x86_64.whl size=258506 sha256=33bd39a7ce6bd4d1b7e81ef0ecd16abcffc75944d1a9a8510902f42658baf22e\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-ws2h8usp/wheels/df/bf/c2/9cc2a62fe6da42038c26a9c0c4e25f9767093528b102fa30a2\n", + " Created wheel for nvtabular: filename=nvtabular-1.6.0+42.g9b186ee9-cp38-cp38-linux_x86_64.whl size=258506 sha256=4c4a37dcdcff0046a7edf1346f3664903218a14a689ef96388354d679c1a3da3\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-c7pdm8dg/wheels/df/bf/c2/9cc2a62fe6da42038c26a9c0c4e25f9767093528b102fa30a2\n", "Successfully built nvtabular\n", "Installing collected packages: nvtabular\n", " Attempting uninstall: nvtabular\n", @@ -834,71 +834,71 @@ " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: requests<3,>=2.10 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.28.1)\n", "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (0.9.0+56.gaad0c874)\n", - "Requirement already satisfied: treelite-runtime==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.4.0)\n", + "Requirement already satisfied: requests<3,>=2.10 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.28.1)\n", "Requirement already satisfied: treelite==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.4.0)\n", "Requirement already satisfied: nvtabular>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (1.6.0+42.g9b186ee9)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2019.11.28)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2.8)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (1.26.13)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2.1.1)\n", + "Requirement already satisfied: treelite-runtime==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.4.0)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (22.0)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.19.6)\n", "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.3.5)\n", - "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from 
merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.5.0)\n", - "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7.1)\n", "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (11.4.1)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7.1)\n", "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.2.5)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (22.0)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.5.0)\n", "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7.1)\n", "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (8.0.0)\n", "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.64.1)\n", "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.12.0)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.19.6)\n", "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.56.4)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.8/dist-packages (from treelite-runtime==2.4.0->merlin-systems==0.7.0+61.g329cba4) (1.22.4)\n" + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (1.26.13)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2019.11.28)\n", + "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2.1.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2.8)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from treelite==2.4.0->merlin-systems==0.7.0+61.g329cba4) (1.9.3)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from treelite-runtime==2.4.0->merlin-systems==0.7.0+61.g329cba4) (1.9.3)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.8/dist-packages (from treelite==2.4.0->merlin-systems==0.7.0+61.g329cba4) (1.22.4)\n", "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from 
nvtabular>=1.0.0->merlin-systems==0.7.0+61.g329cba4) (0.0.4)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.8.2)\n", - "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.12.0)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7)\n", "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.2.0)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.12.0)\n", "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.3.0)\n", "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.2.0)\n", "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.4.3)\n", - "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.4.0)\n", - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.2.0)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.1.2)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.2.0)\n", "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.1)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.0)\n", "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (5.9.4)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.4.0)\n", "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from 
distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (8.1.3)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.0)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.7.0)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.1.2)\n", "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.4)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.57.0)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.7.0)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.2.0)\n", "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.57.0)\n", "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (45.2.0)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (5.2.0)\n", "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.39.1)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (5.2.0)\n", "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.14.0)\n", "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0.4)\n", "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.1.0)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.1)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.1.1)\n", + 
"Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.1)\n", "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.11.0)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.0.0)\n", "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.0.0)\n", "Building wheels for collected packages: merlin-systems\n", " Building wheel for merlin-systems (PEP 517): started\n", " Building wheel for merlin-systems (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-systems: filename=merlin_systems-0.7.0+61.g329cba4-py3-none-any.whl size=99480 sha256=50ebea88cab88355f4a562867fa250a1754ad79ba82ab44a242f1451ff918f50\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-ig69oyt6/wheels/1f/e9/71/1b0c6295aa7f4b37cb70292d96d87d9f38204674e6531bdda6\n", + " Created wheel for merlin-systems: filename=merlin_systems-0.7.0+61.g329cba4-py3-none-any.whl size=99480 sha256=7400ab8e12273b15c96f94806974ef168f6bbc63e5a02a9fccf0905f0ea10f43\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-_zkkhk4v/wheels/1f/e9/71/1b0c6295aa7f4b37cb70292d96d87d9f38204674e6531bdda6\n", "Successfully built merlin-systems\n", "Installing collected packages: merlin-systems\n", " Attempting uninstall: merlin-systems\n", @@ -970,61 +970,61 @@ " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", "Requirement already satisfied: merlin-core>=0.8.0 in /usr/local/lib/python3.8/dist-packages (from merlin-dataloader==0.0.2+41.gdbf8816) (0.9.0+56.gaad0c874)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (22.0)\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.3.5)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (8.0.0)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (11.4.1)\n", "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (0.56.4)\n", - "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2022.7.1)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (4.64.1)\n", "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages 
(from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.12.0)\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.2.5)\n", "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (3.19.6)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.2.5)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (4.64.1)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (8.0.0)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2022.7.1)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (22.0)\n", "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2022.5.0)\n", - "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (11.4.1)\n", "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2022.7.1)\n", - "Requirement already satisfied: numpy>=1.17.3; platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.22.4)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.8.2)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2022.7)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (5.2.0)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (45.2.0)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.3.5)\n", + "Requirement already satisfied: numpy<1.24,>=1.18 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.22.4)\n", "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (0.39.1)\n", - "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) 
(0.12.0)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (6.0)\n", - "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.3.0)\n", - "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.2.0)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.3.0)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (45.2.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (5.2.0)\n", "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.57.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.3.0)\n", "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.2.0)\n", "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (0.4.3)\n", - "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.0.4)\n", - "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.4.0)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.0.0)\n", - "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.26.13)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (6.0)\n", + "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.2.0)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (0.12.0)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.3.0)\n", "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from 
distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (8.1.3)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (5.9.4)\n", "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (6.1)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.0.0)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.2.0)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.0.4)\n", + "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.26.13)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.7.0)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.4.0)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (3.1.2)\n", - "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (5.9.4)\n", - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.2.0)\n" + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2022.7)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.7.0)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.14.0)\n", "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (3.11.0)\n", "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (4.1.0)\n", "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from 
grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (6.0.4)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.1.1)\n", "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.0.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.1.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.14.0)\n", "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (6.0.1)\n", "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (4.0.0)\n", "Building wheels for collected packages: merlin-dataloader\n", " Building wheel for merlin-dataloader (PEP 517): started\n", " Building wheel for merlin-dataloader (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-dataloader: filename=merlin_dataloader-0.0.2+41.gdbf8816-py3-none-any.whl size=40852 sha256=90d5b8cd5d1b74f242a2d155c11b3a4c34b029ef43f752c03f8f8b0a357be6b3\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-6c80kdug/wheels/8c/19/5b/15dc04f5a977f6a7f73ed66c91996a687b1d9e3154a4765536\n", + " Created wheel for merlin-dataloader: filename=merlin_dataloader-0.0.2+41.gdbf8816-py3-none-any.whl size=40852 sha256=25522e9c2124926ac2063828d36ae15009e18cb85666b6ebf5c29cdd24213231\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-vvfapbst/wheels/8c/19/5b/15dc04f5a977f6a7f73ed66c91996a687b1d9e3154a4765536\n", "Successfully built merlin-dataloader\n", "Installing collected packages: merlin-dataloader\n", " Attempting uninstall: merlin-dataloader\n", @@ -1034,24 +1034,24 @@ "Successfully installed merlin-dataloader-0.0.2+41.gdbf8816\n", "Collecting matplotlib\n", " Downloading matplotlib-3.7.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (9.2 MB)\n", - "Requirement already satisfied: numpy>=1.20 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (1.22.4)\n", "Requirement already satisfied: importlib-resources>=3.2.0; python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from matplotlib) (5.10.2)\n", - "Collecting fonttools>=4.22.0\n", - " Downloading fonttools-4.39.0-py3-none-any.whl (1.0 MB)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (22.0)\n", "Collecting cycler>=0.10\n", " Downloading cycler-0.11.0-py3-none-any.whl (6.4 kB)\n", - "Collecting contourpy>=1.0.1\n", - " Downloading contourpy-1.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (300 kB)\n", - "Collecting pillow>=6.2.0\n", - " Downloading Pillow-9.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (3.0.9)\n", "Collecting kiwisolver>=1.0.1\n", " Downloading 
kiwisolver-1.4.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.2 MB)\n", "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (2.8.2)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (22.0)\n", + "Requirement already satisfied: numpy>=1.20 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (1.22.4)\n", + "Collecting fonttools>=4.22.0\n", + " Downloading fonttools-4.39.0-py3-none-any.whl (1.0 MB)\n", + "Collecting pillow>=6.2.0\n", + " Downloading Pillow-9.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)\n", + "Collecting contourpy>=1.0.1\n", + " Downloading contourpy-1.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (300 kB)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (3.0.9)\n", "Requirement already satisfied: zipp>=3.1.0; python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from importlib-resources>=3.2.0; python_version < \"3.10\"->matplotlib) (3.11.0)\n", "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7->matplotlib) (1.14.0)\n", - "Installing collected packages: fonttools, cycler, contourpy, pillow, kiwisolver, matplotlib\n", + "Installing collected packages: cycler, kiwisolver, fonttools, pillow, contourpy, matplotlib\n", "Successfully installed contourpy-1.0.7 cycler-0.11.0 fonttools-4.39.0 kiwisolver-1.4.4 matplotlib-3.7.1 pillow-9.4.0\n" ] } @@ -1069,7 +1069,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "id": "152aee86", "metadata": {}, "outputs": [ @@ -1077,18 +1077,22 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: gdown in /usr/local/lib/python3.8/dist-packages (4.6.4)\n", - "Requirement already satisfied: requests[socks] in /usr/local/lib/python3.8/dist-packages (from gdown) (2.28.1)\n", + "Collecting gdown\n", + " Downloading gdown-4.6.4-py3-none-any.whl (14 kB)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from gdown) (4.64.1)\n", - "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.8/dist-packages (from gdown) (4.11.1)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from gdown) (3.9.0)\n", "Requirement already satisfied: six in /usr/lib/python3/dist-packages (from gdown) (1.14.0)\n", + "Requirement already satisfied: requests[socks] in /usr/local/lib/python3.8/dist-packages (from gdown) (2.28.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from gdown) (3.9.0)\n", + "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.8/dist-packages (from gdown) (4.11.1)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2019.11.28)\n", "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (2.1.1)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.26.13)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2.8)\n", - "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6; extra == \"socks\" in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) 
(1.7.1)\n", - "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.8/dist-packages (from beautifulsoup4->gdown) (2.3.2.post1)\n" + "Collecting PySocks!=1.5.7,>=1.5.6; extra == \"socks\"\n", + " Downloading PySocks-1.7.1-py3-none-any.whl (16 kB)\n", + "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.8/dist-packages (from beautifulsoup4->gdown) (2.3.2.post1)\n", + "Installing collected packages: gdown, PySocks\n", + "Successfully installed PySocks-1.7.1 gdown-4.6.4\n" ] }, { @@ -1097,20 +1101,35 @@ "text": [ "Downloading...\n", "From: https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", - "To: /workspace/rees46_ecom_dataset_small_for_ci.zip\n", - "100%|██████████| 43.4M/43.4M [00:08<00:00, 5.36MB/s]\n" + "To: /workspace/T4Rec_repro/rees46_ecom_dataset_small_for_ci.zip\n", + "100%|██████████| 43.4M/43.4M [00:08<00:00, 5.42MB/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 InRelease\n", - "Hit:2 http://archive.ubuntu.com/ubuntu focal InRelease\n", - "Get:3 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]\n", - "Get:4 http://archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]\n", - "Get:5 http://archive.ubuntu.com/ubuntu focal-backports InRelease [108 kB]\n", - "Fetched 336 kB in 2s (148 kB/s)\n", + "Get:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 InRelease [1581 B]\n", + "Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 Packages [907 kB]\n", + "Get:3 http://archive.ubuntu.com/ubuntu focal InRelease [265 kB]\n", + "Get:4 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]\n", + "Get:5 http://security.ubuntu.com/ubuntu focal-security/restricted amd64 Packages [1998 kB]\n", + "Get:6 http://archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]\n", + "Get:7 http://archive.ubuntu.com/ubuntu focal-backports InRelease [108 kB]\n", + "Get:8 http://archive.ubuntu.com/ubuntu focal/main amd64 Packages [1275 kB]\n", + "Get:9 http://security.ubuntu.com/ubuntu focal-security/main amd64 Packages [2539 kB]\n", + "Get:10 http://archive.ubuntu.com/ubuntu focal/universe amd64 Packages [11.3 MB]\n", + "Get:11 http://security.ubuntu.com/ubuntu focal-security/multiverse amd64 Packages [28.5 kB]\n", + "Get:12 http://security.ubuntu.com/ubuntu focal-security/universe amd64 Packages [1015 kB]\n", + "Get:13 http://archive.ubuntu.com/ubuntu focal/multiverse amd64 Packages [177 kB]\n", + "Get:14 http://archive.ubuntu.com/ubuntu focal/restricted amd64 Packages [33.4 kB]\n", + "Get:15 http://archive.ubuntu.com/ubuntu focal-updates/multiverse amd64 Packages [31.2 kB]\n", + "Get:16 http://archive.ubuntu.com/ubuntu focal-updates/universe amd64 Packages [1310 kB]\n", + "Get:17 http://archive.ubuntu.com/ubuntu focal-updates/restricted amd64 Packages [2134 kB]\n", + "Get:18 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 Packages [3014 kB]\n", + "Get:19 http://archive.ubuntu.com/ubuntu focal-backports/main amd64 Packages [55.2 kB]\n", + "Get:20 http://archive.ubuntu.com/ubuntu focal-backports/universe amd64 Packages [28.6 kB]\n", + "Fetched 26.5 MB in 11s (2470 kB/s)\n", "Reading package lists...\n", "Reading package lists...\n", "Building dependency tree...\n", @@ -1154,10 +1173,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-03-09 06:10:25.833595: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is 
optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", - " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n" + "2023-03-09 06:23:10.964331: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" ] }, { @@ -1171,21 +1188,23 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-03-09 06:10:28.225812: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-09 06:10:28.226230: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-09 06:10:28.226389: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-09 06:10:28.434063: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-03-09 06:10:28.435067: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-09 06:10:28.435273: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-09 06:10:28.435435: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-09 06:10:29.175980: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-09 06:10:29.176211: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-09 06:10:29.176375: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-09 06:10:29.176489: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", - 
"2023-03-09 06:10:29.176551: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", + "2023-03-09 06:23:13.408883: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-09 06:23:13.409336: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-09 06:23:13.409494: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" + " from .autonotebook import tqdm as notebook_tqdm\n", + "2023-03-09 06:23:13.887706: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-03-09 06:23:13.888643: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-09 06:23:13.888853: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-09 06:23:13.889008: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-09 06:23:14.636457: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-09 06:23:14.636673: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-09 06:23:14.636835: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-09 06:23:14.636950: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", + "2023-03-09 06:23:14.637016: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 
with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n" ] } ], @@ -1225,7 +1244,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 4, "id": "8d9903e6", "metadata": {}, "outputs": [], @@ -1290,7 +1309,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 22, "id": "5a4c7ca3", "metadata": {}, "outputs": [], @@ -1302,13 +1321,13 @@ " sequence_combiner=None,\n", " dim=d_model\n", " ),\n", - " pre=mm.StochasticSwapNoise()\n", + "# pre=mm.StochasticSwapNoise()\n", ")" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 23, "id": "34c739b3", "metadata": {}, "outputs": [], @@ -1318,7 +1337,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 24, "id": "14c35b2a", "metadata": {}, "outputs": [], @@ -1328,7 +1347,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 25, "id": "866f3249", "metadata": {}, "outputs": [], @@ -1342,7 +1361,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 26, "id": "288d08df", "metadata": {}, "outputs": [], @@ -1356,7 +1375,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 27, "id": "064ea5ec", "metadata": {}, "outputs": [], @@ -1368,7 +1387,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 28, "id": "6c008e16", "metadata": {}, "outputs": [], @@ -1378,7 +1397,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 29, "id": "49b12d31", "metadata": {}, "outputs": [], @@ -1390,54 +1409,48 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 35, "id": "d84a30d3", "metadata": {}, "outputs": [], "source": [ "model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", - " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[4])\n", + " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[10])\n", " )" ] }, { "cell_type": "code", - "execution_count": null, - "id": "9a9611ab", - "metadata": {}, - "outputs": [], - "source": [ - "# model_transformer.fit(\n", - "# train,\n", - "# batch_size=batch_size,\n", - "# epochs=n_epoch,\n", - "# pre=mm.SequencePredictRandom(schema=train.schema, target=target, transformer=xlnet_block)\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": 27, + "execution_count": 36, "id": "e7474131", "metadata": {}, "outputs": [ { - "ename": "TypeError", - "evalue": "('Keyword argument not understood:', 'transformer')", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[27], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m model_transformer\u001b[38;5;241m.\u001b[39mfit(\n\u001b[1;32m 2\u001b[0m train,\n\u001b[1;32m 3\u001b[0m batch_size\u001b[38;5;241m=\u001b[39mbatch_size,\n\u001b[1;32m 4\u001b[0m epochs\u001b[38;5;241m=\u001b[39mn_epoch,\n\u001b[0;32m----> 5\u001b[0m pre\u001b[38;5;241m=\u001b[39m\u001b[43mmm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSequenceMaskRandom\u001b[49m\u001b[43m(\u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mschema\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mtransformer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mxlnet_block\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m )\n", - "File \u001b[0;32m/workspace/merlin/models/tf/transforms/sequence.py:469\u001b[0m, in \u001b[0;36mSequenceMaskRandom.__init__\u001b[0;34m(self, schema, target, masking_prob, **kwargs)\u001b[0m\n\u001b[1;32m 461\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 462\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 463\u001b[0m schema: Schema,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 466\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 467\u001b[0m ):\n\u001b[1;32m 468\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmasking_prob \u001b[38;5;241m=\u001b[39m masking_prob\n\u001b[0;32m--> 469\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mschema\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/workspace/merlin/models/tf/transforms/sequence.py:103\u001b[0m, in \u001b[0;36mSequenceTransform.__init__\u001b[0;34m(self, schema, target, pre, **kwargs)\u001b[0m\n\u001b[1;32m 101\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pre:\n\u001b[1;32m 102\u001b[0m _pre \u001b[38;5;241m=\u001b[39m _pre\u001b[38;5;241m.\u001b[39mconnect(pre)\n\u001b[0;32m--> 103\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mpre\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_pre\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mschema\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 105\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtarget \u001b[38;5;241m=\u001b[39m target\n\u001b[1;32m 106\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtarget_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_target(target)\n", - "File \u001b[0;32m/workspace/merlin/models/tf/core/tabular.py:122\u001b[0m, in \u001b[0;36mTabularBlock.__init__\u001b[0;34m(self, pre, post, aggregation, schema, name, is_input, **kwargs)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 113\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 114\u001b[0m pre: Optional[BlockType] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 120\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 121\u001b[0m ):\n\u001b[0;32m--> 122\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minput_size \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mset_pre(pre)\n", - "File \u001b[0;32m/workspace/merlin/models/tf/core/base.py:166\u001b[0m, in \u001b[0;36mBlock.__init__\u001b[0;34m(self, context, **kwargs)\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, context: Optional[ModelContext] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 166\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mBlock\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 167\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m context:\n\u001b[1;32m 168\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_set_context(context)\n", - "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/tensorflow/python/trackable/base.py:205\u001b[0m, in \u001b[0;36mno_automatic_dependency_tracking.._method_wrapper\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 203\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_self_setattr_tracking \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m \u001b[38;5;66;03m# pylint: disable=protected-access\u001b[39;00m\n\u001b[1;32m 204\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 205\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 206\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 207\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_self_setattr_tracking \u001b[38;5;241m=\u001b[39m previous_value \u001b[38;5;66;03m# pylint: disable=protected-access\u001b[39;00m\n", - "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/keras/engine/base_layer.py:335\u001b[0m, in \u001b[0;36mLayer.__init__\u001b[0;34m(self, trainable, name, dtype, dynamic, **kwargs)\u001b[0m\n\u001b[1;32m 324\u001b[0m allowed_kwargs \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 325\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput_dim\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 326\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput_shape\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 332\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mimplementation\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 333\u001b[0m }\n\u001b[1;32m 334\u001b[0m \u001b[38;5;66;03m# Validate optional keyword arguments.\u001b[39;00m\n\u001b[0;32m--> 335\u001b[0m 
\u001b[43mgeneric_utils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate_kwargs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mallowed_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 337\u001b[0m \u001b[38;5;66;03m# Mutable properties\u001b[39;00m\n\u001b[1;32m 338\u001b[0m \u001b[38;5;66;03m# Indicates whether the layer's weights are updated during training\u001b[39;00m\n\u001b[1;32m 339\u001b[0m \u001b[38;5;66;03m# and whether the layer's updates are run during training.\u001b[39;00m\n\u001b[1;32m 340\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\n\u001b[1;32m 341\u001b[0m \u001b[38;5;28misinstance\u001b[39m(trainable, \u001b[38;5;28mbool\u001b[39m)\n\u001b[1;32m 342\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 345\u001b[0m )\n\u001b[1;32m 346\u001b[0m ):\n", - "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/keras/utils/generic_utils.py:1269\u001b[0m, in \u001b[0;36mvalidate_kwargs\u001b[0;34m(kwargs, allowed_kwargs, error_message)\u001b[0m\n\u001b[1;32m 1267\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m kwarg \u001b[38;5;129;01min\u001b[39;00m kwargs:\n\u001b[1;32m 1268\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m kwarg \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m allowed_kwargs:\n\u001b[0;32m-> 1269\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(error_message, kwarg)\n", - "\u001b[0;31mTypeError\u001b[0m: ('Keyword argument not understood:', 'transformer')" + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_1/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 78s 110ms/step - loss: 3.7036 - recall_at_10: 0.6067 - mrr_at_10: 0.4806 - ndcg_at_10: 0.5108 - map_at_10: 0.4806 - precision_at_10: 0.0607 - regularization_loss: 0.0000e+00 - loss_batch: 3.7070\n", + "Epoch 2/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 2.9681 - recall_at_10: 0.6940 - mrr_at_10: 0.5792 - ndcg_at_10: 0.6068 - map_at_10: 0.5792 - precision_at_10: 0.0694 - regularization_loss: 0.0000e+00 - loss_batch: 2.9733\n", + "Epoch 3/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 2.5195 - recall_at_10: 0.7439 - mrr_at_10: 0.6367 - ndcg_at_10: 0.6625 - map_at_10: 0.6367 - precision_at_10: 0.0744 - regularization_loss: 0.0000e+00 - loss_batch: 2.5258\n", + "Epoch 4/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 2.2286 - recall_at_10: 0.7810 - mrr_at_10: 0.6800 - ndcg_at_10: 0.7043 - map_at_10: 0.6800 - precision_at_10: 0.0781 - regularization_loss: 0.0000e+00 - loss_batch: 2.2364\n", + "Epoch 5/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 2.0158 - recall_at_10: 0.8031 - mrr_at_10: 0.7071 - ndcg_at_10: 0.7302 - map_at_10: 0.7071 - precision_at_10: 0.0803 - regularization_loss: 0.0000e+00 - loss_batch: 2.0250\n" ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -1445,13 +1458,13 @@ " train,\n", " batch_size=batch_size,\n", " epochs=n_epoch,\n", - " pre=mm.SequenceMaskRandom(schema=train.schema, target=target, transformer=xlnet_block)\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", ")" ] }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 37, "id": "7bf839e3", "metadata": {}, "outputs": [], @@ -1461,7 +1474,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 38, "id": "15ccc448", "metadata": {}, "outputs": [ @@ -1469,23 +1482,23 @@ "name": "stdout", "output_type": "stream", "text": [ - "84/84 [==============================] - 8s 40ms/step - loss: 8.8326 - recall_at_4: 0.0502 - mrr_at_4: 0.0319 - ndcg_at_4: 0.0365 - map_at_4: 0.0319 - precision_at_4: 0.0126 - regularization_loss: 0.0000e+00 - loss_batch: 8.8396\n" + "84/84 [==============================] - 8s 40ms/step - loss: 8.7361 - recall_at_10: 0.1869 - mrr_at_10: 0.0721 - ndcg_at_10: 0.0988 - map_at_10: 0.0721 - precision_at_10: 0.0187 - regularization_loss: 0.0000e+00 - loss_batch: 8.7682\n" ] }, { "data": { "text/plain": [ - "{'loss': 8.832579612731934,\n", - " 'recall_at_4': 0.05087455362081528,\n", - " 'mrr_at_4': 0.030891483649611473,\n", - " 'ndcg_at_4': 0.0359138660132885,\n", - " 'map_at_4': 0.030891483649611473,\n", - " 'precision_at_4': 0.01271863840520382,\n", + "{'loss': 8.73610782623291,\n", + " 'recall_at_10': 0.1859131157398224,\n", + " 'mrr_at_10': 0.07267787307500839,\n", + " 'ndcg_at_10': 0.09902743250131607,\n", + " 'map_at_10': 0.07267787307500839,\n", + " 'precision_at_10': 0.01859130710363388,\n", " 'regularization_loss': 0.0,\n", - " 'loss_batch': 9.142295837402344}" + " 'loss_batch': 10.154594421386719}" ] }, - "execution_count": 49, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -1498,6 +1511,31 @@ " return_dict=True\n", ")" ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "17fd65b9", + "metadata": {}, + "outputs": [ + { + 
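For context, the cell that follows pastes a set of reference metrics (the `eval_/next-item/*` values). A quick side-by-side comparison against the `evaluate()` output above makes the gap explicit. This is a minimal illustrative sketch: the variable names are mine, and the numbers are simply copied from the outputs shown in this notebook.

```python
# Compare the Merlin Models evaluation above with the reference dict pasted below.
# Values are copied from the notebook outputs; the helper itself is illustrative only.
mm_results = {
    "recall_at_10": 0.1859131157398224,
    "ndcg_at_10": 0.09902743250131607,
}
reference_results = {
    "eval_/next-item/recall_at_10": 0.15436746180057526,
    "eval_/next-item/ndcg_at_10": 0.08305524289608002,
}

for metric, value in mm_results.items():
    ref = reference_results[f"eval_/next-item/{metric}"]
    print(f"{metric}: merlin-models={value:.4f}  reference={ref:.4f}  diff={value - ref:+.4f}")
```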
"data": { + "text/plain": [ + "{'eval_/next-item/ndcg_at_10': 0.08305524289608002,\n", + " 'eval_/next-item/ndcg_at_20': 0.09936655312776566,\n", + " 'eval_/next-item/recall_at_10': 0.15436746180057526,\n", + " 'eval_/next-item/recall_at_20': 0.2190323770046234,\n", + " 'eval_/loss': 8.334789276123047}" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "{'eval_/next-item/ndcg_at_10': 0.08305524289608002, 'eval_/next-item/ndcg_at_20': 0.09936655312776566, 'eval_/next-item/recall_at_10': 0.15436746180057526, 'eval_/next-item/recall_at_20': 0.2190323770046234, 'eval_/loss': 8.334789276123047}" + ] } ], "metadata": { From 9c0d2d1e0dcec0bc7f70d99723afe2460ec23ad2 Mon Sep 17 00:00:00 2001 From: sararb Date: Thu, 9 Mar 2023 20:58:43 +0000 Subject: [PATCH 05/15] new version of reproducing_T4Rec_results with mlm training --- .../reproducing_T4Rec_results_v1.ipynb | 821 ++++++++++++++++++ 1 file changed, 821 insertions(+) create mode 100644 T4Rec_repro/reproducing_T4Rec_results_v1.ipynb diff --git a/T4Rec_repro/reproducing_T4Rec_results_v1.ipynb b/T4Rec_repro/reproducing_T4Rec_results_v1.ipynb new file mode 100644 index 0000000000..7048c3725f --- /dev/null +++ b/T4Rec_repro/reproducing_T4Rec_results_v1.ipynb @@ -0,0 +1,821 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "14beb6b6", + "metadata": {}, + "source": [ + "These are logs from training the following model from the CI script from T4Rec (the trianing was for 5 epochs):\n", + "\n", + "`### XLNet (MLM) - Item Id feature\n", + "python3 transf_exp_main_modified.py --output_dir ./tmp/ --overwrite_output_dir --do_train --do_eval --validate_every 10 --logging_steps 20 --save_steps 0 --data_path $DATA_PATH --features_schema_path $FEATURE_SCHEMA_PATH --fp16 --data_loader_engine merlin --start_time_window_index 1 --final_time_window_index 2 --time_window_folder_pad_digits 4 --model_type xlnet --loss_type cross_entropy --per_device_eval_batch_size 128 --similarity_type concat_mlp --tf_out_activation tanh --inp_merge mlp --learning_rate_warmup_steps 0 --learning_rate_schedule linear_with_warmup --hidden_act gelu --num_train_epochs $NUM_EPOCHS --dataloader_drop_last --compute_metrics_each_n_steps 1 --session_seq_length_max 20 --eval_on_last_item_seq_only --mf_constrained_embeddings --layer_norm_featurewise --attn_type bi --mlm --per_device_train_batch_size 128 --learning_rate 0.0006667377132554976 --dropout 0.0 --input_dropout 0.1 --weight_decay 3.910060265627374e-05 --d_model 192 --item_embedding_dim 448 --n_layer 3 --n_head 16 --label_smoothing 0.0 --stochastic_shared_embeddings_replacement_prob 0.1 --item_id_embeddings_init_std 0.11 --other_embeddings_init_std 0.02 --mlm_probability 0.30000000000000004 --eval_on_test_set --seed 100 --report_to none\n", + "`" + ] + }, + { + "cell_type": "markdown", + "id": "7010a6a1", + "metadata": {}, + "source": [ + "And here are the logs and the results, maybe reproducing that is something that we could work towards (the XLNet with MLM is what I used for benchmarking T4Rec, starting with it would be great)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "54d6ef61", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/Models\n", + " * [new branch] ci/horovod -> origin/ci/horovod\n", + " * [new branch] codespell_fix -> origin/codespell_fix\n", + " 16fb4149..b1c10317 fea-sok-integration-wj -> origin/fea-sok-integration-wj\n", + " * [new branch] 
fea-sok-load-dump -> origin/fea-sok-load-dump\n", + " 95462360..a69adf75 gh-pages -> origin/gh-pages\n", + " * [new branch] mtl_example -> origin/mtl_example\n", + " cb431a8a..b90e9a1b release-22.12 -> origin/release-22.12\n", + " * [new branch] tf/column_sampling_serialization_fix -> origin/tf/column_sampling_serialization_fix\n", + " * [new branch] tf/continuous_seq_feats_fix -> origin/tf/continuous_seq_feats_fix\n", + " * [new branch] tf/dataloader_changes -> origin/tf/dataloader_changes\n", + " * [new branch] tf/fix_broadcast_to_sequence -> origin/tf/fix_broadcast_to_sequence\n", + " * [new branch] tf/fix_training_smaller_accuracy -> origin/tf/fix_training_smaller_accuracy\n", + " * [new branch] tf/mtl_example_updates_v2 -> origin/tf/mtl_example_updates_v2\n", + " + 169f3df5...06eecddd tf/output-block -> origin/tf/output-block (forced update)\n", + " * [new branch] tf/process_list_to_prepare_features -> origin/tf/process_list_to_prepare_features\n", + " * [new branch] tf/quick_start_ranking -> origin/tf/quick_start_ranking\n", + " + 0a65d603...9f53e8ff update_07 -> origin/update_07 (forced update)\n", + " * [new tag] v23.02.00 -> v23.02.00\n", + "error: Your local changes to the following files would be overwritten by checkout:\n", + "\tT4Rec_repro/reproducing_T4Rec_results.ipynb\n", + "Please commit your changes or stash them before you switch branches.\n", + "Aborting\n", + "Warning: you are leaving 2 commits behind, not connected to\n", + "any of your branches:\n", + "\n", + " e284ebd Merge branch 'main' of https://github.com/NVIDIA-Merlin/core into HEAD\n", + " b2372e4 Merge branch 'main' of https://github.com/NVIDIA-Merlin/core into HEAD\n", + "\n", + "If you want to keep them by creating a new branch, this may be a good time\n", + "to do so with:\n", + "\n", + " git branch e284ebd\n", + "\n", + "Switched to branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your branch is behind 'origin/main' by 22 commits, and can be fast-forwarded.\n", + " (use \"git pull\" to update your local branch)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/core\n", + " * branch main -> FETCH_HEAD\n", + " 5dbafa68..aad0c874 main -> origin/main\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Updating cd96ca5f..aad0c874\n", + "Fast-forward\n", + " .github/release-drafter.yml | 44 +--\n", + " .github/workflows/ISSUE_TEMPLATE/bug-report.md | 17 +-\n", + " .../ISSUE_TEMPLATE/documentation-request.md | 12 +-\n", + " .../workflows/ISSUE_TEMPLATE/feature-request.md | 5 +-\n", + " .../workflows/ISSUE_TEMPLATE/submit-question.md | 3 +-\n", + " .github/workflows/ISSUE_TEMPLATE/task.md | 5 +-\n", + " .github/workflows/cpu-ci.yml | 145 +++-------\n", + " .github/workflows/cpu-models.yml | 52 ++--\n", + " .github/workflows/cpu-nvtabular.yml | 52 ++--\n", + " .github/workflows/cpu-packages.yml | 126 +++++++++\n", + " .github/workflows/cpu-systems.yml | 52 ++--\n", + " .github/workflows/docs-preview-pr.yaml | 2 +-\n", + " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", + " .github/workflows/gpu-ci.yml | 30 +-\n", + " .github/workflows/release-drafter.yaml | 2 +-\n", + " .pre-commit-config.yaml | 55 ++--\n", + " .prettierignore | 2 +\n", + " CLA.md | 9 +-\n", + " CONTRIBUTING.md | 28 +-\n", + " README.md | 68 ++---\n", + " ci/pr.gpu.Jenkinsfile | 2 +-\n", + " docs/README.md | 49 ++--\n", + " merlin/core/compat.py | 59 +++-\n", + " merlin/core/dispatch.py | 51 +++-\n", + " 
merlin/dag/__init__.py | 1 +\n", + " merlin/dag/base_operator.py | 30 +-\n", + " merlin/dag/dictarray.py | 3 +-\n", + " merlin/dag/executors.py | 107 ++++---\n", + " merlin/dag/graph.py | 20 ++\n", + " merlin/dag/node.py | 2 +-\n", + " merlin/dag/utils.py | 69 +++++\n", + " merlin/dispatch/lazy.py | 152 ++++++++++\n", + " merlin/dtypes/__init__.py | 60 ++++\n", + " merlin/dtypes/aliases.py | 52 ++++\n", + " merlin/dtypes/base.py | 178 ++++++++++++\n", + " merlin/dtypes/mapping.py | 173 ++++++++++++\n", + " merlin/dtypes/mappings/__init__.py | 18 ++\n", + " merlin/dtypes/mappings/cudf.py | 57 ++++\n", + " merlin/dtypes/mappings/numpy.py | 52 ++++\n", + " merlin/dtypes/mappings/pandas.py | 38 +++\n", + " merlin/dtypes/mappings/python.py | 31 ++\n", + " merlin/dtypes/mappings/tf.py | 52 ++++\n", + " merlin/dtypes/mappings/torch.py | 43 +++\n", + " merlin/dtypes/mappings/triton.py | 53 ++++\n", + " merlin/dtypes/registry.py | 142 ++++++++++\n", + " merlin/dtypes/shape.py | 183 ++++++++++++\n", + " merlin/io/avro.py | 4 -\n", + " merlin/io/csv.py | 1 -\n", + " merlin/io/dask.py | 6 +-\n", + " merlin/io/dataset.py | 19 +-\n", + " merlin/io/fsspec_utils.py | 8 +-\n", + " merlin/io/parquet.py | 8 -\n", + " merlin/io/writer.py | 1 -\n", + " merlin/schema/io/tensorflow_metadata.py | 86 +++---\n", + " merlin/schema/schema.py | 298 +++++++++++---------\n", + " merlin/table/__init__.py | 24 ++\n", + " merlin/table/conversions.py | 135 +++++++++\n", + " merlin/table/cupy_column.py | 92 ++++++\n", + " merlin/table/numpy_column.py | 100 +++++++\n", + " merlin/table/tensor_column.py | 217 ++++++++++++++\n", + " merlin/table/tensor_table.py | 222 +++++++++++++++\n", + " merlin/table/tensorflow_column.py | 159 +++++++++++\n", + " merlin/table/torch_column.py | 124 ++++++++\n", + " requirements.txt | 5 +-\n", + " tests/conftest.py | 16 +-\n", + " tests/unit/core/test_dispatch.py | 19 ++\n", + " tests/unit/core/test_version.py | 4 +\n", + " tests/unit/dag/test_dag_utils.py | 31 ++\n", + " tests/unit/dispatch/test_lazy_dispatch.py | 61 ++++\n", + " tests/unit/dtypes/test_module.py | 48 ++++\n", + " tests/unit/dtypes/test_shape.py | 222 +++++++++++++++\n", + " tests/unit/io/test_io.py | 27 +-\n", + " tests/unit/schema/test_column_schemas.py | 142 ++++++----\n", + " tests/unit/schema/test_schema.py | 7 +-\n", + " tests/unit/schema/test_schema_io.py | 27 +-\n", + " tests/unit/table/test_convert_column.py | 75 +++++\n", + " tests/unit/table/test_tensor_column.py | 186 ++++++++++++\n", + " tests/unit/table/test_tensor_table.py | 311 +++++++++++++++++++++\n", + " tests/unit/utils/test_utils.py | 3 -\n", + " tox.ini | 4 +\n", + " 80 files changed, 4413 insertions(+), 672 deletions(-)\n", + " create mode 100644 .github/workflows/cpu-packages.yml\n", + " create mode 100644 .prettierignore\n", + " create mode 100644 merlin/dag/utils.py\n", + " create mode 100644 merlin/dispatch/lazy.py\n", + " create mode 100644 merlin/dtypes/__init__.py\n", + " create mode 100644 merlin/dtypes/aliases.py\n", + " create mode 100644 merlin/dtypes/base.py\n", + " create mode 100644 merlin/dtypes/mapping.py\n", + " create mode 100644 merlin/dtypes/mappings/__init__.py\n", + " create mode 100644 merlin/dtypes/mappings/cudf.py\n", + " create mode 100644 merlin/dtypes/mappings/numpy.py\n", + " create mode 100644 merlin/dtypes/mappings/pandas.py\n", + " create mode 100644 merlin/dtypes/mappings/python.py\n", + " create mode 100644 merlin/dtypes/mappings/tf.py\n", + " create mode 100644 merlin/dtypes/mappings/torch.py\n", + " create mode 100644 
merlin/dtypes/mappings/triton.py\n", + " create mode 100644 merlin/dtypes/registry.py\n", + " create mode 100644 merlin/dtypes/shape.py\n", + " create mode 100644 merlin/table/__init__.py\n", + " create mode 100644 merlin/table/conversions.py\n", + " create mode 100644 merlin/table/cupy_column.py\n", + " create mode 100644 merlin/table/numpy_column.py\n", + " create mode 100644 merlin/table/tensor_column.py\n", + " create mode 100644 merlin/table/tensor_table.py\n", + " create mode 100644 merlin/table/tensorflow_column.py\n", + " create mode 100644 merlin/table/torch_column.py\n", + " create mode 100644 tests/unit/dag/test_dag_utils.py\n", + " create mode 100644 tests/unit/dispatch/test_lazy_dispatch.py\n", + " create mode 100644 tests/unit/dtypes/test_module.py\n", + " create mode 100644 tests/unit/dtypes/test_shape.py\n", + " create mode 100644 tests/unit/table/test_convert_column.py\n", + " create mode 100644 tests/unit/table/test_tensor_column.py\n", + " create mode 100644 tests/unit/table/test_tensor_table.py\n", + "Processing /core\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (0.56.4)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (1.2.5)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (2022.5.0)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (2022.7.1)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (1.3.5)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (2022.7.1)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (8.0.0)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (11.4.1)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (1.12.0)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ERROR: Operation cancelled by user\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Error while terminating subprocess (pid=1791146): \n" + ] + } + ], + "source": [ + "%%bash\n", + "\n", + "cd /models && git fetch origin && git checkout origin/tf/transformer-api && pip install .\n", + "cd /core && git checkout main && git pull origin main && pip install .\n", + "cd /nvtabular && git checkout main && git pull origin main && pip install .\n", + "cd /systems && git checkout main && git pull origin main && pip install .\n", + "cd /dataloader && git checkout main && git pull origin main && pip install .\n", + "pip install matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "152aee86", + 
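Before the data-download and hyperparameter cells below, it may help to spell out how the flags in the T4Rec CI command quoted above map onto the Python variables this notebook defines. The mapping below is an assumption based on matching names, purely as a reading aid, and it surfaces one apparent discrepancy worth double-checking: the CLI passes `--item_id_embeddings_init_std 0.11`, while the cell below sets `item_id_embeddings_init_std = 3`.

```python
# Hypothetical, name-based mapping from the T4Rec CLI flags quoted above to the
# variables defined in the hyperparameter cell below. For reading convenience only.
cli_to_notebook = {
    "--d_model 192": "d_model = 192",
    "--n_layer 3": "n_layer = 3",
    "--n_head 16": "n_head = 16",
    "--per_device_train_batch_size 128": "batch_size = 128",
    "--learning_rate 0.0006667377132554976": "learning_rate = 0.0006667377132554976",
    "--weight_decay 3.910060265627374e-05": "weight_decay = 3.910060265627374e-05",
    "--item_embedding_dim 448": "item_embedding_dim = 448",
    "--input_dropout 0.1": "input_dropout = 0.1",
    "--dropout 0.0": "dropout = 0",
    "--mlm_probability 0.30000000000000004": "mlm_prob = 0.3",
    # Apparent mismatch: the CLI uses 0.11, the notebook sets 3.
    "--item_id_embeddings_init_std 0.11": "item_id_embeddings_init_std = 3",
}
```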
"metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "\n", + "rm -rf ecom_dataset\n", + "mkdir -p ecom_dataset\n", + "\n", + "pip install gdown\n", + "# gdown https://drive.google.com/uc?id=1BvCHc4eXComuNK93bKhRM6cbg9y5p350 # <-- full dataset\n", + "gdown https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", + "apt-get update -y\n", + "apt-get install unzip -y\n", + "unzip -d ecom_dataset \"rees46_ecom_dataset_small_for_ci.zip\"" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "ceb3ae93", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", + "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2023-03-09 18:01:08.237320: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-03-09 18:01:17.553146: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:214] Using CUDA malloc Async allocator for GPU: 0\n", + "2023-03-09 18:01:17.554189: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24570 MB memory: -> device: 0, name: NVIDIA RTX A6000, pci bus id: 0000:65:00.0, compute capability: 8.6\n" + ] + } + ], + "source": [ + "import os\n", + "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", + "import gc\n", + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "\n", + "from merlin.schema.tags import Tags\n", + "from merlin.io.dataset import Dataset\n", + "import merlin.models.tf as mm" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "8d9903e6", + "metadata": {}, + "outputs": [], + "source": [ + "# Set the hyperparams similar to T4Rec benchmark script\n", + "d_model = 192\n", + "n_layer = 3\n", + "n_head = 16\n", + "batch_size = 128\n", + "learning_rate = 0.0006667377132554976\n", + "weight_decay = 3.910060265627374e-05 \n", + "n_epoch = 5\n", + "item_embedding_dim = 448 \n", + "item_id_embeddings_init_std = 3\n", + "input_dropout = 0.1\n", + "initializer_range = 0.02\n", + "layer_norm_eps = 1e-12\n", + "dropout = 0\n", + "mlm_prob = 0.3\n", + "eval_on_test_set = True\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e181e9c0", + "metadata": {}, + "outputs": [], + "source": [ + "train = Dataset(\"ecom_dataset/0001/train.parquet\")\n", + "if eval_on_test_set: \n", + " valid = Dataset(\"ecom_dataset/0002/test.parquet\")\n", + "else: \n", + " valid = Dataset(\"ecom_dataset/0002/valid.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "d9f121dc", + "metadata": {}, + "outputs": [], + "source": [ + "target = 'sess_pid_seq'" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "cec55f74", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.models.tf.core.tabular import 
TabularBlock\n", + "\n", + "# Create equivalent class of T4Rec's TabularDroupout\n", + "class TabularDropout(TabularBlock):\n", + " \"\"\"\n", + " Applies dropout transformation.\n", + " \"\"\"\n", + "\n", + " def __init__(self, dropout_rate=0.0):\n", + " super().__init__()\n", + " self.dropout = tf.keras.layers.Dropout(dropout_rate)\n", + "\n", + " def forward(self, inputs, **kwargs):\n", + " outputs = {key: self.dropout(val) for key, val in inputs.items()} # type: ignore\n", + " return outputs\n", + "\n", + "# Create equivalent class of T4Rec's 'layer-norm'\n", + "class TabularNorm(TabularBlock):\n", + " \"\"\"\n", + " Applies layr-norm transformation.\n", + " \"\"\"\n", + "\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.layer_norm = tf.keras.layers.LayerNormalization()\n", + "\n", + " def forward(self, inputs, **kwargs):\n", + " outputs = {key: self.layer_norm(val) for key, val in inputs.items()} # type: ignore\n", + " return outputs\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "72a286ba", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:148: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n", + "\n", + "schema = TensorflowMetadata.from_proto_text_file(\n", + " './',\n", + " file_name='rees46_schema_modified.pbtxt'\n", + ").to_merlin_schema()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "71f59155", + "metadata": {}, + "outputs": [], + "source": [ + "# we only use the item-id as input to the model\n", + "schema_model = schema.select_by_tag(Tags.ITEM_ID)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d07aa5f1", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.models.tf import InputBlockV2\n", + "import tensorflow as tf\n", + "input_block = InputBlockV2(\n", + " schema_model,\n", + " categorical=mm.Embeddings(\n", + " schema_model.select_by_tag(Tags.CATEGORICAL),\n", + " dim=item_embedding_dim,\n", + " #This is equivalent of torch.nn.init.normal_\n", + " embeddings_initializer=tf.keras.initializers.RandomNormal(\n", + " mean=0.0,\n", + " stddev=item_id_embeddings_init_std\n", + " ),\n", + " sequence_combiner=None,\n", + " ),\n", + " #pre=mm.StochasticSwapNoise(schema_model, replacement_prob=0.1) # This is not working with sequences transforms\n", + " # we apply dropout and layer-norm as post-processing steps before aggregation\n", + " post=TabularDropout(input_dropout).connect(TabularNorm())\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "74b6d3d9", + "metadata": {}, + "outputs": [], + "source": [ + "# projet the output of the input block into the same dimension as d_model (equivalent of d_output in T4Rec)\n", + "mlp_block = mm.MLPBlock(\n", + " [d_model],\n", + " activation='relu',\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "14c35b2a", + "metadata": {}, + "outputs": [], + "source": [ + "# set the xlnet block with the necessary parameters\n", + "xlnet_block = mm.XLNetBlock(\n", + " d_model=d_model, \n", + " n_head=n_head, \n", + " n_layer=n_layer, \n", + " attn_type='bi', \n", + " hidden_act='gelu', \n", + " initializer_range=initializer_range, \n", + " 
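One thing to double-check in the `TabularDropout` / `TabularNorm` blocks defined a few cells above: they override `forward()`, which is the T4Rec (PyTorch) convention, whereas Keras layers, and Merlin's TF `TabularBlock` with them, normally dispatch through `call()`. If that convention applies here, the dropout and layer-norm post-processing may never actually run. A minimal sketch of the same blocks written with `call()`, under that assumption (the class names below are mine, not part of the notebook):

```python
import tensorflow as tf
from merlin.models.tf.core.tabular import TabularBlock


class TabularDropoutKeras(TabularBlock):
    """Applies dropout to every feature in the input dict (Keras-style call)."""

    def __init__(self, dropout_rate=0.0):
        super().__init__()
        self.dropout = tf.keras.layers.Dropout(dropout_rate)

    def call(self, inputs, training=False, **kwargs):
        # Dropout is only active when training=True.
        return {key: self.dropout(val, training=training) for key, val in inputs.items()}


class TabularLayerNormKeras(TabularBlock):
    """Applies layer normalization to every feature in the input dict."""

    def __init__(self):
        super().__init__()
        self.layer_norm = tf.keras.layers.LayerNormalization()

    def call(self, inputs, **kwargs):
        return {key: self.layer_norm(val) for key, val in inputs.items()}
```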
layer_norm_eps=layer_norm_eps, \n", + " dropout=0\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "866f3249", + "metadata": {}, + "outputs": [], + "source": [ + "dense_block = mm.SequentialBlock(\n", + " input_block,\n", + " mlp_block,\n", + " xlnet_block\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "4beb1a9f", + "metadata": {}, + "outputs": [], + "source": [ + "# Project the output of the transformer to the same dimension as `item_embedding_dim`\n", + "# this is needed for weight-tying\n", + "mlp_block2 = mm.MLPBlock(\n", + " [item_embedding_dim],\n", + " activation='relu',\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "064ea5ec", + "metadata": {}, + "outputs": [], + "source": [ + "# set next-item prediction task with weight tying option by providing the embeddings table of the `item-id` \n", + "# as the `to_call` layer\n", + "prediction_task = mm.CategoricalOutput(\n", + " to_call=input_block[\"categorical\"][target],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "6c008e16", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-09 18:01:20.111251: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n" + ] + } + ], + "source": [ + "# Create the end-to-end Keras model\n", + "model_transformer = mm.Model(dense_block, mlp_block2, prediction_task)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "8b89e82b", + "metadata": {}, + "outputs": [], + "source": [ + "# Implements optimizer with linear decay of the learning rate. This is what T4Rec pytorch-trainer is using. 
\n", + "\n", + "# For that we will use the custom optimizer `AdamWeightDecay` provided by HuggingFace\n", + "from transformers.optimization_tf import AdamWeightDecay\n", + "\n", + "\n", + "num_warmup_steps = 0\n", + "# compute the total steps in the training iteration:\n", + "import math\n", + "steps_per_epoch = math.floor(train.compute().shape[0] / batch_size)\n", + "total_step = steps_per_epoch * n_epoch\n", + "\n", + "# Set the linear-decay learning scheduler\n", + "lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(\n", + " initial_learning_rate=learning_rate,\n", + " decay_steps=total_step,\n", + " power=1,\n", + ")\n", + "# Set the optimizer with the `weight_decay` rate\n", + "if weight_decay > 0.0:\n", + " optimizer = AdamWeightDecay(\n", + " learning_rate=lr_schedule,\n", + " weight_decay_rate=weight_decay,\n", + " )\n", + "else: \n", + " optimizer = AdamWeightDecay(learning_rate=lr_schedule)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "d84a30d3", + "metadata": {}, + "outputs": [], + "source": [ + "# compile the model with ranking metrics computed at 10 and 20 thresholds\n", + "model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", + " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[10, 20])\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "d8cc8e14", + "metadata": {}, + "outputs": [], + "source": [ + "# Align the schema of the dataloader and the schema used by the model\n", + "train.schema = schema_model" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "e7474131", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-09 18:01:47.567695: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model/sequential_block_5/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model/sequential_block_5/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model/sequential_block_5/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model/sequential_block_5/xl_net_block/sequential_block_8/replace_masked_embeddings/RaggedWhere/Reshape_3:0\", shape=(None,), dtype=int64), values=Tensor(\"gradient_tape/model/sequential_block_5/xl_net_block/sequential_block_8/replace_masked_embeddings/RaggedWhere/Reshape_2:0\", shape=(None, None), dtype=float32), dense_shape=Tensor(\"gradient_tape/model/sequential_block_5/xl_net_block/sequential_block_8/replace_masked_embeddings/RaggedWhere/Cast:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. 
This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model/sequential_block_5/xl_net_block/sequential_block_8/replace_masked_embeddings/RaggedWhere/RaggedTile_2/Reshape_3:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model/sequential_block_5/xl_net_block/sequential_block_8/replace_masked_embeddings/RaggedWhere/RaggedTile_2/Reshape_2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model/sequential_block_5/xl_net_block/sequential_block_8/replace_masked_embeddings/RaggedWhere/RaggedTile_2/Cast:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:436: UserWarning: Converting sparse IndexedSlices to a dense Tensor with 174720448 elements. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss`argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-09 18:02:45.911807: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model/sequential_block_5/xl_net_block/sequential_block_8/replace_masked_embeddings/RaggedWhere/Assert/AssertGuard/branch_executed/_31\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "677/677 [==============================] - 202s 204ms/step - loss: 13.5418 - recall_at_10: 0.0333 - mrr_at_10: 0.0120 - ndcg_at_10: 0.0170 - map_at_10: 0.0120 - precision_at_10: 0.0033 - recall_at_20: 0.0524 - mrr_at_20: 0.0133 - ndcg_at_20: 0.0218 - map_at_20: 0.0133 - precision_at_20: 0.0026 - regularization_loss: 0.0000e+00 - loss_batch: 13.5668\n", + "Epoch 2/5\n", + "677/677 [==============================] - 135s 200ms/step - loss: 12.3608 - recall_at_10: 0.0430 - mrr_at_10: 0.0149 - ndcg_at_10: 0.0214 - map_at_10: 0.0149 - precision_at_10: 0.0043 - recall_at_20: 0.0656 - mrr_at_20: 0.0164 - ndcg_at_20: 0.0271 - map_at_20: 0.0164 - precision_at_20: 0.0033 - regularization_loss: 0.0000e+00 - loss_batch: 12.3602\n", + "Epoch 3/5\n", + "677/677 [==============================] - 139s 205ms/step - loss: 12.0906 - recall_at_10: 0.0435 - mrr_at_10: 0.0154 - ndcg_at_10: 0.0219 - map_at_10: 0.0154 - precision_at_10: 0.0043 - recall_at_20: 0.0672 - mrr_at_20: 0.0170 - ndcg_at_20: 0.0279 - map_at_20: 0.0170 - precision_at_20: 0.0034 - regularization_loss: 0.0000e+00 - loss_batch: 12.0902\n", + "Epoch 4/5\n", + "677/677 [==============================] - 134s 197ms/step - loss: 11.8980 - recall_at_10: 0.0423 - mrr_at_10: 0.0158 - ndcg_at_10: 0.0220 - map_at_10: 0.0158 - precision_at_10: 0.0042 - recall_at_20: 0.0648 - mrr_at_20: 0.0173 - ndcg_at_20: 0.0276 - map_at_20: 0.0173 - precision_at_20: 0.0032 - regularization_loss: 0.0000e+00 - loss_batch: 11.8978\n", + "Epoch 5/5\n", + "677/677 [==============================] - 136s 200ms/step - loss: 11.7795 - recall_at_10: 0.0421 - mrr_at_10: 0.0154 - ndcg_at_10: 0.0216 - map_at_10: 0.0154 - precision_at_10: 0.0042 - recall_at_20: 0.0659 - mrr_at_20: 0.0170 - ndcg_at_20: 0.0276 - map_at_20: 0.0170 - precision_at_20: 0.0033 - regularization_loss: 0.0000e+00 - loss_batch: 11.7790\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Launch a training iteration with `n_epoch` epochs\n", + "# For mlm, we need to use `SequenceMaskRandom` and specify the masking probability\n", + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequenceMaskRandom(schema=train.schema, target=target, transformer=xlnet_block, masking_prob=mlm_prob)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "7bf839e3", + "metadata": {}, + "outputs": [], + "source": [ + "# Evaluate using `SequenceMaskLast` to mask the last item only\n", + "valid.schema = schema_model\n", + "predict_last = mm.SequenceMaskLast(schema=valid.schema, target=target, transformer=xlnet_block)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "15ccc448", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:148: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and 
will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "2023-03-09 18:28:21.499587: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model/sequential_block_5/xl_net_block/sequential_block_8/replace_masked_embeddings/RaggedWhere/Assert/AssertGuard/branch_executed/_23\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "84/84 [==============================] - 34s 128ms/step - loss: 11.7879 - recall_at_10: 0.0458 - mrr_at_10: 0.0155 - ndcg_at_10: 0.0225 - map_at_10: 0.0155 - precision_at_10: 0.0046 - recall_at_20: 0.0711 - mrr_at_20: 0.0171 - ndcg_at_20: 0.0288 - map_at_20: 0.0171 - precision_at_20: 0.0036 - regularization_loss: 0.0000e+00 - loss_batch: 11.7897\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 11.787938117980957,\n", + " 'recall_at_10': 0.0489937923848629,\n", + " 'mrr_at_10': 0.017020391300320625,\n", + " 'ndcg_at_10': 0.024413621053099632,\n", + " 'map_at_10': 0.017020391300320625,\n", + " 'precision_at_10': 0.004899379797279835,\n", + " 'recall_at_20': 0.07645288854837418,\n", + " 'mrr_at_20': 0.018829816952347755,\n", + " 'ndcg_at_20': 0.03123626857995987,\n", + " 'map_at_20': 0.018829816952347755,\n", + " 'precision_at_20': 0.003822644241154194,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 11.866037368774414}" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "17fd65b9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'eval_/next-item/ndcg_at_10': 0.08305524289608002,\n", + " 'eval_/next-item/ndcg_at_20': 0.09936655312776566,\n", + " 'eval_/next-item/recall_at_10': 0.15436746180057526,\n", + " 'eval_/next-item/recall_at_20': 0.2190323770046234,\n", + " 'eval_/loss': 8.334789276123047}" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "{'eval_/next-item/ndcg_at_10': 0.08305524289608002, 'eval_/next-item/ndcg_at_20': 0.09936655312776566, 'eval_/next-item/recall_at_10': 0.15436746180057526, 'eval_/next-item/recall_at_20': 0.2190323770046234, 'eval_/loss': 8.334789276123047}" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 1881cb1a20cf059c3cae14c97b9acbd0fcfede2e Mon Sep 17 00:00:00 2001 From: Radek Osmulski Date: Mon, 13 Mar 2023 14:22:06 +1000 Subject: [PATCH 06/15] update --- T4Rec_repro/train_runs/clm_item_id_min.ipynb | 640 +++++++++++++++++++ T4Rec_repro/train_runs/mlm_item_id_min.ipynb | 640 +++++++++++++++++++ 2 files changed, 1280 insertions(+) create mode 100644 T4Rec_repro/train_runs/clm_item_id_min.ipynb create mode 100644 T4Rec_repro/train_runs/mlm_item_id_min.ipynb diff --git a/T4Rec_repro/train_runs/clm_item_id_min.ipynb b/T4Rec_repro/train_runs/clm_item_id_min.ipynb new file mode 100644 index 0000000000..ff5eabbe86 --- /dev/null +++ 
b/T4Rec_repro/train_runs/clm_item_id_min.ipynb @@ -0,0 +1,640 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "ceb3ae93", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-10 13:19:41.332031: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", + "2023-03-10 13:19:43.702598: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:19:43.703049: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:19:43.703227: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2023-03-10 13:19:44.148806: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-03-10 13:19:44.149822: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:19:44.150030: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:19:44.150185: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:19:44.891194: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:19:44.891419: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:19:44.891582: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:19:44.891696: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", + "2023-03-10 13:19:44.891761: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n" + ] + } + ], + "source": [ + "import os\n", + "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", + "import gc\n", + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "\n", + "from merlin.schema.tags import Tags\n", + "from merlin.io.dataset import Dataset\n", + "import merlin.models.tf as mm" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "11647dd3", + "metadata": {}, + "outputs": [], + "source": [ + "train = Dataset(\"ecom_dataset/0001/train.parquet\")\n", + "valid = Dataset(\"ecom_dataset/0002/valid.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4ab4e0fb", + "metadata": {}, + "outputs": [], + "source": [ + "target = 'sess_pid_seq'" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8d9903e6", + "metadata": {}, + "outputs": [], + "source": [ + "# a couple of starter hyperparams\n", + "\n", + "d_model = 192\n", + "n_layer = 3\n", + "n_head = 16\n", + "batch_size = 128\n", + "learning_rate = 0.0006667377132554976\n", + "n_epoch = 5" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a6ade14a", + "metadata": {}, + "outputs": [], + "source": [ + "def get_model():\n", + " mlp_block = mm.MLPBlock(\n", + " [128,d_model],\n", + " activation='relu',\n", + " 
no_activation_last_layer=True,\n", + " )\n", + "\n", + " from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n", + "\n", + " schema = TensorflowMetadata.from_proto_text_file(\n", + " '../',\n", + " file_name='rees46_schema_modified.pbtxt'\n", + " ).to_merlin_schema()\n", + "\n", + " train.schema = schema\n", + "\n", + " input_block = mm.InputBlockV2(\n", + " train.schema.select_by_name('sess_pid_seq'), \n", + " embeddings=mm.Embeddings(\n", + " train.schema.select_by_name('sess_pid_seq'), \n", + " sequence_combiner=None,\n", + " dim=d_model\n", + " ),\n", + " # pre=mm.StochasticSwapNoise()\n", + " )\n", + "\n", + " train.schema = train.schema.select_by_name('sess_pid_seq')\n", + "\n", + " xlnet_block = mm.XLNetBlock(d_model=d_model, n_head=n_head, n_layer=n_layer)\n", + "\n", + " dense_block = mm.SequentialBlock(\n", + " input_block,\n", + " mlp_block,\n", + " xlnet_block\n", + " )\n", + "\n", + " mlp_block2 = mm.MLPBlock(\n", + " [128,d_model],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )\n", + "\n", + " prediction_task = mm.CategoricalOutput(\n", + " to_call=input_block[\"categorical\"][target],\n", + " )\n", + "\n", + " model_transformer = mm.Model(dense_block, mlp_block2, prediction_task)\n", + "\n", + " optimizer = tf.keras.optimizers.Adam(\n", + " learning_rate=learning_rate,\n", + " )\n", + "\n", + " model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", + " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[10])\n", + " )\n", + " return model_transformer, xlnet_block" + ] + }, + { + "cell_type": "markdown", + "id": "46b9f788", + "metadata": {}, + "source": [ + "# Run 1" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "e7474131", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-10 13:19:51.258201: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 83s 110ms/step - loss: 8.9114 - recall_at_10: 0.0409 - mrr_at_10: 0.0151 - ndcg_at_10: 0.0211 - map_at_10: 0.0151 - precision_at_10: 0.0041 - regularization_loss: 0.0000e+00 - loss_batch: 8.9101\n", + "Epoch 2/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 5.7403 - recall_at_10: 0.2994 - mrr_at_10: 0.1857 - ndcg_at_10: 0.2126 - map_at_10: 0.1857 - precision_at_10: 0.0299 - regularization_loss: 0.0000e+00 - loss_batch: 5.7358\n", + "Epoch 3/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 3.8640 - recall_at_10: 0.5785 - mrr_at_10: 0.4536 - ndcg_at_10: 0.4835 - map_at_10: 0.4536 - precision_at_10: 0.0579 - regularization_loss: 0.0000e+00 - loss_batch: 3.8700\n", + "Epoch 4/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 3.0672 - recall_at_10: 0.6808 - mrr_at_10: 0.5664 - ndcg_at_10: 0.5939 - map_at_10: 0.5664 - precision_at_10: 0.0681 - regularization_loss: 0.0000e+00 - loss_batch: 3.0720\n", + "Epoch 5/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 2.6008 - recall_at_10: 0.7369 - mrr_at_10: 0.6298 - ndcg_at_10: 0.6556 - map_at_10: 0.6298 - precision_at_10: 0.0737 - regularization_loss: 0.0000e+00 - loss_batch: 2.6062\n", + "84/84 [==============================] - 7s 40ms/step - loss: 8.7419 - recall_at_10: 0.1679 - mrr_at_10: 0.0639 - ndcg_at_10: 0.0881 - map_at_10: 0.0639 - precision_at_10: 0.0168 - regularization_loss: 0.0000e+00 - loss_batch: 8.7705\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.741933822631836,\n", + " 'recall_at_10': 0.16701146960258484,\n", + " 'mrr_at_10': 0.06411589682102203,\n", + " 'ndcg_at_10': 0.08810190856456757,\n", + " 'map_at_10': 0.06411589682102203,\n", + " 'precision_at_10': 0.016701148822903633,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 10.003721237182617}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " 
train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "a070554f", + "metadata": {}, + "source": [ + "# Run 2" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "566e2f90", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_1/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_1/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 80s 110ms/step - loss: 9.0406 - recall_at_10: 0.0356 - mrr_at_10: 0.0130 - ndcg_at_10: 0.0183 - map_at_10: 0.0130 - precision_at_10: 0.0036 - regularization_loss: 0.0000e+00 - loss_batch: 9.0326\n", + "Epoch 2/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 6.0845 - recall_at_10: 0.2649 - mrr_at_10: 0.1565 - ndcg_at_10: 0.1821 - map_at_10: 0.1565 - precision_at_10: 0.0265 - regularization_loss: 0.0000e+00 - loss_batch: 6.0807\n", + "Epoch 3/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 4.2293 - recall_at_10: 0.5184 - mrr_at_10: 0.3883 - ndcg_at_10: 0.4194 - map_at_10: 0.3883 - precision_at_10: 0.0518 - regularization_loss: 0.0000e+00 - loss_batch: 4.2323\n", + "Epoch 4/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 3.3441 - recall_at_10: 0.6468 - mrr_at_10: 0.5229 - ndcg_at_10: 0.5526 - map_at_10: 0.5229 - precision_at_10: 0.0647 - regularization_loss: 0.0000e+00 - loss_batch: 3.3486\n", + "Epoch 5/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 2.8789 - recall_at_10: 0.7051 - mrr_at_10: 0.5871 - ndcg_at_10: 0.6155 - map_at_10: 0.5871 - precision_at_10: 0.0705 - regularization_loss: 0.0000e+00 - loss_batch: 2.8854\n", + "84/84 [==============================] - 7s 40ms/step - loss: 8.8279 - recall_at_10: 0.1584 - mrr_at_10: 0.0624 - ndcg_at_10: 0.0847 - map_at_10: 0.0624 - precision_at_10: 0.0158 - regularization_loss: 0.0000e+00 - loss_batch: 8.8674\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.827858924865723,\n", + " 'recall_at_10': 0.15591499209403992,\n", + " 'mrr_at_10': 0.06090494617819786,\n", + " 'ndcg_at_10': 0.08297329396009445,\n", + " 'map_at_10': 0.06090494617819786,\n", + " 'precision_at_10': 0.01559150218963623,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 10.57563304901123}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "8c785bb1", + "metadata": {}, + "source": [ + "# Run 3" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7cc9685e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_2/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_2/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 80s 110ms/step - loss: 8.9635 - recall_at_10: 0.0396 - mrr_at_10: 0.0146 - ndcg_at_10: 0.0204 - map_at_10: 0.0146 - precision_at_10: 0.0040 - regularization_loss: 0.0000e+00 - loss_batch: 8.9589\n", + "Epoch 2/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 6.2358 - recall_at_10: 0.2417 - mrr_at_10: 0.1391 - ndcg_at_10: 0.1633 - map_at_10: 0.1391 - precision_at_10: 0.0242 - regularization_loss: 0.0000e+00 - loss_batch: 6.2350\n", + "Epoch 3/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 4.4725 - recall_at_10: 0.4996 - mrr_at_10: 0.3675 - ndcg_at_10: 0.3991 - map_at_10: 0.3675 - precision_at_10: 0.0500 - regularization_loss: 0.0000e+00 - loss_batch: 4.4748\n", + "Epoch 4/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 3.6489 - recall_at_10: 0.6128 - mrr_at_10: 0.4839 - ndcg_at_10: 0.5148 - map_at_10: 0.4839 - precision_at_10: 0.0613 - regularization_loss: 0.0000e+00 - loss_batch: 3.6543\n", + "Epoch 5/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 3.2122 - recall_at_10: 0.6626 - mrr_at_10: 0.5353 - ndcg_at_10: 0.5659 - map_at_10: 0.5353 - precision_at_10: 0.0663 - regularization_loss: 0.0000e+00 - loss_batch: 3.2164\n", + "84/84 [==============================] - 7s 39ms/step - loss: 8.8321 - recall_at_10: 0.1434 - mrr_at_10: 0.0582 - ndcg_at_10: 0.0781 - map_at_10: 0.0582 - precision_at_10: 0.0143 - regularization_loss: 0.0000e+00 - loss_batch: 8.8607\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.832069396972656,\n", + " 'recall_at_10': 0.1426556259393692,\n", + " 'mrr_at_10': 0.05639006569981575,\n", + " 'ndcg_at_10': 0.07650619745254517,\n", + " 'map_at_10': 0.05639006569981575,\n", + " 'precision_at_10': 0.01426556333899498,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 10.100401878356934}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + 
"model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "7b90a1c5", + "metadata": {}, + "source": [ + "# Run 4" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "66f1dbfe", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_3/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_3/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 80s 110ms/step - loss: 9.0739 - recall_at_10: 0.0361 - mrr_at_10: 0.0125 - ndcg_at_10: 0.0180 - map_at_10: 0.0125 - precision_at_10: 0.0036 - regularization_loss: 0.0000e+00 - loss_batch: 9.0756\n", + "Epoch 2/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 6.4023 - recall_at_10: 0.2372 - mrr_at_10: 0.1349 - ndcg_at_10: 0.1591 - map_at_10: 0.1349 - precision_at_10: 0.0237 - regularization_loss: 0.0000e+00 - loss_batch: 6.4020\n", + "Epoch 3/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 4.7934 - recall_at_10: 0.4544 - mrr_at_10: 0.3194 - ndcg_at_10: 0.3516 - map_at_10: 0.3194 - precision_at_10: 0.0454 - regularization_loss: 0.0000e+00 - loss_batch: 4.7958\n", + "Epoch 4/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 3.7131 - recall_at_10: 0.5913 - mrr_at_10: 0.4595 - ndcg_at_10: 0.4911 - map_at_10: 0.4595 - precision_at_10: 0.0591 - regularization_loss: 0.0000e+00 - loss_batch: 3.7160\n", + "Epoch 5/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 3.0900 - recall_at_10: 0.6752 - mrr_at_10: 0.5537 - ndcg_at_10: 0.5829 - map_at_10: 0.5537 - precision_at_10: 0.0675 - regularization_loss: 0.0000e+00 - loss_batch: 3.0945\n", + "84/84 [==============================] - 7s 40ms/step - loss: 8.9225 - recall_at_10: 0.1426 - mrr_at_10: 0.0581 - ndcg_at_10: 0.0778 - map_at_10: 0.0581 - precision_at_10: 0.0143 - regularization_loss: 0.0000e+00 - loss_batch: 8.9683\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.922541618347168,\n", + " 'recall_at_10': 0.14425428211688995,\n", + " 'mrr_at_10': 0.057682257145643234,\n", + " 'ndcg_at_10': 0.077837273478508,\n", + " 'map_at_10': 0.057682257145643234,\n", + " 'precision_at_10': 0.014425428584218025,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 10.947548866271973}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "a1734c21", + "metadata": {}, + "source": [ + "# Run 5" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "03b380f7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_4/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_4/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 80s 110ms/step - loss: 9.0454 - recall_at_10: 0.0381 - mrr_at_10: 0.0139 - ndcg_at_10: 0.0195 - map_at_10: 0.0139 - precision_at_10: 0.0038 - regularization_loss: 0.0000e+00 - loss_batch: 9.0386\n", + "Epoch 2/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 6.3489 - recall_at_10: 0.2430 - mrr_at_10: 0.1385 - ndcg_at_10: 0.1632 - map_at_10: 0.1385 - precision_at_10: 0.0243 - regularization_loss: 0.0000e+00 - loss_batch: 6.3435\n", + "Epoch 3/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 4.7853 - recall_at_10: 0.4602 - mrr_at_10: 0.3227 - ndcg_at_10: 0.3555 - map_at_10: 0.3227 - precision_at_10: 0.0460 - regularization_loss: 0.0000e+00 - loss_batch: 4.7868\n", + "Epoch 4/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 3.6873 - recall_at_10: 0.6026 - mrr_at_10: 0.4710 - ndcg_at_10: 0.5025 - map_at_10: 0.4710 - precision_at_10: 0.0603 - regularization_loss: 0.0000e+00 - loss_batch: 3.6936\n", + "Epoch 5/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 3.0298 - recall_at_10: 0.6856 - mrr_at_10: 0.5650 - ndcg_at_10: 0.5940 - map_at_10: 0.5650 - precision_at_10: 0.0686 - regularization_loss: 0.0000e+00 - loss_batch: 3.0363\n", + "84/84 [==============================] - 8s 40ms/step - loss: 8.6711 - recall_at_10: 0.1505 - mrr_at_10: 0.0595 - ndcg_at_10: 0.0807 - map_at_10: 0.0595 - precision_at_10: 0.0151 - regularization_loss: 0.0000e+00 - loss_batch: 8.6999\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.671070098876953,\n", + " 'recall_at_10': 0.15074290335178375,\n", + " 'mrr_at_10': 0.05898994952440262,\n", + " 'ndcg_at_10': 0.08035662025213242,\n", + " 'map_at_10': 0.05898994952440262,\n", + " 'precision_at_10': 0.015074292197823524,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 9.946744918823242}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + 
"model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6fe3b07c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/T4Rec_repro/train_runs/mlm_item_id_min.ipynb b/T4Rec_repro/train_runs/mlm_item_id_min.ipynb new file mode 100644 index 0000000000..df90cc786b --- /dev/null +++ b/T4Rec_repro/train_runs/mlm_item_id_min.ipynb @@ -0,0 +1,640 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "ceb3ae93", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-10 13:57:07.721314: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", + "2023-03-10 13:57:10.129984: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:57:10.130437: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:57:10.130617: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2023-03-10 13:57:10.581209: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-03-10 13:57:10.582030: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:57:10.582283: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:57:10.582439: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:57:11.330242: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:57:11.330454: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:57:11.330615: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:57:11.330728: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", + "2023-03-10 13:57:11.330790: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n" + ] + } + ], + "source": [ + "import os\n", + "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", + "import gc\n", + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "\n", + "from merlin.schema.tags import Tags\n", + "from merlin.io.dataset import Dataset\n", + "import merlin.models.tf as mm" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "11647dd3", + "metadata": {}, + "outputs": [], + "source": [ + "train = Dataset(\"ecom_dataset/0001/train.parquet\")\n", + "valid = Dataset(\"ecom_dataset/0002/valid.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4ab4e0fb", + "metadata": {}, + "outputs": [], + "source": [ + "target = 'sess_pid_seq'" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8d9903e6", + "metadata": {}, + "outputs": [], + "source": [ + "# a couple of starter hyperparams\n", + "\n", + "d_model = 192\n", + "n_layer = 3\n", + "n_head = 16\n", + "batch_size = 128\n", + "learning_rate = 0.0006667377132554976\n", + "n_epoch = 5" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a6ade14a", + "metadata": {}, + "outputs": [], + "source": [ + "def get_model():\n", + " mlp_block = mm.MLPBlock(\n", + " [128,d_model],\n", + " activation='relu',\n", + " 
no_activation_last_layer=True,\n", + " )\n", + "\n", + " from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n", + "\n", + " schema = TensorflowMetadata.from_proto_text_file(\n", + " '../',\n", + " file_name='rees46_schema_modified.pbtxt'\n", + " ).to_merlin_schema()\n", + "\n", + " train.schema = schema\n", + "\n", + " input_block = mm.InputBlockV2(\n", + " train.schema.select_by_name('sess_pid_seq'), \n", + " embeddings=mm.Embeddings(\n", + " train.schema.select_by_name('sess_pid_seq'), \n", + " sequence_combiner=None,\n", + " dim=d_model\n", + " ),\n", + " # pre=mm.StochasticSwapNoise()\n", + " )\n", + "\n", + " train.schema = train.schema.select_by_name('sess_pid_seq')\n", + "\n", + " xlnet_block = mm.XLNetBlock(d_model=d_model, n_head=n_head, n_layer=n_layer)\n", + "\n", + " dense_block = mm.SequentialBlock(\n", + " input_block,\n", + " mlp_block,\n", + " xlnet_block\n", + " )\n", + "\n", + " mlp_block2 = mm.MLPBlock(\n", + " [128,d_model],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )\n", + "\n", + " prediction_task = mm.CategoricalOutput(\n", + " to_call=input_block[\"categorical\"][target],\n", + " )\n", + "\n", + " model_transformer = mm.Model(dense_block, mlp_block2, prediction_task)\n", + "\n", + " optimizer = tf.keras.optimizers.Adam(\n", + " learning_rate=learning_rate,\n", + " )\n", + "\n", + " model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", + " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[10])\n", + " )\n", + " return model_transformer, xlnet_block" + ] + }, + { + "cell_type": "markdown", + "id": "78302207", + "metadata": {}, + "source": [ + "# Run 1" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "e7474131", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-10 13:57:17.631317: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 82s 110ms/step - loss: 8.8265 - recall_at_10: 0.0432 - mrr_at_10: 0.0166 - ndcg_at_10: 0.0228 - map_at_10: 0.0166 - precision_at_10: 0.0043 - regularization_loss: 0.0000e+00 - loss_batch: 8.8191\n", + "Epoch 2/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 5.8014 - recall_at_10: 0.3091 - mrr_at_10: 0.1936 - ndcg_at_10: 0.2210 - map_at_10: 0.1936 - precision_at_10: 0.0309 - regularization_loss: 0.0000e+00 - loss_batch: 5.8019\n", + "Epoch 3/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 4.1718 - recall_at_10: 0.5397 - mrr_at_10: 0.4080 - ndcg_at_10: 0.4394 - map_at_10: 0.4080 - precision_at_10: 0.0540 - regularization_loss: 0.0000e+00 - loss_batch: 4.1734\n", + "Epoch 4/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 3.2806 - recall_at_10: 0.6585 - mrr_at_10: 0.5362 - ndcg_at_10: 0.5656 - map_at_10: 0.5362 - precision_at_10: 0.0658 - regularization_loss: 0.0000e+00 - loss_batch: 3.2849\n", + "Epoch 5/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 2.8188 - recall_at_10: 0.7125 - mrr_at_10: 0.6007 - ndcg_at_10: 0.6276 - map_at_10: 0.6007 - precision_at_10: 0.0712 - regularization_loss: 0.0000e+00 - loss_batch: 2.8246\n", + "84/84 [==============================] - 7s 39ms/step - loss: 8.8107 - recall_at_10: 0.1511 - mrr_at_10: 0.0623 - ndcg_at_10: 0.0829 - map_at_10: 0.0623 - precision_at_10: 0.0151 - regularization_loss: 0.0000e+00 - loss_batch: 8.8298\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.810694694519043,\n", + " 'recall_at_10': 0.15318788588047028,\n", + " 'mrr_at_10': 0.06131112948060036,\n", + " 'ndcg_at_10': 0.08268804848194122,\n", + " 'map_at_10': 0.06131112948060036,\n", + " 'precision_at_10': 0.015318789519369602,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 9.6568603515625}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " 
train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "3513d28a", + "metadata": {}, + "source": [ + "# Run 2" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2e624551", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_1/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_1/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 80s 110ms/step - loss: 9.1281 - recall_at_10: 0.0359 - mrr_at_10: 0.0128 - ndcg_at_10: 0.0181 - map_at_10: 0.0128 - precision_at_10: 0.0036 - regularization_loss: 0.0000e+00 - loss_batch: 9.1243\n", + "Epoch 2/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 6.7038 - recall_at_10: 0.1907 - mrr_at_10: 0.1006 - ndcg_at_10: 0.1218 - map_at_10: 0.1006 - precision_at_10: 0.0191 - regularization_loss: 0.0000e+00 - loss_batch: 6.6971\n", + "Epoch 3/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 4.9471 - recall_at_10: 0.4404 - mrr_at_10: 0.3077 - ndcg_at_10: 0.3393 - map_at_10: 0.3077 - precision_at_10: 0.0440 - regularization_loss: 0.0000e+00 - loss_batch: 4.9478\n", + "Epoch 4/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 3.9842 - recall_at_10: 0.5607 - mrr_at_10: 0.4197 - ndcg_at_10: 0.4534 - map_at_10: 0.4197 - precision_at_10: 0.0561 - regularization_loss: 0.0000e+00 - loss_batch: 3.9878\n", + "Epoch 5/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 3.3262 - recall_at_10: 0.6442 - mrr_at_10: 0.5172 - ndcg_at_10: 0.5477 - map_at_10: 0.5172 - precision_at_10: 0.0644 - regularization_loss: 0.0000e+00 - loss_batch: 3.3307\n", + "84/84 [==============================] - 7s 40ms/step - loss: 8.9716 - recall_at_10: 0.1277 - mrr_at_10: 0.0513 - ndcg_at_10: 0.0692 - map_at_10: 0.0513 - precision_at_10: 0.0128 - regularization_loss: 0.0000e+00 - loss_batch: 8.9960\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.971626281738281,\n", + " 'recall_at_10': 0.12817378342151642,\n", + " 'mrr_at_10': 0.05082216113805771,\n", + " 'ndcg_at_10': 0.06883765012025833,\n", + " 'map_at_10': 0.05082216113805771,\n", + " 'precision_at_10': 0.012817380018532276,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 10.049013137817383}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "d42dea65", + "metadata": {}, + "source": [ + "# Run 3" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "97e7322c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_2/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_2/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 80s 110ms/step - loss: 8.9307 - recall_at_10: 0.0396 - mrr_at_10: 0.0142 - ndcg_at_10: 0.0201 - map_at_10: 0.0142 - precision_at_10: 0.0040 - regularization_loss: 0.0000e+00 - loss_batch: 8.9265\n", + "Epoch 2/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 5.9376 - recall_at_10: 0.2951 - mrr_at_10: 0.1842 - ndcg_at_10: 0.2105 - map_at_10: 0.1842 - precision_at_10: 0.0295 - regularization_loss: 0.0000e+00 - loss_batch: 5.9350\n", + "Epoch 3/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 4.3616 - recall_at_10: 0.5184 - mrr_at_10: 0.3844 - ndcg_at_10: 0.4164 - map_at_10: 0.3844 - precision_at_10: 0.0518 - regularization_loss: 0.0000e+00 - loss_batch: 4.3657\n", + "Epoch 4/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 3.4916 - recall_at_10: 0.6319 - mrr_at_10: 0.5057 - ndcg_at_10: 0.5359 - map_at_10: 0.5057 - precision_at_10: 0.0632 - regularization_loss: 0.0000e+00 - loss_batch: 3.4969\n", + "Epoch 5/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 3.0021 - recall_at_10: 0.6889 - mrr_at_10: 0.5684 - ndcg_at_10: 0.5973 - map_at_10: 0.5684 - precision_at_10: 0.0689 - regularization_loss: 0.0000e+00 - loss_batch: 3.0072\n", + "84/84 [==============================] - 7s 40ms/step - loss: 8.7983 - recall_at_10: 0.1534 - mrr_at_10: 0.0599 - ndcg_at_10: 0.0816 - map_at_10: 0.0599 - precision_at_10: 0.0153 - regularization_loss: 0.0000e+00 - loss_batch: 8.8378\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.798320770263672,\n", + " 'recall_at_10': 0.15647922456264496,\n", + " 'mrr_at_10': 0.05985381081700325,\n", + " 'ndcg_at_10': 0.08228185027837753,\n", + " 'map_at_10': 0.05985381081700325,\n", + " 'precision_at_10': 0.015647921711206436,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 10.545936584472656}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + 
"model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "610da911", + "metadata": {}, + "source": [ + "# Run 4" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "9e0f0891", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_3/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_3/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 80s 110ms/step - loss: 8.8791 - recall_at_10: 0.0414 - mrr_at_10: 0.0155 - ndcg_at_10: 0.0215 - map_at_10: 0.0155 - precision_at_10: 0.0041 - regularization_loss: 0.0000e+00 - loss_batch: 8.8746\n", + "Epoch 2/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 5.5817 - recall_at_10: 0.3289 - mrr_at_10: 0.2127 - ndcg_at_10: 0.2403 - map_at_10: 0.2127 - precision_at_10: 0.0329 - regularization_loss: 0.0000e+00 - loss_batch: 5.5795\n", + "Epoch 3/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 3.8784 - recall_at_10: 0.5761 - mrr_at_10: 0.4489 - ndcg_at_10: 0.4793 - map_at_10: 0.4489 - precision_at_10: 0.0576 - regularization_loss: 0.0000e+00 - loss_batch: 3.8833\n", + "Epoch 4/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 3.0679 - recall_at_10: 0.6797 - mrr_at_10: 0.5656 - ndcg_at_10: 0.5930 - map_at_10: 0.5656 - precision_at_10: 0.0680 - regularization_loss: 0.0000e+00 - loss_batch: 3.0749\n", + "Epoch 5/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 2.5693 - recall_at_10: 0.7397 - mrr_at_10: 0.6350 - ndcg_at_10: 0.6602 - map_at_10: 0.6350 - precision_at_10: 0.0740 - regularization_loss: 0.0000e+00 - loss_batch: 2.5767\n", + "84/84 [==============================] - 7s 40ms/step - loss: 8.6399 - recall_at_10: 0.1581 - mrr_at_10: 0.0621 - ndcg_at_10: 0.0844 - map_at_10: 0.0621 - precision_at_10: 0.0158 - regularization_loss: 0.0000e+00 - loss_batch: 8.6637\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.63992691040039,\n", + " 'recall_at_10': 0.1588301658630371,\n", + " 'mrr_at_10': 0.06323756277561188,\n", + " 'ndcg_at_10': 0.0855293795466423,\n", + " 'map_at_10': 0.06323756277561188,\n", + " 'precision_at_10': 0.01588302105665207,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 9.691500663757324}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "6cffc60d", + "metadata": {}, + "source": [ + "# Run 5" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "6981ff6e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_4/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_4/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 80s 110ms/step - loss: 8.9605 - recall_at_10: 0.0390 - mrr_at_10: 0.0141 - ndcg_at_10: 0.0199 - map_at_10: 0.0141 - precision_at_10: 0.0039 - regularization_loss: 0.0000e+00 - loss_batch: 8.9571\n", + "Epoch 2/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 6.1194 - recall_at_10: 0.2618 - mrr_at_10: 0.1575 - ndcg_at_10: 0.1821 - map_at_10: 0.1575 - precision_at_10: 0.0262 - regularization_loss: 0.0000e+00 - loss_batch: 6.1199\n", + "Epoch 3/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 4.4762 - recall_at_10: 0.5000 - mrr_at_10: 0.3647 - ndcg_at_10: 0.3970 - map_at_10: 0.3647 - precision_at_10: 0.0500 - regularization_loss: 0.0000e+00 - loss_batch: 4.4783\n", + "Epoch 4/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 3.6222 - recall_at_10: 0.6166 - mrr_at_10: 0.4884 - ndcg_at_10: 0.5191 - map_at_10: 0.4884 - precision_at_10: 0.0617 - regularization_loss: 0.0000e+00 - loss_batch: 3.6248\n", + "Epoch 5/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 3.1115 - recall_at_10: 0.6744 - mrr_at_10: 0.5505 - ndcg_at_10: 0.5803 - map_at_10: 0.5505 - precision_at_10: 0.0674 - regularization_loss: 0.0000e+00 - loss_batch: 3.1192\n", + "84/84 [==============================] - 7s 40ms/step - loss: 8.8991 - recall_at_10: 0.1457 - mrr_at_10: 0.0572 - ndcg_at_10: 0.0776 - map_at_10: 0.0572 - precision_at_10: 0.0146 - regularization_loss: 0.0000e+00 - loss_batch: 8.9238\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.899141311645508,\n", + " 'recall_at_10': 0.14763964712619781,\n", + " 'mrr_at_10': 0.05743885040283203,\n", + " 'ndcg_at_10': 0.07836496829986572,\n", + " 'map_at_10': 0.05743885040283203,\n", + " 'precision_at_10': 0.014763964340090752,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 9.991716384887695}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + 
"model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d195f16d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 3c73f8e7b84ae1bbf2b23f3d32ace43790542ee2 Mon Sep 17 00:00:00 2001 From: Radek Osmulski Date: Tue, 14 Mar 2023 17:17:53 +1000 Subject: [PATCH 07/15] update --- ...lm_item_id_min.ipynb => clm_item_id.ipynb} | 262 +++--- T4Rec_repro/train_runs/mlm_item_id.ipynb | 808 ++++++++++++++++++ T4Rec_repro/train_runs/mlm_item_id_min.ipynb | 640 -------------- 3 files changed, 949 insertions(+), 761 deletions(-) rename T4Rec_repro/train_runs/{clm_item_id_min.ipynb => clm_item_id.ipynb} (60%) create mode 100644 T4Rec_repro/train_runs/mlm_item_id.ipynb delete mode 100644 T4Rec_repro/train_runs/mlm_item_id_min.ipynb diff --git a/T4Rec_repro/train_runs/clm_item_id_min.ipynb b/T4Rec_repro/train_runs/clm_item_id.ipynb similarity index 60% rename from T4Rec_repro/train_runs/clm_item_id_min.ipynb rename to T4Rec_repro/train_runs/clm_item_id.ipynb index ff5eabbe86..afe12df6f5 100644 --- a/T4Rec_repro/train_runs/clm_item_id_min.ipynb +++ b/T4Rec_repro/train_runs/clm_item_id.ipynb @@ -10,7 +10,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-03-10 13:19:41.332031: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-03-13 20:54:36.957592: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" ] }, @@ -27,21 +27,21 @@ "text": [ "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", - "2023-03-10 13:19:43.702598: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:19:43.703049: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:19:43.703227: I 
tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:54:39.345898: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:54:39.346296: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:54:39.346453: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n", - "2023-03-10 13:19:44.148806: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-03-13 20:54:39.777830: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-03-10 13:19:44.149822: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:19:44.150030: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:19:44.150185: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:19:44.891194: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:19:44.891419: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:19:44.891582: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:19:44.891696: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", - "2023-03-10 13:19:44.891761: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n" + "2023-03-13 
20:54:39.778681: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:54:39.778886: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:54:39.779040: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:54:40.525430: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:54:40.525647: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:54:40.525808: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:54:40.525922: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", + "2023-03-13 20:54:40.525983: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n" ] } ], @@ -66,7 +66,7 @@ "outputs": [], "source": [ "train = Dataset(\"ecom_dataset/0001/train.parquet\")\n", - "valid = Dataset(\"ecom_dataset/0002/valid.parquet\")" + "valid = Dataset(\"ecom_dataset/0002/test.parquet\")" ] }, { @@ -93,7 +93,9 @@ "n_head = 16\n", "batch_size = 128\n", "learning_rate = 0.0006667377132554976\n", - "n_epoch = 5" + "n_epoch = 5\n", + "item_embedding_dim = 448 \n", + "item_id_embeddings_init_std = 3" ] }, { @@ -105,7 +107,7 @@ "source": [ "def get_model():\n", " mlp_block = mm.MLPBlock(\n", - " [128,d_model],\n", + " [d_model],\n", " activation='relu',\n", " no_activation_last_layer=True,\n", " )\n", @@ -118,16 +120,16 @@ " ).to_merlin_schema()\n", "\n", " train.schema = schema\n", - "\n", + " \n", + " schema_model = schema.select_by_tag(Tags.ITEM_ID)\n", " input_block = mm.InputBlockV2(\n", - " train.schema.select_by_name('sess_pid_seq'), \n", - " embeddings=mm.Embeddings(\n", - " train.schema.select_by_name('sess_pid_seq'), \n", - " sequence_combiner=None,\n", - " dim=d_model\n", - " ),\n", - " # pre=mm.StochasticSwapNoise()\n", - " )\n", + " schema_model,\n", + " categorical=mm.Embeddings(\n", + " schema_model.select_by_tag(Tags.CATEGORICAL),\n", + " dim=item_embedding_dim,\n", + " sequence_combiner=None,\n", + " )\n", + " )\n", "\n", " train.schema = train.schema.select_by_name('sess_pid_seq')\n", "\n", @@ -140,7 +142,7 @@ " )\n", "\n", " mlp_block2 = mm.MLPBlock(\n", - " [128,d_model],\n", + " [item_embedding_dim],\n", " activation='relu',\n", " no_activation_last_layer=True,\n", " )\n", @@ -156,7 +158,7 @@ " )\n", "\n", " model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", - " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[10])\n", + " 
metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[20])\n", " )\n", " return model_transformer, xlnet_block" ] @@ -171,46 +173,23 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 11, "id": "e7474131", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", - " warnings.warn(\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch 1/5\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-03-10 13:19:51.258201: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_5/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. 
This may consume a large amount of memory.\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", " warnings.warn(\n" ] }, @@ -218,33 +197,33 @@ "name": "stdout", "output_type": "stream", "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 83s 110ms/step - loss: 8.9114 - recall_at_10: 0.0409 - mrr_at_10: 0.0151 - ndcg_at_10: 0.0211 - map_at_10: 0.0151 - precision_at_10: 0.0041 - regularization_loss: 0.0000e+00 - loss_batch: 8.9101\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_5/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 150s 214ms/step - loss: 8.4001 - recall_at_20: 0.0827 - mrr_at_20: 0.0323 - ndcg_at_20: 0.0433 - map_at_20: 0.0323 - precision_at_20: 0.0041 - regularization_loss: 0.0000e+00 - loss_batch: 8.3857\n", "Epoch 2/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 5.7403 - recall_at_10: 0.2994 - mrr_at_10: 0.1857 - ndcg_at_10: 0.2126 - map_at_10: 0.1857 - precision_at_10: 0.0299 - regularization_loss: 0.0000e+00 - loss_batch: 5.7358\n", + "677/677 [==============================] - 159s 234ms/step - loss: 3.2173 - recall_at_20: 0.6874 - mrr_at_20: 0.5632 - ndcg_at_20: 0.5917 - map_at_20: 0.5632 - precision_at_20: 0.0344 - regularization_loss: 0.0000e+00 - loss_batch: 3.2233\n", "Epoch 3/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 3.8640 - recall_at_10: 0.5785 - mrr_at_10: 0.4536 - ndcg_at_10: 0.4835 - map_at_10: 0.4536 - precision_at_10: 0.0579 - regularization_loss: 0.0000e+00 - loss_batch: 3.8700\n", + "677/677 [==============================] - 159s 235ms/step - loss: 2.0390 - recall_at_20: 0.8298 - mrr_at_20: 0.7342 - ndcg_at_20: 0.7561 - map_at_20: 0.7342 - precision_at_20: 0.0415 - regularization_loss: 0.0000e+00 - loss_batch: 2.0462\n", "Epoch 4/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 3.0672 - recall_at_10: 0.6808 - mrr_at_10: 0.5664 - ndcg_at_10: 0.5939 - map_at_10: 0.5664 - precision_at_10: 0.0681 - regularization_loss: 0.0000e+00 - loss_batch: 3.0720\n", + "677/677 [==============================] - 160s 235ms/step - loss: 1.5995 - recall_at_20: 0.8662 - mrr_at_20: 0.7825 - ndcg_at_20: 0.8016 - map_at_20: 0.7825 - precision_at_20: 0.0433 - regularization_loss: 0.0000e+00 - loss_batch: 1.6068\n", "Epoch 5/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 2.6008 - recall_at_10: 0.7369 - mrr_at_10: 0.6298 - ndcg_at_10: 0.6556 - map_at_10: 0.6298 - precision_at_10: 0.0737 - regularization_loss: 0.0000e+00 - loss_batch: 2.6062\n", - "84/84 [==============================] - 7s 40ms/step - loss: 8.7419 - recall_at_10: 0.1679 - mrr_at_10: 0.0639 - ndcg_at_10: 0.0881 - map_at_10: 0.0639 - precision_at_10: 0.0168 - regularization_loss: 0.0000e+00 - loss_batch: 8.7705\n" + "677/677 [==============================] - 160s 236ms/step - loss: 1.3356 - recall_at_20: 0.8955 - mrr_at_20: 0.8085 - ndcg_at_20: 0.8284 - map_at_20: 0.8085 - precision_at_20: 0.0448 - regularization_loss: 0.0000e+00 - loss_batch: 1.3422\n", + "84/84 [==============================] - 13s 89ms/step - loss: 8.9283 - recall_at_20: 0.3217 - mrr_at_20: 0.1205 - ndcg_at_20: 0.1651 - map_at_20: 0.1205 - precision_at_20: 0.0161 - regularization_loss: 0.0000e+00 - loss_batch: 8.9661\n" ] }, { "data": { "text/plain": [ - "{'loss': 8.741933822631836,\n", - " 'recall_at_10': 0.16701146960258484,\n", - " 'mrr_at_10': 0.06411589682102203,\n", - " 'ndcg_at_10': 0.08810190856456757,\n", - " 'map_at_10': 0.06411589682102203,\n", - " 'precision_at_10': 0.016701148822903633,\n", + "{'loss': 8.928336143493652,\n", + " 'recall_at_20': 0.32508933544158936,\n", + " 'mrr_at_20': 0.11867032200098038,\n", + " 'ndcg_at_20': 0.16441309452056885,\n", + " 'map_at_20': 0.11867032200098038,\n", + " 'precision_at_20': 0.016254469752311707,\n", " 'regularization_loss': 0.0,\n", - " 'loss_batch': 10.003721237182617}" + " 'loss_batch': 10.598859786987305}" ] }, - "execution_count": 6, + "execution_count": 11, 
"metadata": {}, "output_type": "execute_result" } @@ -267,6 +246,47 @@ ")" ] }, + { + "cell_type": "code", + "execution_count": 12, + "id": "117174c6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"model_5\"\n", + "_________________________________________________________________\n", + " Layer (type) Output Shape Param # \n", + "=================================================================\n", + " sequential_block_29 (Sequen multiple 176252608 \n", + " tialBlock) \n", + " \n", + " sequential_block_30 (Sequen multiple 86464 \n", + " tialBlock) \n", + " \n", + " sess_pid_seq/categorical_ou multiple 175110449 \n", + " tput (CategoricalOutput) \n", + " \n", + " model_context_5 (ModelConte multiple 0 \n", + " xt) \n", + " \n", + " prepare_features_11 (Prepar multiple 0 \n", + " eFeatures) \n", + " \n", + "=================================================================\n", + "Total params: 176,729,074\n", + "Trainable params: 176,729,073\n", + "Non-trainable params: 1\n", + "_________________________________________________________________\n" + ] + } + ], + "source": [ + "model_transformer.summary()" + ] + }, { "cell_type": "markdown", "id": "a070554f", @@ -310,29 +330,29 @@ "output_type": "stream", "text": [ "WARNING:tensorflow:Gradients do not exist for variables ['model_1/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 80s 110ms/step - loss: 9.0406 - recall_at_10: 0.0356 - mrr_at_10: 0.0130 - ndcg_at_10: 0.0183 - map_at_10: 0.0130 - precision_at_10: 0.0036 - regularization_loss: 0.0000e+00 - loss_batch: 9.0326\n", + "677/677 [==============================] - 105s 146ms/step - loss: 7.2092 - recall_at_20: 0.1524 - mrr_at_20: 0.0873 - ndcg_at_20: 0.1018 - map_at_20: 0.0873 - precision_at_20: 0.0076 - regularization_loss: 0.0000e+00 - loss_batch: 7.2024\n", "Epoch 2/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 6.0845 - recall_at_10: 0.2649 - mrr_at_10: 0.1565 - ndcg_at_10: 0.1821 - map_at_10: 0.1565 - precision_at_10: 0.0265 - regularization_loss: 0.0000e+00 - loss_batch: 6.0807\n", + "677/677 [==============================] - 100s 147ms/step - loss: 2.8315 - recall_at_20: 0.7410 - mrr_at_20: 0.6393 - ndcg_at_20: 0.6625 - map_at_20: 0.6393 - precision_at_20: 0.0370 - regularization_loss: 0.0000e+00 - loss_batch: 2.8376\n", "Epoch 3/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 4.2293 - recall_at_10: 0.5184 - mrr_at_10: 0.3883 - ndcg_at_10: 0.4194 - map_at_10: 0.3883 - precision_at_10: 0.0518 - regularization_loss: 0.0000e+00 - loss_batch: 4.2323\n", + "677/677 [==============================] - 100s 147ms/step - loss: 1.9715 - recall_at_20: 0.8361 - mrr_at_20: 0.7449 - ndcg_at_20: 0.7658 - map_at_20: 0.7449 - precision_at_20: 0.0418 - regularization_loss: 0.0000e+00 - loss_batch: 1.9781\n", "Epoch 4/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 3.3441 - recall_at_10: 0.6468 - mrr_at_10: 0.5229 - ndcg_at_10: 0.5526 - map_at_10: 0.5229 - precision_at_10: 0.0647 - regularization_loss: 0.0000e+00 - loss_batch: 3.3486\n", + "677/677 
[==============================] - 100s 148ms/step - loss: 1.5735 - recall_at_20: 0.8706 - mrr_at_20: 0.7848 - ndcg_at_20: 0.8044 - map_at_20: 0.7848 - precision_at_20: 0.0435 - regularization_loss: 0.0000e+00 - loss_batch: 1.5798\n", "Epoch 5/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 2.8789 - recall_at_10: 0.7051 - mrr_at_10: 0.5871 - ndcg_at_10: 0.6155 - map_at_10: 0.5871 - precision_at_10: 0.0705 - regularization_loss: 0.0000e+00 - loss_batch: 2.8854\n", - "84/84 [==============================] - 7s 40ms/step - loss: 8.8279 - recall_at_10: 0.1584 - mrr_at_10: 0.0624 - ndcg_at_10: 0.0847 - map_at_10: 0.0624 - precision_at_10: 0.0158 - regularization_loss: 0.0000e+00 - loss_batch: 8.8674\n" + "677/677 [==============================] - 100s 147ms/step - loss: 1.3392 - recall_at_20: 0.8979 - mrr_at_20: 0.8098 - ndcg_at_20: 0.8299 - map_at_20: 0.8098 - precision_at_20: 0.0449 - regularization_loss: 0.0000e+00 - loss_batch: 1.3459\n", + "84/84 [==============================] - 8s 44ms/step - loss: 8.8929 - recall_at_20: 0.3268 - mrr_at_20: 0.1240 - ndcg_at_20: 0.1687 - map_at_20: 0.1240 - precision_at_20: 0.0163 - regularization_loss: 0.0000e+00 - loss_batch: 8.9432\n" ] }, { "data": { "text/plain": [ - "{'loss': 8.827858924865723,\n", - " 'recall_at_10': 0.15591499209403992,\n", - " 'mrr_at_10': 0.06090494617819786,\n", - " 'ndcg_at_10': 0.08297329396009445,\n", - " 'map_at_10': 0.06090494617819786,\n", - " 'precision_at_10': 0.01559150218963623,\n", + "{'loss': 8.892891883850098,\n", + " 'recall_at_20': 0.3253714442253113,\n", + " 'mrr_at_20': 0.11890144646167755,\n", + " 'ndcg_at_20': 0.16443441808223724,\n", + " 'map_at_20': 0.11890144646167755,\n", + " 'precision_at_20': 0.016268571838736534,\n", " 'regularization_loss': 0.0,\n", - " 'loss_batch': 10.57563304901123}" + " 'loss_batch': 11.120135307312012}" ] }, "execution_count": 7, @@ -393,29 +413,29 @@ "output_type": "stream", "text": [ "WARNING:tensorflow:Gradients do not exist for variables ['model_2/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 80s 110ms/step - loss: 8.9635 - recall_at_10: 0.0396 - mrr_at_10: 0.0146 - ndcg_at_10: 0.0204 - map_at_10: 0.0146 - precision_at_10: 0.0040 - regularization_loss: 0.0000e+00 - loss_batch: 8.9589\n", + "677/677 [==============================] - 105s 146ms/step - loss: 7.7130 - recall_at_20: 0.1178 - mrr_at_20: 0.0575 - ndcg_at_20: 0.0708 - map_at_20: 0.0575 - precision_at_20: 0.0059 - regularization_loss: 0.0000e+00 - loss_batch: 7.7064\n", "Epoch 2/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 6.2358 - recall_at_10: 0.2417 - mrr_at_10: 0.1391 - ndcg_at_10: 0.1633 - map_at_10: 0.1391 - precision_at_10: 0.0242 - regularization_loss: 0.0000e+00 - loss_batch: 6.2350\n", + "677/677 [==============================] - 100s 147ms/step - loss: 2.9553 - recall_at_20: 0.7234 - mrr_at_20: 0.6112 - ndcg_at_20: 0.6369 - map_at_20: 0.6112 - precision_at_20: 0.0362 - regularization_loss: 0.0000e+00 - loss_batch: 2.9622\n", "Epoch 3/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 4.4725 - recall_at_10: 0.4996 - mrr_at_10: 0.3675 - ndcg_at_10: 0.3991 - map_at_10: 0.3675 - precision_at_10: 0.0500 - regularization_loss: 0.0000e+00 - loss_batch: 4.4748\n", + "677/677 [==============================] - 100s 147ms/step - loss: 1.9463 - recall_at_20: 0.8379 - mrr_at_20: 0.7465 - ndcg_at_20: 0.7675 - map_at_20: 0.7465 - precision_at_20: 0.0419 - regularization_loss: 0.0000e+00 - loss_batch: 1.9539\n", "Epoch 4/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 3.6489 - recall_at_10: 0.6128 - mrr_at_10: 0.4839 - ndcg_at_10: 0.5148 - map_at_10: 0.4839 - precision_at_10: 0.0613 - regularization_loss: 0.0000e+00 - loss_batch: 3.6543\n", + "677/677 [==============================] - 100s 148ms/step - loss: 1.5422 - recall_at_20: 0.8713 - mrr_at_20: 0.7875 - ndcg_at_20: 0.8066 - map_at_20: 0.7875 - precision_at_20: 0.0436 - regularization_loss: 0.0000e+00 - loss_batch: 1.5490\n", "Epoch 5/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 3.2122 - recall_at_10: 0.6626 - mrr_at_10: 0.5353 - ndcg_at_10: 0.5659 - map_at_10: 0.5353 - precision_at_10: 0.0663 - regularization_loss: 0.0000e+00 - loss_batch: 3.2164\n", - "84/84 [==============================] - 7s 39ms/step - loss: 8.8321 - recall_at_10: 0.1434 - mrr_at_10: 0.0582 - ndcg_at_10: 0.0781 - map_at_10: 0.0582 - precision_at_10: 0.0143 - regularization_loss: 0.0000e+00 - loss_batch: 8.8607\n" + "677/677 [==============================] - 100s 147ms/step - loss: 1.3310 - recall_at_20: 0.8989 - mrr_at_20: 0.8116 - ndcg_at_20: 0.8315 - map_at_20: 0.8116 - precision_at_20: 0.0449 - regularization_loss: 0.0000e+00 - loss_batch: 1.3377\n", + "84/84 [==============================] - 7s 43ms/step - loss: 8.8519 - recall_at_20: 0.3266 - mrr_at_20: 0.1215 - ndcg_at_20: 0.1670 - map_at_20: 0.1215 - precision_at_20: 0.0163 - regularization_loss: 0.0000e+00 - loss_batch: 8.8791\n" ] }, { "data": { "text/plain": [ - "{'loss': 8.832069396972656,\n", - " 'recall_at_10': 0.1426556259393692,\n", - " 'mrr_at_10': 0.05639006569981575,\n", - " 'ndcg_at_10': 0.07650619745254517,\n", - " 'map_at_10': 0.05639006569981575,\n", - " 'precision_at_10': 0.01426556333899498,\n", + "{'loss': 8.851947784423828,\n", + " 'recall_at_20': 0.3281925916671753,\n", + " 'mrr_at_20': 0.11986491084098816,\n", + " 'ndcg_at_20': 0.16598893702030182,\n", + " 'map_at_20': 
0.11986491084098816,\n", + " 'precision_at_20': 0.016409626230597496,\n", " 'regularization_loss': 0.0,\n", - " 'loss_batch': 10.100401878356934}" + " 'loss_batch': 10.054880142211914}" ] }, "execution_count": 8, @@ -476,29 +496,29 @@ "output_type": "stream", "text": [ "WARNING:tensorflow:Gradients do not exist for variables ['model_3/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 80s 110ms/step - loss: 9.0739 - recall_at_10: 0.0361 - mrr_at_10: 0.0125 - ndcg_at_10: 0.0180 - map_at_10: 0.0125 - precision_at_10: 0.0036 - regularization_loss: 0.0000e+00 - loss_batch: 9.0756\n", + "677/677 [==============================] - 105s 146ms/step - loss: 7.6534 - recall_at_20: 0.1225 - mrr_at_20: 0.0618 - ndcg_at_20: 0.0752 - map_at_20: 0.0618 - precision_at_20: 0.0061 - regularization_loss: 0.0000e+00 - loss_batch: 7.6446\n", "Epoch 2/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 6.4023 - recall_at_10: 0.2372 - mrr_at_10: 0.1349 - ndcg_at_10: 0.1591 - map_at_10: 0.1349 - precision_at_10: 0.0237 - regularization_loss: 0.0000e+00 - loss_batch: 6.4020\n", + "677/677 [==============================] - 100s 147ms/step - loss: 2.9167 - recall_at_20: 0.7304 - mrr_at_20: 0.6236 - ndcg_at_20: 0.6481 - map_at_20: 0.6236 - precision_at_20: 0.0365 - regularization_loss: 0.0000e+00 - loss_batch: 2.9215\n", "Epoch 3/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 4.7934 - recall_at_10: 0.4544 - mrr_at_10: 0.3194 - ndcg_at_10: 0.3516 - map_at_10: 0.3194 - precision_at_10: 0.0454 - regularization_loss: 0.0000e+00 - loss_batch: 4.7958\n", + "677/677 [==============================] - 100s 147ms/step - loss: 1.9719 - recall_at_20: 0.8351 - mrr_at_20: 0.7428 - ndcg_at_20: 0.7639 - map_at_20: 0.7428 - precision_at_20: 0.0418 - regularization_loss: 0.0000e+00 - loss_batch: 1.9820\n", "Epoch 4/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 3.7131 - recall_at_10: 0.5913 - mrr_at_10: 0.4595 - ndcg_at_10: 0.4911 - map_at_10: 0.4595 - precision_at_10: 0.0591 - regularization_loss: 0.0000e+00 - loss_batch: 3.7160\n", + "677/677 [==============================] - 100s 148ms/step - loss: 1.5807 - recall_at_20: 0.8694 - mrr_at_20: 0.7840 - ndcg_at_20: 0.8035 - map_at_20: 0.7840 - precision_at_20: 0.0435 - regularization_loss: 0.0000e+00 - loss_batch: 1.5877\n", "Epoch 5/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 3.0900 - recall_at_10: 0.6752 - mrr_at_10: 0.5537 - ndcg_at_10: 0.5829 - map_at_10: 0.5537 - precision_at_10: 0.0675 - regularization_loss: 0.0000e+00 - loss_batch: 3.0945\n", - "84/84 [==============================] - 7s 40ms/step - loss: 8.9225 - recall_at_10: 0.1426 - mrr_at_10: 0.0581 - ndcg_at_10: 0.0778 - map_at_10: 0.0581 - precision_at_10: 0.0143 - regularization_loss: 0.0000e+00 - loss_batch: 8.9683\n" + "677/677 [==============================] - 100s 147ms/step - loss: 1.3320 - recall_at_20: 0.8979 - mrr_at_20: 0.8110 - ndcg_at_20: 0.8308 - map_at_20: 0.8110 - precision_at_20: 0.0449 - regularization_loss: 0.0000e+00 - loss_batch: 1.3375\n", + "84/84 [==============================] - 7s 44ms/step - loss: 
8.9484 - recall_at_20: 0.3267 - mrr_at_20: 0.1212 - ndcg_at_20: 0.1669 - map_at_20: 0.1212 - precision_at_20: 0.0163 - regularization_loss: 0.0000e+00 - loss_batch: 8.9818\n" ] }, { "data": { "text/plain": [ - "{'loss': 8.922541618347168,\n", - " 'recall_at_10': 0.14425428211688995,\n", - " 'mrr_at_10': 0.057682257145643234,\n", - " 'ndcg_at_10': 0.077837273478508,\n", - " 'map_at_10': 0.057682257145643234,\n", - " 'precision_at_10': 0.014425428584218025,\n", + "{'loss': 8.948363304138184,\n", + " 'recall_at_20': 0.3253714442253113,\n", + " 'mrr_at_20': 0.11741983145475388,\n", + " 'ndcg_at_20': 0.16352491080760956,\n", + " 'map_at_20': 0.11741983145475388,\n", + " 'precision_at_20': 0.016268571838736534,\n", " 'regularization_loss': 0.0,\n", - " 'loss_batch': 10.947548866271973}" + " 'loss_batch': 10.429142951965332}" ] }, "execution_count": 9, @@ -559,29 +579,29 @@ "output_type": "stream", "text": [ "WARNING:tensorflow:Gradients do not exist for variables ['model_4/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 80s 110ms/step - loss: 9.0454 - recall_at_10: 0.0381 - mrr_at_10: 0.0139 - ndcg_at_10: 0.0195 - map_at_10: 0.0139 - precision_at_10: 0.0038 - regularization_loss: 0.0000e+00 - loss_batch: 9.0386\n", + "677/677 [==============================] - 105s 147ms/step - loss: 7.2975 - recall_at_20: 0.1426 - mrr_at_20: 0.0798 - ndcg_at_20: 0.0937 - map_at_20: 0.0798 - precision_at_20: 0.0071 - regularization_loss: 0.0000e+00 - loss_batch: 7.2845\n", "Epoch 2/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 6.3489 - recall_at_10: 0.2430 - mrr_at_10: 0.1385 - ndcg_at_10: 0.1632 - map_at_10: 0.1385 - precision_at_10: 0.0243 - regularization_loss: 0.0000e+00 - loss_batch: 6.3435\n", + "677/677 [==============================] - 100s 147ms/step - loss: 2.8422 - recall_at_20: 0.7408 - mrr_at_20: 0.6384 - ndcg_at_20: 0.6618 - map_at_20: 0.6384 - precision_at_20: 0.0370 - regularization_loss: 0.0000e+00 - loss_batch: 2.8481\n", "Epoch 3/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 4.7853 - recall_at_10: 0.4602 - mrr_at_10: 0.3227 - ndcg_at_10: 0.3555 - map_at_10: 0.3227 - precision_at_10: 0.0460 - regularization_loss: 0.0000e+00 - loss_batch: 4.7868\n", + "677/677 [==============================] - 100s 147ms/step - loss: 1.9844 - recall_at_20: 0.8348 - mrr_at_20: 0.7417 - ndcg_at_20: 0.7630 - map_at_20: 0.7417 - precision_at_20: 0.0417 - regularization_loss: 0.0000e+00 - loss_batch: 1.9915\n", "Epoch 4/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 3.6873 - recall_at_10: 0.6026 - mrr_at_10: 0.4710 - ndcg_at_10: 0.5025 - map_at_10: 0.4710 - precision_at_10: 0.0603 - regularization_loss: 0.0000e+00 - loss_batch: 3.6936\n", + "677/677 [==============================] - 100s 147ms/step - loss: 1.5814 - recall_at_20: 0.8700 - mrr_at_20: 0.7848 - ndcg_at_20: 0.8043 - map_at_20: 0.7848 - precision_at_20: 0.0435 - regularization_loss: 0.0000e+00 - loss_batch: 1.5882\n", "Epoch 5/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 3.0298 - recall_at_10: 0.6856 - mrr_at_10: 0.5650 - ndcg_at_10: 0.5940 - map_at_10: 
0.5650 - precision_at_10: 0.0686 - regularization_loss: 0.0000e+00 - loss_batch: 3.0363\n", - "84/84 [==============================] - 8s 40ms/step - loss: 8.6711 - recall_at_10: 0.1505 - mrr_at_10: 0.0595 - ndcg_at_10: 0.0807 - map_at_10: 0.0595 - precision_at_10: 0.0151 - regularization_loss: 0.0000e+00 - loss_batch: 8.6999\n" + "677/677 [==============================] - 100s 147ms/step - loss: 1.3337 - recall_at_20: 0.8990 - mrr_at_20: 0.8101 - ndcg_at_20: 0.8304 - map_at_20: 0.8101 - precision_at_20: 0.0450 - regularization_loss: 0.0000e+00 - loss_batch: 1.3407\n", + "84/84 [==============================] - 8s 44ms/step - loss: 8.8888 - recall_at_20: 0.3240 - mrr_at_20: 0.1225 - ndcg_at_20: 0.1671 - map_at_20: 0.1225 - precision_at_20: 0.0162 - regularization_loss: 0.0000e+00 - loss_batch: 8.9304\n" ] }, { "data": { "text/plain": [ - "{'loss': 8.671070098876953,\n", - " 'recall_at_10': 0.15074290335178375,\n", - " 'mrr_at_10': 0.05898994952440262,\n", - " 'ndcg_at_10': 0.08035662025213242,\n", - " 'map_at_10': 0.05898994952440262,\n", - " 'precision_at_10': 0.015074292197823524,\n", + "{'loss': 8.888774871826172,\n", + " 'recall_at_20': 0.3228324353694916,\n", + " 'mrr_at_20': 0.11914832890033722,\n", + " 'ndcg_at_20': 0.16426056623458862,\n", + " 'map_at_20': 0.11914832890033722,\n", + " 'precision_at_20': 0.01614162139594555,\n", " 'regularization_loss': 0.0,\n", - " 'loss_batch': 9.946744918823242}" + " 'loss_batch': 10.727699279785156}" ] }, "execution_count": 10, diff --git a/T4Rec_repro/train_runs/mlm_item_id.ipynb b/T4Rec_repro/train_runs/mlm_item_id.ipynb new file mode 100644 index 0000000000..508b4d6aa5 --- /dev/null +++ b/T4Rec_repro/train_runs/mlm_item_id.ipynb @@ -0,0 +1,808 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "ceb3ae93", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-13 20:26:22.114565: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. 
The old module will be deleted in version 2.11.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", + "2023-03-13 20:26:24.538242: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:26:24.538645: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:26:24.538803: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2023-03-13 20:26:24.965689: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-03-13 20:26:24.966631: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:26:24.966839: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:26:24.966994: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:26:25.703328: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:26:25.703539: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:26:25.703699: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:26:25.703813: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", + "2023-03-13 20:26:25.703876: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n" + ] + } + ], + "source": [ + "import os\n", + 
"os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", + "import gc\n", + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "\n", + "from merlin.schema.tags import Tags\n", + "from merlin.io.dataset import Dataset\n", + "import merlin.models.tf as mm" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "11647dd3", + "metadata": {}, + "outputs": [], + "source": [ + "train = Dataset(\"ecom_dataset/0001/train.parquet\")\n", + "valid = Dataset(\"ecom_dataset/0002/test.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4ab4e0fb", + "metadata": {}, + "outputs": [], + "source": [ + "target = 'sess_pid_seq'" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8d9903e6", + "metadata": {}, + "outputs": [], + "source": [ + "d_model = 192\n", + "n_layer = 3\n", + "n_head = 16\n", + "batch_size = 128\n", + "learning_rate = 0.0006667377132554976\n", + "n_epoch = 5\n", + "item_embedding_dim = 448 \n", + "item_id_embeddings_init_std = 3" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a6ade14a", + "metadata": {}, + "outputs": [], + "source": [ + "def get_model():\n", + " mlp_block = mm.MLPBlock(\n", + " [d_model],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )\n", + "\n", + " from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n", + "\n", + " schema = TensorflowMetadata.from_proto_text_file(\n", + " '../',\n", + " file_name='rees46_schema_modified.pbtxt'\n", + " ).to_merlin_schema()\n", + "\n", + " train.schema = schema\n", + "\n", + " schema_model = schema.select_by_tag(Tags.ITEM_ID)\n", + " input_block = mm.InputBlockV2(\n", + " schema_model,\n", + " categorical=mm.Embeddings(\n", + " schema_model.select_by_tag(Tags.CATEGORICAL),\n", + " dim=item_embedding_dim,\n", + " sequence_combiner=None,\n", + " )\n", + " )\n", + "\n", + " train.schema = train.schema.select_by_name('sess_pid_seq')\n", + "\n", + " xlnet_block = mm.XLNetBlock(d_model=d_model, n_head=n_head, n_layer=n_layer)\n", + "\n", + " dense_block = mm.SequentialBlock(\n", + " input_block,\n", + " mlp_block,\n", + " xlnet_block\n", + " )\n", + "\n", + " mlp_block2 = mm.MLPBlock(\n", + " [item_embedding_dim],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )\n", + "\n", + " prediction_task = mm.CategoricalOutput(\n", + " to_call=input_block[\"categorical\"][target],\n", + " )\n", + "\n", + " model_transformer = mm.Model(dense_block, mlp_block2, prediction_task)\n", + "\n", + " optimizer = tf.keras.optimizers.Adam(\n", + " learning_rate=learning_rate,\n", + " )\n", + "\n", + " model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", + " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[20])\n", + " )\n", + " return model_transformer, xlnet_block" + ] + }, + { + "cell_type": "markdown", + "id": "78302207", + "metadata": {}, + "source": [ + "# Run 1" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "e7474131", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_5/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/sequential_block_32/replace_masked_embeddings_5/RaggedWhere/Reshape_3:0\", shape=(None,), dtype=int64), values=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/sequential_block_32/replace_masked_embeddings_5/RaggedWhere/Reshape_2:0\", shape=(None, None), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/sequential_block_32/replace_masked_embeddings_5/RaggedWhere/Cast:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/sequential_block_32/replace_masked_embeddings_5/RaggedWhere/RaggedTile_2/Reshape_3:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/sequential_block_32/replace_masked_embeddings_5/RaggedWhere/RaggedTile_2/Reshape_2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/sequential_block_32/replace_masked_embeddings_5/RaggedWhere/RaggedTile_2/Cast:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_5/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-14 06:41:41.374760: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model_5/sequential_block_29/xl_net_block_5/sequential_block_32/replace_masked_embeddings_5/RaggedWhere/Assert/AssertGuard/branch_executed/_31\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "677/677 [==============================] - 164s 230ms/step - loss: 9.4924 - recall_at_20: 0.0704 - mrr_at_20: 0.0174 - ndcg_at_20: 0.0288 - map_at_20: 0.0174 - precision_at_20: 0.0035 - regularization_loss: 0.0000e+00 - loss_batch: 9.4899\n", + "Epoch 2/5\n", + "677/677 [==============================] - 159s 234ms/step - loss: 8.0755 - recall_at_20: 0.1759 - mrr_at_20: 0.0480 - ndcg_at_20: 0.0758 - map_at_20: 0.0480 - precision_at_20: 0.0088 - regularization_loss: 0.0000e+00 - loss_batch: 8.0726\n", + "Epoch 3/5\n", + "677/677 [==============================] - 159s 235ms/step - loss: 7.3926 - recall_at_20: 0.2427 - mrr_at_20: 0.0671 - ndcg_at_20: 0.1053 - map_at_20: 0.0671 - precision_at_20: 0.0121 - regularization_loss: 0.0000e+00 - loss_batch: 7.3887\n", + "Epoch 4/5\n", + "677/677 [==============================] - 159s 235ms/step - loss: 6.9299 - recall_at_20: 0.2932 - mrr_at_20: 0.0821 - ndcg_at_20: 0.1281 - map_at_20: 0.0821 - precision_at_20: 0.0147 - regularization_loss: 0.0000e+00 - loss_batch: 6.9255\n", + "Epoch 5/5\n", + "677/677 [==============================] - 143s 211ms/step - loss: 6.5825 - recall_at_20: 0.3350 - mrr_at_20: 0.0951 - ndcg_at_20: 0.1476 - map_at_20: 0.0951 - precision_at_20: 0.0167 - regularization_loss: 0.0000e+00 - loss_batch: 6.5791\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-14 06:54:43.265476: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model_5/sequential_block_29/xl_net_block_5/sequential_block_32/replace_masked_embeddings_5/RaggedWhere/Assert/AssertGuard/branch_executed/_529\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "84/84 [==============================] - 9s 49ms/step - loss: 8.3507 - recall_at_20: 0.2332 - mrr_at_20: 0.0720 - ndcg_at_20: 0.1070 - map_at_20: 0.0720 - precision_at_20: 0.0117 - regularization_loss: 0.0000e+00 - loss_batch: 8.3848\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.350717544555664,\n", + " 'recall_at_20': 0.23180365562438965,\n", + " 'mrr_at_20': 0.06943727284669876,\n", + " 'ndcg_at_20': 0.10483581572771072,\n", + " 'map_at_20': 0.06943727284669876,\n", + " 'precision_at_20': 0.011590182781219482,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 9.85844612121582}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequenceMaskRandom(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 
12, + "id": "02b2e706", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"model_5\"\n", + "_________________________________________________________________\n", + " Layer (type) Output Shape Param # \n", + "=================================================================\n", + " sequential_block_29 (Sequen multiple 176252800 \n", + " tialBlock) \n", + " \n", + " sequential_block_30 (Sequen multiple 86464 \n", + " tialBlock) \n", + " \n", + " sess_pid_seq/categorical_ou multiple 175110449 \n", + " tput (CategoricalOutput) \n", + " \n", + " model_context_5 (ModelConte multiple 0 \n", + " xt) \n", + " \n", + " prepare_features_11 (Prepar multiple 0 \n", + " eFeatures) \n", + " \n", + "=================================================================\n", + "Total params: 176,729,266\n", + "Trainable params: 176,729,265\n", + "Non-trainable params: 1\n", + "_________________________________________________________________\n" + ] + } + ], + "source": [ + "model_transformer.summary()" + ] + }, + { + "cell_type": "markdown", + "id": "3513d28a", + "metadata": {}, + "source": [ + "# Run 2" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2e624551", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_1/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. 
This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/sequential_block_12/replace_masked_embeddings_1/RaggedWhere/Reshape_3:0\", shape=(None,), dtype=int64), values=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/sequential_block_12/replace_masked_embeddings_1/RaggedWhere/Reshape_2:0\", shape=(None, None), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/sequential_block_12/replace_masked_embeddings_1/RaggedWhere/Cast:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/sequential_block_12/replace_masked_embeddings_1/RaggedWhere/RaggedTile_2/Reshape_3:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/sequential_block_12/replace_masked_embeddings_1/RaggedWhere/RaggedTile_2/Reshape_2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/sequential_block_12/replace_masked_embeddings_1/RaggedWhere/RaggedTile_2/Cast:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_1/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-13 20:31:46.363004: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model_1/sequential_block_9/xl_net_block_1/sequential_block_12/replace_masked_embeddings_1/RaggedWhere/Assert/AssertGuard/branch_executed/_31\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "677/677 [==============================] - 65s 84ms/step - loss: 9.4953 - recall_at_20: 0.0663 - mrr_at_20: 0.0167 - ndcg_at_20: 0.0274 - map_at_20: 0.0167 - precision_at_20: 0.0033 - regularization_loss: 0.0000e+00 - loss_batch: 9.4908\n", + "Epoch 2/5\n", + "677/677 [==============================] - 57s 84ms/step - loss: 8.1077 - recall_at_20: 0.1712 - mrr_at_20: 0.0474 - ndcg_at_20: 0.0744 - map_at_20: 0.0474 - precision_at_20: 0.0086 - regularization_loss: 0.0000e+00 - loss_batch: 8.1021\n", + "Epoch 3/5\n", + "677/677 [==============================] - 57s 84ms/step - loss: 7.3969 - recall_at_20: 0.2444 - mrr_at_20: 0.0671 - ndcg_at_20: 0.1057 - map_at_20: 0.0671 - precision_at_20: 0.0122 - regularization_loss: 0.0000e+00 - loss_batch: 7.3975\n", + "Epoch 4/5\n", + "677/677 [==============================] - 57s 84ms/step - loss: 6.9683 - recall_at_20: 0.2853 - mrr_at_20: 0.0794 - ndcg_at_20: 0.1243 - map_at_20: 0.0794 - precision_at_20: 0.0143 - regularization_loss: 0.0000e+00 - loss_batch: 6.9657\n", + "Epoch 5/5\n", + "677/677 [==============================] - 57s 85ms/step - loss: 6.6522 - recall_at_20: 0.3234 - mrr_at_20: 0.0917 - ndcg_at_20: 0.1423 - map_at_20: 0.0917 - precision_at_20: 0.0162 - regularization_loss: 0.0000e+00 - loss_batch: 6.6482\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-13 20:36:37.576034: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model_1/sequential_block_9/xl_net_block_1/sequential_block_12/replace_masked_embeddings_1/RaggedWhere/Assert/AssertGuard/branch_executed/_529\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "84/84 [==============================] - 9s 48ms/step - loss: 8.3509 - recall_at_20: 0.2300 - mrr_at_20: 0.0691 - ndcg_at_20: 0.1041 - map_at_20: 0.0691 - precision_at_20: 0.0115 - regularization_loss: 0.0000e+00 - loss_batch: 8.3545\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.350946426391602,\n", + " 'recall_at_20': 0.22926461696624756,\n", + " 'mrr_at_20': 0.06758848577737808,\n", + " 'ndcg_at_20': 0.10286629945039749,\n", + " 'map_at_20': 0.06758848577737808,\n", + " 'precision_at_20': 0.011463231407105923,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 8.509391784667969}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequenceMaskRandom(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "d42dea65", + 
"metadata": {}, + "source": [ + "# Run 3" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "97e7322c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_2/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/sequential_block_17/replace_masked_embeddings_2/RaggedWhere/Reshape_3:0\", shape=(None,), dtype=int64), values=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/sequential_block_17/replace_masked_embeddings_2/RaggedWhere/Reshape_2:0\", shape=(None, None), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/sequential_block_17/replace_masked_embeddings_2/RaggedWhere/Cast:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/sequential_block_17/replace_masked_embeddings_2/RaggedWhere/RaggedTile_2/Reshape_3:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/sequential_block_17/replace_masked_embeddings_2/RaggedWhere/RaggedTile_2/Reshape_2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/sequential_block_17/replace_masked_embeddings_2/RaggedWhere/RaggedTile_2/Cast:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. 
This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_2/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-13 20:36:51.268625: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model_2/sequential_block_14/xl_net_block_2/sequential_block_17/replace_masked_embeddings_2/RaggedWhere/Assert/AssertGuard/branch_executed/_31\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "677/677 [==============================] - 65s 84ms/step - loss: 9.4386 - recall_at_20: 0.0722 - mrr_at_20: 0.0190 - ndcg_at_20: 0.0305 - map_at_20: 0.0190 - precision_at_20: 0.0036 - regularization_loss: 0.0000e+00 - loss_batch: 9.4342\n", + "Epoch 2/5\n", + "677/677 [==============================] - 57s 84ms/step - loss: 8.0171 - recall_at_20: 0.1837 - mrr_at_20: 0.0502 - ndcg_at_20: 0.0792 - map_at_20: 0.0502 - precision_at_20: 0.0092 - regularization_loss: 0.0000e+00 - loss_batch: 8.0103\n", + "Epoch 3/5\n", + "677/677 [==============================] - 58s 85ms/step - loss: 7.3722 - recall_at_20: 0.2467 - mrr_at_20: 0.0691 - ndcg_at_20: 0.1078 - map_at_20: 0.0691 - precision_at_20: 0.0123 - regularization_loss: 0.0000e+00 - loss_batch: 7.3658\n", + "Epoch 4/5\n", + "677/677 [==============================] - 57s 85ms/step - loss: 6.9592 - recall_at_20: 0.2892 - mrr_at_20: 0.0807 - ndcg_at_20: 0.1262 - map_at_20: 0.0807 - precision_at_20: 0.0145 - regularization_loss: 0.0000e+00 - loss_batch: 6.9549\n", + "Epoch 5/5\n", + "677/677 [==============================] - 57s 85ms/step - loss: 6.6706 - recall_at_20: 0.3194 - mrr_at_20: 0.0899 - ndcg_at_20: 0.1401 - map_at_20: 0.0899 - precision_at_20: 0.0160 - regularization_loss: 0.0000e+00 - loss_batch: 6.6659\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-13 20:41:42.865959: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model_2/sequential_block_14/xl_net_block_2/sequential_block_17/replace_masked_embeddings_2/RaggedWhere/Assert/AssertGuard/branch_executed/_529\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "84/84 [==============================] - 9s 49ms/step - loss: 8.3114 - recall_at_20: 0.2264 - mrr_at_20: 0.0687 - ndcg_at_20: 0.1030 - map_at_20: 0.0687 - precision_at_20: 0.0113 - regularization_loss: 0.0000e+00 - loss_batch: 8.3190\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.311356544494629,\n", + " 'recall_at_20': 0.22738386690616608,\n", + " 'mrr_at_20': 0.0663006603717804,\n", + " 'ndcg_at_20': 0.10139463096857071,\n", + " 'map_at_20': 0.0663006603717804,\n", + " 'precision_at_20': 0.011369192972779274,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 8.649133682250977}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + 
"model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequenceMaskRandom(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "610da911", + "metadata": {}, + "source": [ + "# Run 4" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "9e0f0891", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_3/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/sequential_block_22/replace_masked_embeddings_3/RaggedWhere/Reshape_3:0\", shape=(None,), dtype=int64), values=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/sequential_block_22/replace_masked_embeddings_3/RaggedWhere/Reshape_2:0\", shape=(None, None), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/sequential_block_22/replace_masked_embeddings_3/RaggedWhere/Cast:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. 
This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/sequential_block_22/replace_masked_embeddings_3/RaggedWhere/RaggedTile_2/Reshape_3:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/sequential_block_22/replace_masked_embeddings_3/RaggedWhere/RaggedTile_2/Reshape_2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/sequential_block_22/replace_masked_embeddings_3/RaggedWhere/RaggedTile_2/Cast:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_3/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-13 20:41:56.776497: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model_3/sequential_block_19/xl_net_block_3/sequential_block_22/replace_masked_embeddings_3/RaggedWhere/Assert/AssertGuard/branch_executed/_31\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "677/677 [==============================] - 65s 84ms/step - loss: 9.4786 - recall_at_20: 0.0673 - mrr_at_20: 0.0176 - ndcg_at_20: 0.0283 - map_at_20: 0.0176 - precision_at_20: 0.0034 - regularization_loss: 0.0000e+00 - loss_batch: 9.4794\n", + "Epoch 2/5\n", + "677/677 [==============================] - 57s 84ms/step - loss: 8.1173 - recall_at_20: 0.1692 - mrr_at_20: 0.0454 - ndcg_at_20: 0.0723 - map_at_20: 0.0454 - precision_at_20: 0.0085 - regularization_loss: 0.0000e+00 - loss_batch: 8.1128\n", + "Epoch 3/5\n", + "677/677 [==============================] - 57s 85ms/step - loss: 7.4296 - recall_at_20: 0.2409 - mrr_at_20: 0.0664 - ndcg_at_20: 0.1044 - map_at_20: 0.0664 - precision_at_20: 0.0120 - regularization_loss: 0.0000e+00 - loss_batch: 7.4268\n", + "Epoch 4/5\n", + "677/677 [==============================] - 58s 85ms/step - loss: 6.9533 - recall_at_20: 0.2861 - mrr_at_20: 0.0778 - ndcg_at_20: 0.1232 - map_at_20: 0.0778 - precision_at_20: 0.0143 - regularization_loss: 0.0000e+00 - loss_batch: 6.9502\n", + "Epoch 5/5\n", + "677/677 [==============================] - 57s 85ms/step - loss: 6.6322 - recall_at_20: 0.3285 - mrr_at_20: 0.0931 - ndcg_at_20: 0.1445 - map_at_20: 0.0931 - precision_at_20: 0.0164 - regularization_loss: 0.0000e+00 - loss_batch: 6.6306\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-13 20:46:48.752036: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model_3/sequential_block_19/xl_net_block_3/sequential_block_22/replace_masked_embeddings_3/RaggedWhere/Assert/AssertGuard/branch_executed/_529\n" + ] + }, + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "84/84 [==============================] - 9s 49ms/step - loss: 8.3535 - recall_at_20: 0.2280 - mrr_at_20: 0.0700 - ndcg_at_20: 0.1046 - map_at_20: 0.0700 - precision_at_20: 0.0114 - regularization_loss: 0.0000e+00 - loss_batch: 8.3763\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.353541374206543,\n", + " 'recall_at_20': 0.23067519068717957,\n", + " 'mrr_at_20': 0.06726308912038803,\n", + " 'ndcg_at_20': 0.10282379388809204,\n", + " 'map_at_20': 0.06726308912038803,\n", + " 'precision_at_20': 0.011533760465681553,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 9.360955238342285}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequenceMaskRandom(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "6cffc60d", + "metadata": {}, + "source": [ + "# Run 5" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "6981ff6e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_4/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. 
This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/sequential_block_27/replace_masked_embeddings_4/RaggedWhere/Reshape_3:0\", shape=(None,), dtype=int64), values=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/sequential_block_27/replace_masked_embeddings_4/RaggedWhere/Reshape_2:0\", shape=(None, None), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/sequential_block_27/replace_masked_embeddings_4/RaggedWhere/Cast:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/sequential_block_27/replace_masked_embeddings_4/RaggedWhere/RaggedTile_2/Reshape_3:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/sequential_block_27/replace_masked_embeddings_4/RaggedWhere/RaggedTile_2/Reshape_2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/sequential_block_27/replace_masked_embeddings_4/RaggedWhere/RaggedTile_2/Cast:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_4/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-13 20:47:02.588234: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model_4/sequential_block_24/xl_net_block_4/sequential_block_27/replace_masked_embeddings_4/RaggedWhere/Assert/AssertGuard/branch_executed/_31\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "677/677 [==============================] - 65s 84ms/step - loss: 9.4909 - recall_at_20: 0.0707 - mrr_at_20: 0.0184 - ndcg_at_20: 0.0297 - map_at_20: 0.0184 - precision_at_20: 0.0035 - regularization_loss: 0.0000e+00 - loss_batch: 9.4882\n", + "Epoch 2/5\n", + "677/677 [==============================] - 57s 84ms/step - loss: 8.1387 - recall_at_20: 0.1653 - mrr_at_20: 0.0453 - ndcg_at_20: 0.0713 - map_at_20: 0.0453 - precision_at_20: 0.0083 - regularization_loss: 0.0000e+00 - loss_batch: 8.1347\n", + "Epoch 3/5\n", + "677/677 [==============================] - 57s 84ms/step - loss: 7.4398 - recall_at_20: 0.2387 - mrr_at_20: 0.0662 - ndcg_at_20: 0.1038 - map_at_20: 0.0662 - precision_at_20: 0.0119 - regularization_loss: 0.0000e+00 - loss_batch: 7.4371\n", + "Epoch 4/5\n", + "677/677 [==============================] - 57s 85ms/step - loss: 6.9831 - recall_at_20: 0.2878 - mrr_at_20: 0.0810 - ndcg_at_20: 0.1261 - map_at_20: 0.0810 - precision_at_20: 0.0144 - regularization_loss: 0.0000e+00 - loss_batch: 6.9787\n", + "Epoch 5/5\n", + "677/677 [==============================] - 57s 85ms/step - loss: 6.6535 - recall_at_20: 0.3246 - mrr_at_20: 0.0905 - ndcg_at_20: 0.1416 - map_at_20: 0.0905 - precision_at_20: 0.0162 - regularization_loss: 0.0000e+00 - loss_batch: 6.6479\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-13 20:51:54.265885: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model_4/sequential_block_24/xl_net_block_4/sequential_block_27/replace_masked_embeddings_4/RaggedWhere/Assert/AssertGuard/branch_executed/_529\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "84/84 [==============================] - 9s 49ms/step - loss: 8.3766 - recall_at_20: 0.2347 - mrr_at_20: 0.0690 - ndcg_at_20: 0.1050 - map_at_20: 0.0690 - precision_at_20: 0.0117 - regularization_loss: 0.0000e+00 - loss_batch: 8.3785\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.376553535461426,\n", + " 'recall_at_20': 0.23227383196353912,\n", + " 'mrr_at_20': 0.0675581842660904,\n", + " 'ndcg_at_20': 0.10343420505523682,\n", + " 'map_at_20': 0.0675581842660904,\n", + " 'precision_at_20': 0.011613693088293076,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 8.46284294128418}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequenceMaskRandom(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"d195f16d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/T4Rec_repro/train_runs/mlm_item_id_min.ipynb b/T4Rec_repro/train_runs/mlm_item_id_min.ipynb deleted file mode 100644 index df90cc786b..0000000000 --- a/T4Rec_repro/train_runs/mlm_item_id_min.ipynb +++ /dev/null @@ -1,640 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "ceb3ae93", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-03-10 13:57:07.721314: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", - " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", - "2023-03-10 13:57:10.129984: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:57:10.130437: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:57:10.130617: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n", - "2023-03-10 13:57:10.581209: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-03-10 13:57:10.582030: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:57:10.582283: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:57:10.582439: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:57:11.330242: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:57:11.330454: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:57:11.330615: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:57:11.330728: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", - "2023-03-10 13:57:11.330790: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n" - ] - } - ], - "source": [ - "import os\n", - "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", - "import gc\n", - "import numpy as np\n", - "\n", - "import tensorflow as tf\n", - "\n", - "from merlin.schema.tags import Tags\n", - "from merlin.io.dataset import Dataset\n", - "import merlin.models.tf as mm" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "11647dd3", - "metadata": {}, - "outputs": [], - "source": [ - "train = Dataset(\"ecom_dataset/0001/train.parquet\")\n", - "valid = Dataset(\"ecom_dataset/0002/valid.parquet\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "4ab4e0fb", - "metadata": {}, - "outputs": [], - "source": [ - "target = 'sess_pid_seq'" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "8d9903e6", - "metadata": {}, - "outputs": [], - "source": [ - "# a couple of starter hyperparams\n", - "\n", - "d_model = 192\n", - "n_layer = 3\n", - "n_head = 16\n", - "batch_size = 128\n", - "learning_rate = 0.0006667377132554976\n", - "n_epoch = 5" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "a6ade14a", - "metadata": {}, - "outputs": [], - "source": [ - "def get_model():\n", - " mlp_block = mm.MLPBlock(\n", - " [128,d_model],\n", - " activation='relu',\n", - " 
no_activation_last_layer=True,\n", - " )\n", - "\n", - " from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n", - "\n", - " schema = TensorflowMetadata.from_proto_text_file(\n", - " '../',\n", - " file_name='rees46_schema_modified.pbtxt'\n", - " ).to_merlin_schema()\n", - "\n", - " train.schema = schema\n", - "\n", - " input_block = mm.InputBlockV2(\n", - " train.schema.select_by_name('sess_pid_seq'), \n", - " embeddings=mm.Embeddings(\n", - " train.schema.select_by_name('sess_pid_seq'), \n", - " sequence_combiner=None,\n", - " dim=d_model\n", - " ),\n", - " # pre=mm.StochasticSwapNoise()\n", - " )\n", - "\n", - " train.schema = train.schema.select_by_name('sess_pid_seq')\n", - "\n", - " xlnet_block = mm.XLNetBlock(d_model=d_model, n_head=n_head, n_layer=n_layer)\n", - "\n", - " dense_block = mm.SequentialBlock(\n", - " input_block,\n", - " mlp_block,\n", - " xlnet_block\n", - " )\n", - "\n", - " mlp_block2 = mm.MLPBlock(\n", - " [128,d_model],\n", - " activation='relu',\n", - " no_activation_last_layer=True,\n", - " )\n", - "\n", - " prediction_task = mm.CategoricalOutput(\n", - " to_call=input_block[\"categorical\"][target],\n", - " )\n", - "\n", - " model_transformer = mm.Model(dense_block, mlp_block2, prediction_task)\n", - "\n", - " optimizer = tf.keras.optimizers.Adam(\n", - " learning_rate=learning_rate,\n", - " )\n", - "\n", - " model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", - " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[10])\n", - " )\n", - " return model_transformer, xlnet_block" - ] - }, - { - "cell_type": "markdown", - "id": "78302207", - "metadata": {}, - "source": [ - "# Run 1" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "e7474131", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/5\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-03-10 13:57:17.631317: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 82s 110ms/step - loss: 8.8265 - recall_at_10: 0.0432 - mrr_at_10: 0.0166 - ndcg_at_10: 0.0228 - map_at_10: 0.0166 - precision_at_10: 0.0043 - regularization_loss: 0.0000e+00 - loss_batch: 8.8191\n", - "Epoch 2/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 5.8014 - recall_at_10: 0.3091 - mrr_at_10: 0.1936 - ndcg_at_10: 0.2210 - map_at_10: 0.1936 - precision_at_10: 0.0309 - regularization_loss: 0.0000e+00 - loss_batch: 5.8019\n", - "Epoch 3/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 4.1718 - recall_at_10: 0.5397 - mrr_at_10: 0.4080 - ndcg_at_10: 0.4394 - map_at_10: 0.4080 - precision_at_10: 0.0540 - regularization_loss: 0.0000e+00 - loss_batch: 4.1734\n", - "Epoch 4/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 3.2806 - recall_at_10: 0.6585 - mrr_at_10: 0.5362 - ndcg_at_10: 0.5656 - map_at_10: 0.5362 - precision_at_10: 0.0658 - regularization_loss: 0.0000e+00 - loss_batch: 3.2849\n", - "Epoch 5/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 2.8188 - recall_at_10: 0.7125 - mrr_at_10: 0.6007 - ndcg_at_10: 0.6276 - map_at_10: 0.6007 - precision_at_10: 0.0712 - regularization_loss: 0.0000e+00 - loss_batch: 2.8246\n", - "84/84 [==============================] - 7s 39ms/step - loss: 8.8107 - recall_at_10: 0.1511 - mrr_at_10: 0.0623 - ndcg_at_10: 0.0829 - map_at_10: 0.0623 - precision_at_10: 0.0151 - regularization_loss: 0.0000e+00 - loss_batch: 8.8298\n" - ] - }, - { - "data": { - "text/plain": [ - "{'loss': 8.810694694519043,\n", - " 'recall_at_10': 0.15318788588047028,\n", - " 'mrr_at_10': 0.06131112948060036,\n", - " 'ndcg_at_10': 0.08268804848194122,\n", - " 'map_at_10': 0.06131112948060036,\n", - " 'precision_at_10': 0.015318789519369602,\n", - " 'regularization_loss': 0.0,\n", - " 'loss_batch': 9.6568603515625}" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model_transformer, xlnet_block = get_model()\n", - "model_transformer.fit(\n", - " 
train,\n", - " batch_size=batch_size,\n", - " epochs=n_epoch,\n", - " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", - ")\n", - "\n", - "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", - "model_transformer.evaluate(\n", - " valid,\n", - " batch_size=batch_size,\n", - " pre=predict_last,\n", - " return_dict=True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "3513d28a", - "metadata": {}, - "source": [ - "# Run 2" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "2e624551", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/5\n", - "WARNING:tensorflow:Gradients do not exist for variables ['model_1/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model_1/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 80s 110ms/step - loss: 9.1281 - recall_at_10: 0.0359 - mrr_at_10: 0.0128 - ndcg_at_10: 0.0181 - map_at_10: 0.0128 - precision_at_10: 0.0036 - regularization_loss: 0.0000e+00 - loss_batch: 9.1243\n", - "Epoch 2/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 6.7038 - recall_at_10: 0.1907 - mrr_at_10: 0.1006 - ndcg_at_10: 0.1218 - map_at_10: 0.1006 - precision_at_10: 0.0191 - regularization_loss: 0.0000e+00 - loss_batch: 6.6971\n", - "Epoch 3/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 4.9471 - recall_at_10: 0.4404 - mrr_at_10: 0.3077 - ndcg_at_10: 0.3393 - map_at_10: 0.3077 - precision_at_10: 0.0440 - regularization_loss: 0.0000e+00 - loss_batch: 4.9478\n", - "Epoch 4/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 3.9842 - recall_at_10: 0.5607 - mrr_at_10: 0.4197 - ndcg_at_10: 0.4534 - map_at_10: 0.4197 - precision_at_10: 0.0561 - regularization_loss: 0.0000e+00 - loss_batch: 3.9878\n", - "Epoch 5/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 3.3262 - recall_at_10: 0.6442 - mrr_at_10: 0.5172 - ndcg_at_10: 0.5477 - map_at_10: 0.5172 - precision_at_10: 0.0644 - regularization_loss: 0.0000e+00 - loss_batch: 3.3307\n", - "84/84 [==============================] - 7s 40ms/step - loss: 8.9716 - recall_at_10: 0.1277 - mrr_at_10: 0.0513 - ndcg_at_10: 0.0692 - map_at_10: 0.0513 - precision_at_10: 0.0128 - regularization_loss: 0.0000e+00 - loss_batch: 8.9960\n" - ] - }, - { - "data": { - "text/plain": [ - "{'loss': 8.971626281738281,\n", - " 'recall_at_10': 0.12817378342151642,\n", - " 'mrr_at_10': 0.05082216113805771,\n", - " 'ndcg_at_10': 0.06883765012025833,\n", - " 'map_at_10': 0.05082216113805771,\n", - " 'precision_at_10': 0.012817380018532276,\n", - " 'regularization_loss': 0.0,\n", - " 'loss_batch': 10.049013137817383}" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model_transformer, xlnet_block = get_model()\n", - "model_transformer.fit(\n", - " train,\n", - " batch_size=batch_size,\n", - " epochs=n_epoch,\n", - " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", - ")\n", - "\n", - "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", - "model_transformer.evaluate(\n", - " valid,\n", - " batch_size=batch_size,\n", - " pre=predict_last,\n", - " return_dict=True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "d42dea65", - "metadata": {}, - "source": [ - "# Run 3" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "97e7322c", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/5\n", - "WARNING:tensorflow:Gradients do not exist for variables ['model_2/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model_2/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 80s 110ms/step - loss: 8.9307 - recall_at_10: 0.0396 - mrr_at_10: 0.0142 - ndcg_at_10: 0.0201 - map_at_10: 0.0142 - precision_at_10: 0.0040 - regularization_loss: 0.0000e+00 - loss_batch: 8.9265\n", - "Epoch 2/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 5.9376 - recall_at_10: 0.2951 - mrr_at_10: 0.1842 - ndcg_at_10: 0.2105 - map_at_10: 0.1842 - precision_at_10: 0.0295 - regularization_loss: 0.0000e+00 - loss_batch: 5.9350\n", - "Epoch 3/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 4.3616 - recall_at_10: 0.5184 - mrr_at_10: 0.3844 - ndcg_at_10: 0.4164 - map_at_10: 0.3844 - precision_at_10: 0.0518 - regularization_loss: 0.0000e+00 - loss_batch: 4.3657\n", - "Epoch 4/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 3.4916 - recall_at_10: 0.6319 - mrr_at_10: 0.5057 - ndcg_at_10: 0.5359 - map_at_10: 0.5057 - precision_at_10: 0.0632 - regularization_loss: 0.0000e+00 - loss_batch: 3.4969\n", - "Epoch 5/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 3.0021 - recall_at_10: 0.6889 - mrr_at_10: 0.5684 - ndcg_at_10: 0.5973 - map_at_10: 0.5684 - precision_at_10: 0.0689 - regularization_loss: 0.0000e+00 - loss_batch: 3.0072\n", - "84/84 [==============================] - 7s 40ms/step - loss: 8.7983 - recall_at_10: 0.1534 - mrr_at_10: 0.0599 - ndcg_at_10: 0.0816 - map_at_10: 0.0599 - precision_at_10: 0.0153 - regularization_loss: 0.0000e+00 - loss_batch: 8.8378\n" - ] - }, - { - "data": { - "text/plain": [ - "{'loss': 8.798320770263672,\n", - " 'recall_at_10': 0.15647922456264496,\n", - " 'mrr_at_10': 0.05985381081700325,\n", - " 'ndcg_at_10': 0.08228185027837753,\n", - " 'map_at_10': 0.05985381081700325,\n", - " 'precision_at_10': 0.015647921711206436,\n", - " 'regularization_loss': 0.0,\n", - " 'loss_batch': 10.545936584472656}" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model_transformer, xlnet_block = get_model()\n", - 
"model_transformer.fit(\n", - " train,\n", - " batch_size=batch_size,\n", - " epochs=n_epoch,\n", - " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", - ")\n", - "\n", - "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", - "model_transformer.evaluate(\n", - " valid,\n", - " batch_size=batch_size,\n", - " pre=predict_last,\n", - " return_dict=True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "610da911", - "metadata": {}, - "source": [ - "# Run 4" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "9e0f0891", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/5\n", - "WARNING:tensorflow:Gradients do not exist for variables ['model_3/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model_3/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 80s 110ms/step - loss: 8.8791 - recall_at_10: 0.0414 - mrr_at_10: 0.0155 - ndcg_at_10: 0.0215 - map_at_10: 0.0155 - precision_at_10: 0.0041 - regularization_loss: 0.0000e+00 - loss_batch: 8.8746\n", - "Epoch 2/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 5.5817 - recall_at_10: 0.3289 - mrr_at_10: 0.2127 - ndcg_at_10: 0.2403 - map_at_10: 0.2127 - precision_at_10: 0.0329 - regularization_loss: 0.0000e+00 - loss_batch: 5.5795\n", - "Epoch 3/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 3.8784 - recall_at_10: 0.5761 - mrr_at_10: 0.4489 - ndcg_at_10: 0.4793 - map_at_10: 0.4489 - precision_at_10: 0.0576 - regularization_loss: 0.0000e+00 - loss_batch: 3.8833\n", - "Epoch 4/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 3.0679 - recall_at_10: 0.6797 - mrr_at_10: 0.5656 - ndcg_at_10: 0.5930 - map_at_10: 0.5656 - precision_at_10: 0.0680 - regularization_loss: 0.0000e+00 - loss_batch: 3.0749\n", - "Epoch 5/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 2.5693 - recall_at_10: 0.7397 - mrr_at_10: 0.6350 - ndcg_at_10: 0.6602 - map_at_10: 0.6350 - precision_at_10: 0.0740 - regularization_loss: 0.0000e+00 - loss_batch: 2.5767\n", - "84/84 [==============================] - 7s 40ms/step - loss: 8.6399 - recall_at_10: 0.1581 - mrr_at_10: 0.0621 - ndcg_at_10: 0.0844 - map_at_10: 0.0621 - precision_at_10: 0.0158 - regularization_loss: 0.0000e+00 - loss_batch: 8.6637\n" - ] - }, - { - "data": { - "text/plain": [ - "{'loss': 8.63992691040039,\n", - " 'recall_at_10': 0.1588301658630371,\n", - " 'mrr_at_10': 0.06323756277561188,\n", - " 'ndcg_at_10': 0.0855293795466423,\n", - " 'map_at_10': 0.06323756277561188,\n", - " 'precision_at_10': 0.01588302105665207,\n", - " 'regularization_loss': 0.0,\n", - " 'loss_batch': 9.691500663757324}" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model_transformer, xlnet_block = get_model()\n", - "model_transformer.fit(\n", - " train,\n", - " batch_size=batch_size,\n", - " epochs=n_epoch,\n", - " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", - ")\n", - "\n", - "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", - "model_transformer.evaluate(\n", - " valid,\n", - " batch_size=batch_size,\n", - " pre=predict_last,\n", - " return_dict=True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "6cffc60d", - "metadata": {}, - "source": [ - "# Run 5" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "6981ff6e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/5\n", - "WARNING:tensorflow:Gradients do not exist for variables ['model_4/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model_4/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 80s 110ms/step - loss: 8.9605 - recall_at_10: 0.0390 - mrr_at_10: 0.0141 - ndcg_at_10: 0.0199 - map_at_10: 0.0141 - precision_at_10: 0.0039 - regularization_loss: 0.0000e+00 - loss_batch: 8.9571\n", - "Epoch 2/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 6.1194 - recall_at_10: 0.2618 - mrr_at_10: 0.1575 - ndcg_at_10: 0.1821 - map_at_10: 0.1575 - precision_at_10: 0.0262 - regularization_loss: 0.0000e+00 - loss_batch: 6.1199\n", - "Epoch 3/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 4.4762 - recall_at_10: 0.5000 - mrr_at_10: 0.3647 - ndcg_at_10: 0.3970 - map_at_10: 0.3647 - precision_at_10: 0.0500 - regularization_loss: 0.0000e+00 - loss_batch: 4.4783\n", - "Epoch 4/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 3.6222 - recall_at_10: 0.6166 - mrr_at_10: 0.4884 - ndcg_at_10: 0.5191 - map_at_10: 0.4884 - precision_at_10: 0.0617 - regularization_loss: 0.0000e+00 - loss_batch: 3.6248\n", - "Epoch 5/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 3.1115 - recall_at_10: 0.6744 - mrr_at_10: 0.5505 - ndcg_at_10: 0.5803 - map_at_10: 0.5505 - precision_at_10: 0.0674 - regularization_loss: 0.0000e+00 - loss_batch: 3.1192\n", - "84/84 [==============================] - 7s 40ms/step - loss: 8.8991 - recall_at_10: 0.1457 - mrr_at_10: 0.0572 - ndcg_at_10: 0.0776 - map_at_10: 0.0572 - precision_at_10: 0.0146 - regularization_loss: 0.0000e+00 - loss_batch: 8.9238\n" - ] - }, - { - "data": { - "text/plain": [ - "{'loss': 8.899141311645508,\n", - " 'recall_at_10': 0.14763964712619781,\n", - " 'mrr_at_10': 0.05743885040283203,\n", - " 'ndcg_at_10': 0.07836496829986572,\n", - " 'map_at_10': 0.05743885040283203,\n", - " 'precision_at_10': 0.014763964340090752,\n", - " 'regularization_loss': 0.0,\n", - " 'loss_batch': 9.991716384887695}" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model_transformer, xlnet_block = get_model()\n", - 
"model_transformer.fit(\n", - " train,\n", - " batch_size=batch_size,\n", - " epochs=n_epoch,\n", - " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", - ")\n", - "\n", - "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", - "model_transformer.evaluate(\n", - " valid,\n", - " batch_size=batch_size,\n", - " pre=predict_last,\n", - " return_dict=True\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d195f16d", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From f17303f91c76b29f15bbb25240166797e7391e04 Mon Sep 17 00:00:00 2001 From: Radek Osmulski Date: Wed, 29 Mar 2023 10:37:43 +1000 Subject: [PATCH 08/15] update --- T4Rec_repro/reproducing_T4Rec_results.ipynb | 769 ++++--- .../reproducing_T4Rec_results_v1.ipynb | 53 +- ...rain_and_save_model_for_benchmarking.ipynb | 2023 +++++++++++------ 3 files changed, 1828 insertions(+), 1017 deletions(-) diff --git a/T4Rec_repro/reproducing_T4Rec_results.ipynb b/T4Rec_repro/reproducing_T4Rec_results.ipynb index 8788b157d2..68da1ce2d2 100644 --- a/T4Rec_repro/reproducing_T4Rec_results.ipynb +++ b/T4Rec_repro/reproducing_T4Rec_results.ipynb @@ -232,17 +232,18 @@ "From https://github.com/NVIDIA-Merlin/Models\n", " * [new branch] ci/horovod -> origin/ci/horovod\n", " * [new branch] codespell_fix -> origin/codespell_fix\n", - " 16fb4149..b1c10317 fea-sok-integration-wj -> origin/fea-sok-integration-wj\n", + " 16fb4149..c9d3baf4 fea-sok-integration-wj -> origin/fea-sok-integration-wj\n", " * [new branch] fea-sok-load-dump -> origin/fea-sok-load-dump\n", - " 95462360..a69adf75 gh-pages -> origin/gh-pages\n", + " 95462360..28fb60ad gh-pages -> origin/gh-pages\n", " * [new branch] inference_benchmarking_transformers -> origin/inference_benchmarking_transformers\n", - " 835ad186..e7fe759c main -> origin/main\n", + " 835ad186..a5ac5668 main -> origin/main\n", " * [new branch] mtl_example -> origin/mtl_example\n", " cb431a8a..b90e9a1b release-22.12 -> origin/release-22.12\n", " * [new branch] release-23.02 -> origin/release-23.02\n", " * [new branch] tf/column_sampling_serialization_fix -> origin/tf/column_sampling_serialization_fix\n", " * [new branch] tf/continuous_seq_feats_fix -> origin/tf/continuous_seq_feats_fix\n", " * [new branch] tf/dataloader_changes -> origin/tf/dataloader_changes\n", + " * [new branch] tf/dlrm_dropout_fix -> origin/tf/dlrm_dropout_fix\n", " * [new branch] tf/fix_broadcast_to_sequence -> origin/tf/fix_broadcast_to_sequence\n", " * [new branch] tf/fix_training_smaller_accuracy -> origin/tf/fix_training_smaller_accuracy\n", " * [new branch] tf/mtl_example_updates_v2 -> origin/tf/mtl_example_updates_v2\n", @@ -253,7 +254,7 @@ " + 0a65d603...9f53e8ff update_07 -> origin/update_07 (forced update)\n", " * [new tag] v23.02.00 -> v23.02.00\n", "Previous HEAD position was cb431a8a Fix the serialization of `SequenceSummary` block (#927)\n", - "HEAD is now at a92bdc24 adjust sample_weights to targets shape\n" + "HEAD is now at a86201ee add masking support to SequencePredictRandom transform\n" 
] }, { @@ -267,51 +268,51 @@ " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-models==23.2.0+6.ga92bdc24) (0.10.0)\n", - "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from merlin-models==23.2.0+6.ga92bdc24) (0.0.4)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (3.19.6)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (22.0)\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.2.5)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (8.0.0)\n", - "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2022.7.1)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (0.56.4)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (4.64.1)\n", - "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.12.0)\n", - "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2022.5.0)\n", - "Requirement already satisfied: pandas<1.4.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.3.5)\n", - "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2022.7.1)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.2.0)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (0.4.3)\n", - "Requirement already satisfied: numpy>=1.16.6 in /usr/local/lib/python3.8/dist-packages (from pyarrow>=5.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.22.4)\n", - "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.3.0)\n", - "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2.2.0)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (6.0)\n", - "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from 
dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (0.12.0)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (0.39.1)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (45.2.0)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (5.2.0)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.57.0)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.3.0)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2022.7)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2.8.2)\n", - "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.0.4)\n", - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2.2.0)\n", - "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (8.1.3)\n", - "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (5.9.4)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.0.0)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (3.1.2)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.7.0)\n", - "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.26.13)\n", - "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (6.1)\n", - "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2.4.0)\n", - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (6.0.4)\n", - "Requirement already satisfied: h2<5,>=3.1.0 in 
/usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (4.1.0)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (3.11.0)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.14.0)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (1.0.1)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (2.1.1)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (6.0.1)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+6.ga92bdc24) (4.0.0)\n" + "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-models==23.2.0+7.ga86201ee) (0.10.0)\n", + "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from merlin-models==23.2.0+7.ga86201ee) (0.0.4)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.7.1)\n", + "Requirement already satisfied: pandas<1.4.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.3.5)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (8.0.0)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (4.64.1)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.56.4)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.12.0)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (22.0)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.2.5)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (3.20.3)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.5.0)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.7.1)\n", + "Requirement already satisfied: jinja2 in 
/usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (3.1.2)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.7.0)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.2.0)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.0.0)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.4.0)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.0)\n", + "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.26.13)\n", + "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (8.1.3)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.12.0)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.2.0)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (5.9.4)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.1)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.0.4)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.8.2)\n", + "Requirement already satisfied: numpy>=1.17.3; platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.22.4)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.39.1)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (45.2.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from 
numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (5.2.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.57.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.3.0)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.2.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.4.3)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.3.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.1.1)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.0.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.14.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (3.11.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.0.4)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (4.1.0)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (4.0.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.0.1)\n" ] }, { @@ -321,15 +322,15 @@ "Building wheels for collected packages: merlin-models\n", " Building wheel for merlin-models (PEP 517): started\n", " Building wheel for merlin-models (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-models: filename=merlin_models-23.2.0+6.ga92bdc24-py3-none-any.whl size=374609 sha256=a5077403f59b4f6c38be0d098b696c96fde6e874ac02e12d04bba00c7dcb9ab2\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-rxmtwiq_/wheels/4d/e8/98/0493db55fff90dc9af123f55a9455b96f7f8166c912a02c8a6\n", + " Created wheel for merlin-models: filename=merlin_models-23.2.0+7.ga86201ee-py3-none-any.whl size=374647 sha256=87b9a3e64295c03a7c839101199cff72fbe4b9793525bfc6e01a305d87bdeb70\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-jl8gwtl2/wheels/4d/e8/98/0493db55fff90dc9af123f55a9455b96f7f8166c912a02c8a6\n", "Successfully built merlin-models\n", "Installing collected packages: 
merlin-models\n", " Attempting uninstall: merlin-models\n", " Found existing installation: merlin-models 0.11.0\n", " Uninstalling merlin-models-0.11.0:\n", " Successfully uninstalled merlin-models-0.11.0\n", - "Successfully installed merlin-models-23.2.0+6.ga92bdc24\n" + "Successfully installed merlin-models-23.2.0+7.ga86201ee\n" ] }, { @@ -353,14 +354,14 @@ "text": [ "From https://github.com/NVIDIA-Merlin/core\n", " * branch main -> FETCH_HEAD\n", - " cd96ca5f..aad0c874 main -> origin/main\n" + " cd96ca5f..2d60d237 main -> origin/main\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Updating cd96ca5f..aad0c874\n", + "Updating cd96ca5f..2d60d237\n", "Fast-forward\n", " .github/release-drafter.yml | 44 +--\n", " .github/workflows/ISSUE_TEMPLATE/bug-report.md | 17 +-\n", @@ -416,7 +417,8 @@ " merlin/io/parquet.py | 8 -\n", " merlin/io/writer.py | 1 -\n", " merlin/schema/io/tensorflow_metadata.py | 86 +++---\n", - " merlin/schema/schema.py | 298 +++++++++++---------\n", + " merlin/schema/schema.py | 312 ++++++++++++---------\n", + " merlin/schema/tags.py | 1 +\n", " merlin/table/__init__.py | 24 ++\n", " merlin/table/conversions.py | 135 +++++++++\n", " merlin/table/cupy_column.py | 92 ++++++\n", @@ -435,14 +437,14 @@ " tests/unit/dtypes/test_shape.py | 222 +++++++++++++++\n", " tests/unit/io/test_io.py | 27 +-\n", " tests/unit/schema/test_column_schemas.py | 142 ++++++----\n", - " tests/unit/schema/test_schema.py | 7 +-\n", + " tests/unit/schema/test_schema.py | 22 +-\n", " tests/unit/schema/test_schema_io.py | 27 +-\n", " tests/unit/table/test_convert_column.py | 75 +++++\n", " tests/unit/table/test_tensor_column.py | 186 ++++++++++++\n", - " tests/unit/table/test_tensor_table.py | 311 +++++++++++++++++++++\n", + " tests/unit/table/test_tensor_table.py | 311 ++++++++++++++++++++\n", " tests/unit/utils/test_utils.py | 3 -\n", " tox.ini | 4 +\n", - " 80 files changed, 4413 insertions(+), 672 deletions(-)\n", + " 81 files changed, 4441 insertions(+), 674 deletions(-)\n", " create mode 100644 .github/workflows/cpu-packages.yml\n", " create mode 100644 .prettierignore\n", " create mode 100644 merlin/dag/utils.py\n", @@ -483,68 +485,68 @@ " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (4.64.1)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (8.0.0)\n", - "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (2022.5.0)\n", - "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (2022.7.1)\n", - "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (2022.7.1)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (3.19.6)\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (1.2.5)\n", - "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (11.4.1)\n" + "Requirement already satisfied: 
protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (3.20.3)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (1.12.0)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (2022.5.0)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (1.3.5)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (1.2.5)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (8.0.0)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (2022.7.1)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (1.12.0)\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (1.3.5)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (0.56.4)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+56.gaad0c874) (22.0)\n", - "Requirement already satisfied: numpy>=1.16.6 in /usr/local/lib/python3.8/dist-packages (from pyarrow>=5.0.0->merlin-core==0.9.0+56.gaad0c874) (1.22.4)\n", - "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.3.0)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (6.0)\n", - "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (2.2.0)\n", - "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (0.12.0)\n", - "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (5.9.4)\n", - "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.0.4)\n", - "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (2.4.0)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.0.0)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (3.1.2)\n", - "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (6.1)\n", - "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from 
distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.26.13)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.7.0)\n", - "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (8.1.3)\n", - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (2.2.0)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+56.gaad0c874) (1.2.0)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+56.gaad0c874) (0.4.3)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+56.gaad0c874) (1.57.0)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+56.gaad0c874) (1.3.0)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+56.gaad0c874) (2.8.2)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+56.gaad0c874) (2022.7)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+56.gaad0c874) (5.2.0)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core==0.9.0+56.gaad0c874) (45.2.0)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+56.gaad0c874) (0.39.1)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (2.1.1)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core==0.9.0+56.gaad0c874) (1.0.1)\n", - "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+56.gaad0c874) (4.1.0)\n", - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+56.gaad0c874) (6.0.4)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+56.gaad0c874) (1.14.0)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core==0.9.0+56.gaad0c874) (3.11.0)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+56.gaad0c874) (6.0.1)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+56.gaad0c874) (4.0.0)\n", + "Requirement already satisfied: packaging in 
/usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (22.0)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (11.4.1)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (0.56.4)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (4.64.1)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (2022.7.1)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+60.g2d60d237) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+60.g2d60d237) (1.57.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+60.g2d60d237) (2.8.2)\n", + "Requirement already satisfied: numpy>=1.17.3; platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+60.g2d60d237) (1.22.4)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+60.g2d60d237) (2022.7)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+60.g2d60d237) (1.2.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+60.g2d60d237) (0.4.3)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (3.1.2)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (2.2.0)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (6.0)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (5.9.4)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (1.0.0)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (0.12.0)\n", + "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (1.26.13)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (1.7.0)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (6.1)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from 
distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (2.2.0)\n", + "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (8.1.3)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (1.0.4)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (2.4.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+60.g2d60d237) (0.39.1)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+60.g2d60d237) (5.2.0)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core==0.9.0+60.g2d60d237) (45.2.0)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (1.3.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+60.g2d60d237) (1.14.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+60.g2d60d237) (6.0.4)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+60.g2d60d237) (4.1.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (2.1.1)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (1.0.1)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core==0.9.0+60.g2d60d237) (3.11.0)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+60.g2d60d237) (4.0.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+60.g2d60d237) (6.0.1)\n", "Building wheels for collected packages: merlin-core\n", " Building wheel for merlin-core (PEP 517): started\n", " Building wheel for merlin-core (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-core: filename=merlin_core-0.9.0+56.gaad0c874-py3-none-any.whl size=152601 sha256=dcee4602a77df64eb864c60e8cb155c6b8a165a9059ee943770248cef063bf37\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-hkriw5ee/wheels/8f/da/8c/c779661788874afaa32fd10abeac6016635956e3bad9940584\n", + " Created wheel for merlin-core: filename=merlin_core-0.9.0+60.g2d60d237-py3-none-any.whl size=152708 sha256=ab6c9a4d283317c7ed47d7747512d0e18bf6fa1737cca54ab89c1d48e2c96db9\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-7ssc07tr/wheels/8f/da/8c/c779661788874afaa32fd10abeac6016635956e3bad9940584\n", "Successfully built merlin-core\n", "Installing collected packages: merlin-core\n", " 
Attempting uninstall: merlin-core\n", " Found existing installation: merlin-core 0.10.0\n", " Uninstalling merlin-core-0.10.0:\n", " Successfully uninstalled merlin-core-0.10.0\n", - "Successfully installed merlin-core-0.9.0+56.gaad0c874\n" + "Successfully installed merlin-core-0.9.0+60.g2d60d237\n" ] }, { @@ -657,63 +659,63 @@ " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) (1.9.3)\n", + "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) (0.9.0+60.g2d60d237)\n", "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) (0.0.4)\n", - "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) (0.9.0+56.gaad0c874)\n", "Requirement already satisfied: numpy<1.26.0,>=1.18.5 in /usr/local/lib/python3.8/dist-packages (from scipy->nvtabular==1.6.0+42.g9b186ee9) (1.22.4)\n", "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.12.0)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (8.0.0)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (11.4.1)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.20.3)\n", "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.2.5)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (4.64.1)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (22.0)\n", - "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (11.4.1)\n", - "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7.1)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.56.4)\n", - "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7.1)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.19.6)\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.5)\n", "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.5.0)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in 
/usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.5)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7.1)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (8.0.0)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (4.64.1)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.56.4)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7.1)\n", "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.0)\n", "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.57.0)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.4.3)\n", "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.2.0)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0)\n", - "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.12.0)\n" + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.4.3)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.8.2)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.0)\n", - "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.2.0)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (5.2.0)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.39.1)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (45.2.0)\n", - "Requirement already 
satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.0.4)\n", - "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (5.9.4)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.7.0)\n", - "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.4.0)\n", - "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.1)\n", - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.2.0)\n", - "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (8.1.3)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.1.2)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.2.0)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.1)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.4.0)\n", "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.0.0)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.2.0)\n", "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.26.13)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.8.2)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.0.4)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.7.0)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages 
(from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (5.9.4)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.12.0)\n", + "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (8.1.3)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.39.1)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (5.2.0)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (45.2.0)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.0)\n", "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0.4)\n", "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (4.1.0)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.11.0)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.0.1)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.1.1)\n", "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.14.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.1.1)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.0.1)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.11.0)\n", "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (4.0.0)\n", "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0.1)\n", "Building wheels for collected packages: nvtabular\n", " Building wheel for nvtabular (PEP 517): started\n", " Building wheel for nvtabular (PEP 517): finished with status 'done'\n", - " Created wheel for nvtabular: 
filename=nvtabular-1.6.0+42.g9b186ee9-cp38-cp38-linux_x86_64.whl size=258506 sha256=4c4a37dcdcff0046a7edf1346f3664903218a14a689ef96388354d679c1a3da3\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-c7pdm8dg/wheels/df/bf/c2/9cc2a62fe6da42038c26a9c0c4e25f9767093528b102fa30a2\n", + " Created wheel for nvtabular: filename=nvtabular-1.6.0+42.g9b186ee9-cp38-cp38-linux_x86_64.whl size=258506 sha256=7fa5d632d0c409afe6a05751cc52198f7212ca34522a5f921f5856426f211255\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-3v8i8892/wheels/df/bf/c2/9cc2a62fe6da42038c26a9c0c4e25f9767093528b102fa30a2\n", "Successfully built nvtabular\n", "Installing collected packages: nvtabular\n", " Attempting uninstall: nvtabular\n", @@ -834,71 +836,71 @@ " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (0.9.0+56.gaad0c874)\n", - "Requirement already satisfied: requests<3,>=2.10 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.28.1)\n", "Requirement already satisfied: treelite==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.4.0)\n", + "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (0.9.0+60.g2d60d237)\n", "Requirement already satisfied: nvtabular>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (1.6.0+42.g9b186ee9)\n", + "Requirement already satisfied: requests<3,>=2.10 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.28.1)\n", "Requirement already satisfied: treelite-runtime==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.4.0)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (22.0)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.19.6)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from treelite==2.4.0->merlin-systems==0.7.0+61.g329cba4) (1.9.3)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.8/dist-packages (from treelite==2.4.0->merlin-systems==0.7.0+61.g329cba4) (1.22.4)\n", "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.3.5)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (22.0)\n", "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (11.4.1)\n", "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7.1)\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.2.5)\n", - "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from 
merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.5.0)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.56.4)\n", "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7.1)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (8.0.0)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.64.1)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.20.3)\n", "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.12.0)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.56.4)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.2.5)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.64.1)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (8.0.0)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.5.0)\n", + "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from nvtabular>=1.0.0->merlin-systems==0.7.0+61.g329cba4) (0.0.4)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (1.26.13)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2019.11.28)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2.1.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2.8)\n", - "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from treelite==2.4.0->merlin-systems==0.7.0+61.g329cba4) (1.9.3)\n" + "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2.1.1)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: numpy in /usr/local/lib/python3.8/dist-packages (from treelite==2.4.0->merlin-systems==0.7.0+61.g329cba4) (1.22.4)\n", - "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from nvtabular>=1.0.0->merlin-systems==0.7.0+61.g329cba4) (0.0.4)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from 
pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.8.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2.8)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2019.11.28)\n", "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.8.2)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0)\n", "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.2.0)\n", - "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.12.0)\n", "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.3.0)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.4.3)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.2.0)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.12.0)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (45.2.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (5.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.39.1)\n", + "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (8.1.3)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.2.0)\n", "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.1)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.7.0)\n", "Requirement already satisfied: 
locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.0)\n", - "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (5.9.4)\n", "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.4.0)\n", - "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (8.1.3)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.1.2)\n", "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.4)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.7.0)\n", - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.2.0)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (5.9.4)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.1.2)\n", "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.3.0)\n", "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.57.0)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (45.2.0)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.39.1)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (5.2.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.4.3)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.2.0)\n", "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.14.0)\n", - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0.4)\n", - "Requirement already 
satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.1.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.1.1)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.1)\n", "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.11.0)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0.1)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.1.1)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.1.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0.4)\n", "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.0.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0.1)\n", "Building wheels for collected packages: merlin-systems\n", " Building wheel for merlin-systems (PEP 517): started\n", " Building wheel for merlin-systems (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-systems: filename=merlin_systems-0.7.0+61.g329cba4-py3-none-any.whl size=99480 sha256=7400ab8e12273b15c96f94806974ef168f6bbc63e5a02a9fccf0905f0ea10f43\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-_zkkhk4v/wheels/1f/e9/71/1b0c6295aa7f4b37cb70292d96d87d9f38204674e6531bdda6\n", + " Created wheel for merlin-systems: filename=merlin_systems-0.7.0+61.g329cba4-py3-none-any.whl size=99480 sha256=decd8362d6a784bc41ada2c225ba4d7965da1f4de845871c0baa332cf9223cad\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-sv7sdyf1/wheels/1f/e9/71/1b0c6295aa7f4b37cb70292d96d87d9f38204674e6531bdda6\n", "Successfully built merlin-systems\n", "Installing collected packages: merlin-systems\n", " Attempting uninstall: merlin-systems\n", @@ -929,14 +931,14 @@ "text": [ "From https://github.com/NVIDIA-Merlin/dataloader\n", " * branch main -> FETCH_HEAD\n", - " 5b3fe46..dbf8816 main -> origin/main\n" + " 5b3fe46..ce2215d main -> origin/main\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Updating 5b3fe46..dbf8816\n", + "Updating 5b3fe46..ce2215d\n", "Fast-forward\n", " .github/workflows/cpu-ci.yml | 81 -----\n", " 
.github/workflows/cpu-packages.yml | 125 +++++++\n", @@ -949,15 +951,18 @@ " .../02-Multi-GPU-Tensorflow-with-Horovod.ipynb | 371 +++++++++++++++++++++\n", " merlin/dataloader/jax.py | 3 +\n", " merlin/dataloader/loader_base.py | 221 ++++--------\n", + " merlin/dataloader/ops/embeddings/embedding_op.py | 4 +-\n", " .../ops/embeddings/torch_embedding_op.py | 4 +-\n", " merlin/dataloader/tensorflow.py | 9 +-\n", " merlin/dataloader/torch.py | 49 ++-\n", " merlin/dataloader/utils/tf/tf_trainer.py | 2 +-\n", " .../test_multi_GPU_with_horovod_and_tensorflow.py | 28 ++\n", " tests/unit/dataloader/test_tf_dataloader.py | 20 +-\n", + " tests/unit/dataloader/test_tf_embeddings.py | 24 +-\n", " tests/unit/dataloader/test_torch_dataloader.py | 38 +++\n", + " tests/unit/dataloader/test_torch_embeddings.py | 12 +-\n", " tox.ini | 1 +\n", - " 19 files changed, 781 insertions(+), 278 deletions(-)\n", + " 22 files changed, 801 insertions(+), 298 deletions(-)\n", " create mode 100644 .github/workflows/cpu-packages.yml\n", " create mode 100644 ci/pr.gpu.Jenkinsfile\n", " create mode 100644 examples/02-Multi-GPU-Tensorflow-with-Horovod.ipynb\n", @@ -969,90 +974,80 @@ " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: merlin-core>=0.8.0 in /usr/local/lib/python3.8/dist-packages (from merlin-dataloader==0.0.2+41.gdbf8816) (0.9.0+56.gaad0c874)\n", - "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (11.4.1)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (0.56.4)\n", - "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.12.0)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (3.19.6)\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.2.5)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (4.64.1)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (8.0.0)\n", - "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2022.7.1)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (22.0)\n", - "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2022.5.0)\n", - "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2022.7.1)\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.3.5)\n", - 
"Requirement already satisfied: numpy<1.24,>=1.18 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.22.4)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (0.39.1)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (45.2.0)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (5.2.0)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.57.0)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.3.0)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.2.0)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (0.4.3)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (6.0)\n", - "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.2.0)\n", - "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (0.12.0)\n", - "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.3.0)\n", - "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (8.1.3)\n", - "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (5.9.4)\n", - "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (6.1)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.0.0)\n", - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.2.0)\n", - "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.0.4)\n", - "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from 
distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.26.13)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.7.0)\n", - "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.4.0)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (3.1.2)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.8.2)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2022.7)\n" + "Requirement already satisfied: merlin-core>=0.8.0 in /usr/local/lib/python3.8/dist-packages (from merlin-dataloader==0.0.2+43.gce2215d) (0.9.0+60.g2d60d237)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2022.7.1)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2022.5.0)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2022.7.1)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.12.0)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (11.4.1)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.3.5)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (0.56.4)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (8.0.0)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (3.20.3)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (4.64.1)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (22.0)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.2.5)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2.2.0)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages 
(from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (5.9.4)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (6.1)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.0.4)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.0.0)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.7.0)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (3.1.2)\n", + "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (8.1.3)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2.4.0)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2.2.0)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (6.0)\n", + "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.26.13)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (0.12.0)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.3.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.57.0)\n", + "Requirement already satisfied: numpy>=1.17.3; platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.22.4)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2022.7)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in 
/usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (5.2.0)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (45.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (0.39.1)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (3.11.0)\n", - "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (4.1.0)\n", - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (6.0.4)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.0.1)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.1.1)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.14.0)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (6.0.1)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (4.0.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (0.4.3)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.2.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2.1.1)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.0.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.14.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (3.11.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (6.0.4)\n", + 
"Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (4.1.0)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (4.0.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (6.0.1)\n", "Building wheels for collected packages: merlin-dataloader\n", " Building wheel for merlin-dataloader (PEP 517): started\n", " Building wheel for merlin-dataloader (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-dataloader: filename=merlin_dataloader-0.0.2+41.gdbf8816-py3-none-any.whl size=40852 sha256=25522e9c2124926ac2063828d36ae15009e18cb85666b6ebf5c29cdd24213231\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-vvfapbst/wheels/8c/19/5b/15dc04f5a977f6a7f73ed66c91996a687b1d9e3154a4765536\n", + " Created wheel for merlin-dataloader: filename=merlin_dataloader-0.0.2+43.gce2215d-py3-none-any.whl size=40867 sha256=50cce97c1e4f2bd217079e464c511215d0b1d243b2aedb49afff090eb31e10ae\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-amzw6h6k/wheels/8c/19/5b/15dc04f5a977f6a7f73ed66c91996a687b1d9e3154a4765536\n", "Successfully built merlin-dataloader\n", "Installing collected packages: merlin-dataloader\n", " Attempting uninstall: merlin-dataloader\n", " Found existing installation: merlin-dataloader 0.0.4\n", " Uninstalling merlin-dataloader-0.0.4:\n", " Successfully uninstalled merlin-dataloader-0.0.4\n", - "Successfully installed merlin-dataloader-0.0.2+41.gdbf8816\n", - "Collecting matplotlib\n", - " Downloading matplotlib-3.7.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (9.2 MB)\n", - "Requirement already satisfied: importlib-resources>=3.2.0; python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from matplotlib) (5.10.2)\n", - "Collecting cycler>=0.10\n", - " Downloading cycler-0.11.0-py3-none-any.whl (6.4 kB)\n", - "Collecting kiwisolver>=1.0.1\n", - " Downloading kiwisolver-1.4.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.2 MB)\n", + "Successfully installed merlin-dataloader-0.0.2+43.gce2215d\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.8/dist-packages (3.6.2)\n", "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (2.8.2)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (1.0.6)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (0.11.0)\n", + "Requirement already satisfied: numpy>=1.19 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (1.22.4)\n", + "Requirement already satisfied: pyparsing>=2.2.1 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (3.0.9)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (4.38.0)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (22.0)\n", - "Requirement already satisfied: numpy>=1.20 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (1.22.4)\n", - "Collecting fonttools>=4.22.0\n", - " Downloading 
fonttools-4.39.0-py3-none-any.whl (1.0 MB)\n", - "Collecting pillow>=6.2.0\n", - " Downloading Pillow-9.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)\n", - "Collecting contourpy>=1.0.1\n", - " Downloading contourpy-1.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (300 kB)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (3.0.9)\n", - "Requirement already satisfied: zipp>=3.1.0; python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from importlib-resources>=3.2.0; python_version < \"3.10\"->matplotlib) (3.11.0)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7->matplotlib) (1.14.0)\n", - "Installing collected packages: cycler, kiwisolver, fonttools, pillow, contourpy, matplotlib\n", - "Successfully installed contourpy-1.0.7 cycler-0.11.0 fonttools-4.39.0 kiwisolver-1.4.4 matplotlib-3.7.1 pillow-9.4.0\n" + "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (9.3.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (1.4.4)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7->matplotlib) (1.14.0)\n" ] } ], @@ -1079,18 +1074,18 @@ "text": [ "Collecting gdown\n", " Downloading gdown-4.6.4-py3-none-any.whl (14 kB)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from gdown) (3.9.0)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from gdown) (4.64.1)\n", + "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.8/dist-packages (from gdown) (4.11.1)\n", "Requirement already satisfied: six in /usr/lib/python3/dist-packages (from gdown) (1.14.0)\n", "Requirement already satisfied: requests[socks] in /usr/local/lib/python3.8/dist-packages (from gdown) (2.28.1)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from gdown) (3.9.0)\n", - "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.8/dist-packages (from gdown) (4.11.1)\n", + "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.8/dist-packages (from beautifulsoup4->gdown) (2.3.2.post1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2.8)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.26.13)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2019.11.28)\n", "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (2.1.1)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.26.13)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2.8)\n", "Collecting PySocks!=1.5.7,>=1.5.6; extra == \"socks\"\n", " Downloading PySocks-1.7.1-py3-none-any.whl (16 kB)\n", - "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.8/dist-packages (from beautifulsoup4->gdown) (2.3.2.post1)\n", "Installing collected packages: gdown, PySocks\n", "Successfully installed PySocks-1.7.1 gdown-4.6.4\n" ] @@ 
-1102,40 +1097,40 @@ "Downloading...\n", "From: https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", "To: /workspace/T4Rec_repro/rees46_ecom_dataset_small_for_ci.zip\n", - "100%|██████████| 43.4M/43.4M [00:08<00:00, 5.42MB/s]\n" + "100%|██████████| 43.4M/43.4M [00:07<00:00, 6.08MB/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Get:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 InRelease [1581 B]\n", - "Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 Packages [907 kB]\n", - "Get:3 http://archive.ubuntu.com/ubuntu focal InRelease [265 kB]\n", - "Get:4 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]\n", - "Get:5 http://security.ubuntu.com/ubuntu focal-security/restricted amd64 Packages [1998 kB]\n", - "Get:6 http://archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]\n", - "Get:7 http://archive.ubuntu.com/ubuntu focal-backports InRelease [108 kB]\n", - "Get:8 http://archive.ubuntu.com/ubuntu focal/main amd64 Packages [1275 kB]\n", - "Get:9 http://security.ubuntu.com/ubuntu focal-security/main amd64 Packages [2539 kB]\n", - "Get:10 http://archive.ubuntu.com/ubuntu focal/universe amd64 Packages [11.3 MB]\n", - "Get:11 http://security.ubuntu.com/ubuntu focal-security/multiverse amd64 Packages [28.5 kB]\n", - "Get:12 http://security.ubuntu.com/ubuntu focal-security/universe amd64 Packages [1015 kB]\n", + "Get:1 http://archive.ubuntu.com/ubuntu focal InRelease [265 kB]\n", + "Get:2 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]\n", + "Get:3 http://security.ubuntu.com/ubuntu focal-security/restricted amd64 Packages [1998 kB]\n", + "Get:4 http://archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]\n", + "Get:5 http://archive.ubuntu.com/ubuntu focal-backports InRelease [108 kB]\n", + "Get:6 http://archive.ubuntu.com/ubuntu focal/universe amd64 Packages [11.3 MB]\n", + "Get:7 http://security.ubuntu.com/ubuntu focal-security/multiverse amd64 Packages [28.5 kB]\n", + "Get:8 http://security.ubuntu.com/ubuntu focal-security/main amd64 Packages [2544 kB]\n", + "Get:9 http://security.ubuntu.com/ubuntu focal-security/universe amd64 Packages [1017 kB]\n", + "Get:10 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 InRelease [1581 B]\n", + "Get:11 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 Packages [920 kB]\n", + "Get:12 http://archive.ubuntu.com/ubuntu focal/main amd64 Packages [1275 kB]\n", "Get:13 http://archive.ubuntu.com/ubuntu focal/multiverse amd64 Packages [177 kB]\n", "Get:14 http://archive.ubuntu.com/ubuntu focal/restricted amd64 Packages [33.4 kB]\n", - "Get:15 http://archive.ubuntu.com/ubuntu focal-updates/multiverse amd64 Packages [31.2 kB]\n", - "Get:16 http://archive.ubuntu.com/ubuntu focal-updates/universe amd64 Packages [1310 kB]\n", - "Get:17 http://archive.ubuntu.com/ubuntu focal-updates/restricted amd64 Packages [2134 kB]\n", - "Get:18 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 Packages [3014 kB]\n", - "Get:19 http://archive.ubuntu.com/ubuntu focal-backports/main amd64 Packages [55.2 kB]\n", - "Get:20 http://archive.ubuntu.com/ubuntu focal-backports/universe amd64 Packages [28.6 kB]\n", - "Fetched 26.5 MB in 11s (2470 kB/s)\n", + "Get:15 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 Packages [3019 kB]\n", + "Get:16 http://archive.ubuntu.com/ubuntu focal-updates/restricted amd64 Packages [2134 kB]\n", + "Get:17 http://archive.ubuntu.com/ubuntu 
focal-updates/multiverse amd64 Packages [31.2 kB]\n", + "Get:18 http://archive.ubuntu.com/ubuntu focal-updates/universe amd64 Packages [1312 kB]\n", + "Get:19 http://archive.ubuntu.com/ubuntu focal-backports/universe amd64 Packages [28.6 kB]\n", + "Get:20 http://archive.ubuntu.com/ubuntu focal-backports/main amd64 Packages [55.2 kB]\n", + "Fetched 26.5 MB in 9s (3028 kB/s)\n", "Reading package lists...\n", "Reading package lists...\n", "Building dependency tree...\n", "Reading state information...\n", "unzip is already the newest version (6.0-25ubuntu1.1).\n", - "0 upgraded, 0 newly installed, 0 to remove and 84 not upgraded.\n", + "0 upgraded, 0 newly installed, 0 to remove and 88 not upgraded.\n", "Archive: rees46_ecom_dataset_small_for_ci.zip\n", " creating: ecom_dataset/0001/\n", " inflating: ecom_dataset/0001/valid.parquet \n", @@ -1165,7 +1160,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "id": "ceb3ae93", "metadata": {}, "outputs": [ @@ -1173,7 +1168,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-03-09 06:23:10.964331: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-03-13 08:23:26.109904: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" ] }, @@ -1190,21 +1185,21 @@ "text": [ "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", - "2023-03-09 06:23:13.408883: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-09 06:23:13.409336: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-09 06:23:13.409494: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:23:29.137620: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:23:29.138164: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:23:29.138302: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n", - "2023-03-09 06:23:13.887706: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-03-13 08:23:29.697679: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-03-09 06:23:13.888643: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-09 06:23:13.888853: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-09 06:23:13.889008: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-09 06:23:14.636457: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-09 06:23:14.636673: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-09 06:23:14.636835: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-09 06:23:14.636950: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", - "2023-03-09 06:23:14.637016: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n" + "2023-03-13 08:23:29.698582: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:23:29.698767: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:23:29.698900: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:23:30.652140: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:23:30.652338: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read 
from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:23:30.652477: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:23:30.652590: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", + "2023-03-13 08:23:30.652656: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n" ] } ], @@ -1223,7 +1218,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "id": "11647dd3", "metadata": {}, "outputs": [], @@ -1234,7 +1229,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "id": "4ab4e0fb", "metadata": {}, "outputs": [], @@ -1244,7 +1239,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 16, "id": "8d9903e6", "metadata": {}, "outputs": [], @@ -1256,12 +1251,14 @@ "n_head = 16\n", "batch_size = 128\n", "learning_rate = 0.0006667377132554976\n", - "n_epoch = 5" + "n_epoch = 5\n", + "item_embedding_dim = 448 \n", + "item_id_embeddings_init_std = 3" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 9, "id": "a6ade14a", "metadata": {}, "outputs": [], @@ -1275,19 +1272,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 10, "id": "7f15a0a0", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:148: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n", "\n", @@ -1299,35 +1287,91 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 14, "id": "74ccc9a9", "metadata": {}, "outputs": [], "source": [ - "train.schema = schema" + "# we only use the item-id as input to the model\n", + "schema_model = schema.select_by_tag(Tags.ITEM_ID)" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 1, "id": "5a4c7ca3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-13 08:58:39.475828: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. 
The old module will be deleted in version 2.11.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", + "2023-03-13 08:58:41.961797: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:58:41.962213: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:58:41.962371: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2023-03-13 08:58:42.406474: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-03-13 08:58:42.407434: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:58:42.407642: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:58:42.407804: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:58:43.170053: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:58:43.170266: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:58:43.170428: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:58:43.170555: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n" + ] + }, + { + "ename": "NameError", + "evalue": "name 'schema_model' is not defined", + "output_type": "error", + "traceback": [ + 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m InputBlockV2\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mtf\u001b[39;00m\n\u001b[1;32m 3\u001b[0m input_block \u001b[38;5;241m=\u001b[39m InputBlockV2(\n\u001b[0;32m----> 4\u001b[0m \u001b[43mschema_model\u001b[49m,\n\u001b[1;32m 5\u001b[0m categorical\u001b[38;5;241m=\u001b[39mmm\u001b[38;5;241m.\u001b[39mEmbeddings(\n\u001b[1;32m 6\u001b[0m schema_model\u001b[38;5;241m.\u001b[39mselect_by_tag(Tags\u001b[38;5;241m.\u001b[39mCATEGORICAL),\n\u001b[1;32m 7\u001b[0m dim\u001b[38;5;241m=\u001b[39mitem_embedding_dim,\n\u001b[1;32m 8\u001b[0m \u001b[38;5;66;03m#This is equivalent of torch.nn.init.normal_\u001b[39;00m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;66;03m# embeddings_initializer=tf.keras.initializers.RandomNormal(\u001b[39;00m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;66;03m# mean=0.0,\u001b[39;00m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# stddev=item_id_embeddings_init_std\u001b[39;00m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;66;03m# ),\u001b[39;00m\n\u001b[1;32m 13\u001b[0m sequence_combiner\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 14\u001b[0m )\n\u001b[1;32m 15\u001b[0m )\n", + "\u001b[0;31mNameError\u001b[0m: name 'schema_model' is not defined" + ] + } + ], "source": [ - "input_block = mm.InputBlockV2(\n", - " train.schema.select_by_name('sess_pid_seq'), \n", - " embeddings=mm.Embeddings(\n", - " train.schema.select_by_name('sess_pid_seq'), \n", - " sequence_combiner=None,\n", - " dim=d_model\n", - " ),\n", - "# pre=mm.StochasticSwapNoise()\n", - ")" + "from merlin.models.tf import InputBlockV2\n", + "import tensorflow as tf\n", + "input_block = InputBlockV2(\n", + " schema_model,\n", + " categorical=mm.Embeddings(\n", + " schema_model.select_by_tag(Tags.CATEGORICAL),\n", + " dim=item_embedding_dim,\n", + " #This is equivalent of torch.nn.init.normal_\n", + "# embeddings_initializer=tf.keras.initializers.RandomNormal(\n", + "# mean=0.0,\n", + "# stddev=item_id_embeddings_init_std\n", + "# ),\n", + " sequence_combiner=None,\n", + " )\n", + " )" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 18, "id": "34c739b3", "metadata": {}, "outputs": [], @@ -1337,7 +1381,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 19, "id": "14c35b2a", "metadata": {}, "outputs": [], @@ -1347,7 +1391,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 20, "id": "866f3249", "metadata": {}, "outputs": [], @@ -1361,7 +1405,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 21, "id": "288d08df", "metadata": {}, "outputs": [], @@ -1375,7 +1419,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 22, "id": "064ea5ec", "metadata": {}, "outputs": [], @@ -1387,7 +1431,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 23, "id": "6c008e16", "metadata": {}, "outputs": [], @@ -1397,7 +1441,7 @@ }, { "cell_type": "code", - "execution_count": 29, + 
"execution_count": 24, "id": "49b12d31", "metadata": {}, "outputs": [], @@ -1409,7 +1453,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 25, "id": "d84a30d3", "metadata": {}, "outputs": [], @@ -1421,36 +1465,52 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 27, "id": "e7474131", "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "Epoch 1/5\n", - "WARNING:tensorflow:Gradients do not exist for variables ['model_1/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 78s 110ms/step - loss: 3.7036 - recall_at_10: 0.6067 - mrr_at_10: 0.4806 - ndcg_at_10: 0.5108 - map_at_10: 0.4806 - precision_at_10: 0.0607 - regularization_loss: 0.0000e+00 - loss_batch: 3.7070\n", - "Epoch 2/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 2.9681 - recall_at_10: 0.6940 - mrr_at_10: 0.5792 - ndcg_at_10: 0.6068 - map_at_10: 0.5792 - precision_at_10: 0.0694 - regularization_loss: 0.0000e+00 - loss_batch: 2.9733\n", - "Epoch 3/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 2.5195 - recall_at_10: 0.7439 - mrr_at_10: 0.6367 - ndcg_at_10: 0.6625 - map_at_10: 0.6367 - precision_at_10: 0.0744 - regularization_loss: 0.0000e+00 - loss_batch: 2.5258\n", - "Epoch 4/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 2.2286 - recall_at_10: 0.7810 - mrr_at_10: 0.6800 - ndcg_at_10: 0.7043 - map_at_10: 0.6800 - precision_at_10: 0.0781 - regularization_loss: 0.0000e+00 - loss_batch: 2.2364\n", - "Epoch 5/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 2.0158 - recall_at_10: 0.8031 - mrr_at_10: 0.7071 - ndcg_at_10: 0.7302 - map_at_10: 0.7071 - precision_at_10: 0.0803 - regularization_loss: 0.0000e+00 - loss_batch: 2.0250\n" + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. 
Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n" ] }, { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" + "ename": "LinkerError", + "evalue": "[222] Call to cuLinkAddData results in UNKNOWN_CUDA_ERROR\nptxas application ptx input, line 9; fatal : Unsupported .version 7.8; current version is '7.7'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mLinkerError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[27], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mmodel_transformer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrain\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbatch_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mepochs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_epoch\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mpre\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSequencePredictNext\u001b[49m\u001b[43m(\u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mschema\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtransformer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mxlnet_block\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/models/tf/models/base.py:1363\u001b[0m, in \u001b[0;36mBaseModel.fit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing, train_metrics_steps, pre, **kwargs)\u001b[0m\n\u001b[1;32m 1360\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtrain_pre, SequenceTransform):\n\u001b[1;32m 1361\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtrain_pre\u001b[38;5;241m.\u001b[39mconfigure_for_train()\n\u001b[0;32m-> 1363\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mfit_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1365\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pre:\n\u001b[1;32m 1366\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtrain_pre\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py:70\u001b[0m, in \u001b[0;36mfilter_traceback..error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 67\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m 
_process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n\u001b[1;32m 68\u001b[0m \u001b[38;5;66;03m# To get the full stack trace, call:\u001b[39;00m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;66;03m# `tf.debugging.disable_traceback_filtering()`\u001b[39;00m\n\u001b[0;32m---> 70\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\u001b[38;5;241m.\u001b[39mwith_traceback(filtered_tb) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28mNone\u001b[39m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m filtered_tb\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/dataloader/tensorflow.py:154\u001b[0m, in \u001b[0;36mLoader.__getitem__\u001b[0;34m(self, index)\u001b[0m\n\u001b[1;32m 146\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, index):\n\u001b[1;32m 147\u001b[0m \u001b[38;5;124;03m\"\"\"Gets batch at position `index`.\u001b[39;00m\n\u001b[1;32m 148\u001b[0m \n\u001b[1;32m 149\u001b[0m \u001b[38;5;124;03m Note: This returns the next batch in the iterator.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 152\u001b[0m \u001b[38;5;124;03m don't currently support fetching a batch by index.\u001b[39;00m\n\u001b[1;32m 153\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 154\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mLoaderBase\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__next__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/dataloader/loader_base.py:251\u001b[0m, in \u001b[0;36mLoaderBase.__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 249\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__next__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 250\u001b[0m \u001b[38;5;124;03m\"\"\"Get the next batch.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 251\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_next_batch\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/dataloader/loader_base.py:322\u001b[0m, in \u001b[0;36mLoaderBase._get_next_batch\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 320\u001b[0m \u001b[38;5;66;03m# try to iterate through existing batches\u001b[39;00m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 322\u001b[0m batch \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mnext\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_batch_itr)\n\u001b[1;32m 323\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m:\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# anticipate any more chunks getting created\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# if not, raise the StopIteration\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_working \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_buff\u001b[38;5;241m.\u001b[39mempty:\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/dataloader/loader_base.py:362\u001b[0m, in \u001b[0;36mLoaderBase.make_tensors\u001b[0;34m(self, gdf, use_row_lengths)\u001b[0m\n\u001b[1;32m 359\u001b[0m split_idx 
\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_segment_lengths(\u001b[38;5;28mlen\u001b[39m(gdf))\n\u001b[1;32m 361\u001b[0m \u001b[38;5;66;03m# convert dataframe to framework-specific tensors\u001b[39;00m\n\u001b[0;32m--> 362\u001b[0m tensors_by_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_process_dataframe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgdf\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 364\u001b[0m \u001b[38;5;66;03m# split them into batches and map to the framework-specific output format\u001b[39;00m\n\u001b[1;32m 365\u001b[0m tensor_batches \u001b[38;5;241m=\u001b[39m {}\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/nvtx/nvtx.py:101\u001b[0m, in \u001b[0;36mannotate.__call__..inner\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(func)\n\u001b[1;32m 99\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 100\u001b[0m libnvtx_push_range(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mattributes, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdomain\u001b[38;5;241m.\u001b[39mhandle)\n\u001b[0;32m--> 101\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 102\u001b[0m libnvtx_pop_range(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdomain\u001b[38;5;241m.\u001b[39mhandle)\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/dataloader/loader_base.py:488\u001b[0m, in \u001b[0;36mLoaderBase._process_dataframe\u001b[0;34m(self, gdf)\u001b[0m\n\u001b[1;32m 485\u001b[0m column \u001b[38;5;241m=\u001b[39m gdf_i\u001b[38;5;241m.\u001b[39mpop(column_name)\n\u001b[1;32m 486\u001b[0m leaves, col_offsets \u001b[38;5;241m=\u001b[39m pull_apart_list(column, device\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdevice)\n\u001b[0;32m--> 488\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[43mleaves\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m, \u001b[38;5;28mlist\u001b[39m):\n\u001b[1;32m 489\u001b[0m leaves, nest_offsets \u001b[38;5;241m=\u001b[39m pull_apart_list(leaves, device\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdevice)\n\u001b[1;32m 490\u001b[0m col_offsets \u001b[38;5;241m=\u001b[39m nest_offsets\u001b[38;5;241m.\u001b[39miloc[col_offsets[:]]\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/nvtx/nvtx.py:101\u001b[0m, in \u001b[0;36mannotate.__call__..inner\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(func)\n\u001b[1;32m 99\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 100\u001b[0m libnvtx_push_range(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mattributes, 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdomain\u001b[38;5;241m.\u001b[39mhandle)\n\u001b[0;32m--> 101\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 102\u001b[0m libnvtx_pop_range(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdomain\u001b[38;5;241m.\u001b[39mhandle)\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/cudf/core/series.py:1171\u001b[0m, in \u001b[0;36mSeries.__getitem__\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m 1169\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39miloc[arg]\n\u001b[1;32m 1170\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1171\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloc\u001b[49m\u001b[43m[\u001b[49m\u001b[43marg\u001b[49m\u001b[43m]\u001b[49m\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/nvtx/nvtx.py:101\u001b[0m, in \u001b[0;36mannotate.__call__..inner\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(func)\n\u001b[1;32m 99\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 100\u001b[0m libnvtx_push_range(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mattributes, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdomain\u001b[38;5;241m.\u001b[39mhandle)\n\u001b[0;32m--> 101\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 102\u001b[0m libnvtx_pop_range(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdomain\u001b[38;5;241m.\u001b[39mhandle)\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/cudf/core/series.py:255\u001b[0m, in \u001b[0;36m_SeriesLocIndexer.__getitem__\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m 253\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n\u001b[1;32m 254\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 255\u001b[0m arg \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_loc_to_iloc(arg)\n\u001b[1;32m 256\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mKeyError\u001b[39;00m, \u001b[38;5;167;01mIndexError\u001b[39;00m, \u001b[38;5;167;01mValueError\u001b[39;00m):\n\u001b[1;32m 257\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(arg)\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/cudf/core/series.py:294\u001b[0m, in \u001b[0;36m_SeriesLocIndexer._loc_to_iloc\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m 292\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m found_index\n\u001b[1;32m 293\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 294\u001b[0m found_index 
\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_frame\u001b[38;5;241m.\u001b[39mindex\u001b[38;5;241m.\u001b[39m_values\u001b[38;5;241m.\u001b[39mfind_first_value(\n\u001b[1;32m 295\u001b[0m arg, closest\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 296\u001b[0m )\n\u001b[1;32m 297\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m found_index\n\u001b[1;32m 298\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mKeyError\u001b[39;00m, \u001b[38;5;167;01mIndexError\u001b[39;00m, \u001b[38;5;167;01mValueError\u001b[39;00m):\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/cudf/core/column/numerical.py:566\u001b[0m, in \u001b[0;36mNumericalColumn.find_first_value\u001b[0;34m(self, value, closest)\u001b[0m\n\u001b[1;32m 564\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m value \u001b[38;5;241m>\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax():\n\u001b[1;32m 565\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m)\n\u001b[0;32m--> 566\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_find_value\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mclosest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcudautils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfind_first\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mgt\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/cudf/core/column/numerical.py:534\u001b[0m, in \u001b[0;36mNumericalColumn._find_value\u001b[0;34m(self, value, closest, find, compare)\u001b[0m\n\u001b[1;32m 532\u001b[0m found \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 533\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 534\u001b[0m found \u001b[38;5;241m=\u001b[39m \u001b[43mfind\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 535\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata_array_view\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 536\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 537\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 538\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 539\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m found \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 540\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mis_monotonic_increasing \u001b[38;5;129;01mand\u001b[39;00m closest:\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/cudf/utils/cudautils.py:114\u001b[0m, in \u001b[0;36mfind_first\u001b[0;34m(arr, val, mask, compare)\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfind_first\u001b[39m(arr, val, mask\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, 
compare\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124meq\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 101\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;124;03m Returns the index of the first occurrence of *val* in *arr*..\u001b[39;00m\n\u001b[1;32m 103\u001b[0m \u001b[38;5;124;03m Or the first occurrence of *arr* *compare* *val*, if *compare* is not eq\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03m compare: str ('gt', 'lt', or 'eq' (default))\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 114\u001b[0m found_col \u001b[38;5;241m=\u001b[39m \u001b[43mfind_index_of_val\u001b[49m\u001b[43m(\u001b[49m\u001b[43marr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mval\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcompare\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompare\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 115\u001b[0m found_col \u001b[38;5;241m=\u001b[39m found_col\u001b[38;5;241m.\u001b[39mfind_and_replace([arr\u001b[38;5;241m.\u001b[39msize], [\u001b[38;5;28;01mNone\u001b[39;00m], \u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 117\u001b[0m min_index \u001b[38;5;241m=\u001b[39m found_col\u001b[38;5;241m.\u001b[39mmin()\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/cudf/utils/cudautils.py:93\u001b[0m, in \u001b[0;36mfind_index_of_val\u001b[0;34m(arr, val, mask, compare)\u001b[0m\n\u001b[1;32m 89\u001b[0m gpu_mark_found_float\u001b[38;5;241m.\u001b[39mforall(found\u001b[38;5;241m.\u001b[39msize)(\n\u001b[1;32m 90\u001b[0m arr, val, found, arr\u001b[38;5;241m.\u001b[39msize\n\u001b[1;32m 91\u001b[0m )\n\u001b[1;32m 92\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m---> 93\u001b[0m \u001b[43mgpu_mark_found_int\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforall\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfound\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msize\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 94\u001b[0m \u001b[43m \u001b[49m\u001b[43marr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mval\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfound\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43marr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msize\u001b[49m\n\u001b[1;32m 95\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cudf\u001b[38;5;241m.\u001b[39mcore\u001b[38;5;241m.\u001b[39mcolumn\u001b[38;5;241m.\u001b[39mcolumn\u001b[38;5;241m.\u001b[39mas_column(found)\u001b[38;5;241m.\u001b[39mset_mask(mask)\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/numba/cuda/dispatcher.py:438\u001b[0m, in \u001b[0;36mForAll.__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 436\u001b[0m specialized \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdispatcher\n\u001b[1;32m 437\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 438\u001b[0m specialized \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdispatcher\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mspecialize\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 439\u001b[0m blockdim \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compute_thread_per_block(specialized)\n\u001b[1;32m 440\u001b[0m griddim \u001b[38;5;241m=\u001b[39m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mntasks \u001b[38;5;241m+\u001b[39m blockdim \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m) \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m blockdim\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/numba/cuda/dispatcher.py:667\u001b[0m, in \u001b[0;36mCUDADispatcher.specialize\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 664\u001b[0m targetoptions \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtargetoptions\n\u001b[1;32m 665\u001b[0m specialization \u001b[38;5;241m=\u001b[39m CUDADispatcher(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpy_func,\n\u001b[1;32m 666\u001b[0m targetoptions\u001b[38;5;241m=\u001b[39mtargetoptions)\n\u001b[0;32m--> 667\u001b[0m \u001b[43mspecialization\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompile\u001b[49m\u001b[43m(\u001b[49m\u001b[43margtypes\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 668\u001b[0m specialization\u001b[38;5;241m.\u001b[39mdisable_compile()\n\u001b[1;32m 669\u001b[0m specialization\u001b[38;5;241m.\u001b[39m_specialized \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/numba/cuda/dispatcher.py:796\u001b[0m, in \u001b[0;36mCUDADispatcher.compile\u001b[0;34m(self, sig)\u001b[0m\n\u001b[1;32m 794\u001b[0m kernel \u001b[38;5;241m=\u001b[39m _Kernel(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpy_func, argtypes, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtargetoptions)\n\u001b[1;32m 795\u001b[0m \u001b[38;5;66;03m# We call bind to force codegen, so that there is a cubin to cache\u001b[39;00m\n\u001b[0;32m--> 796\u001b[0m \u001b[43mkernel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbind\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 797\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_cache\u001b[38;5;241m.\u001b[39msave_overload(sig, kernel)\n\u001b[1;32m 799\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39madd_overload(kernel, argtypes)\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/numba/cuda/dispatcher.py:178\u001b[0m, in \u001b[0;36m_Kernel.bind\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mbind\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 175\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 176\u001b[0m \u001b[38;5;124;03m Force binding to current CUDA context\u001b[39;00m\n\u001b[1;32m 177\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 178\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_codelibrary\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_cufunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/numba/cuda/codegen.py:208\u001b[0m, in 
\u001b[0;36mCUDACodeLibrary.get_cufunc\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 205\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m cufunc:\n\u001b[1;32m 206\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cufunc\n\u001b[0;32m--> 208\u001b[0m cubin \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_cubin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdevice\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompute_capability\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 209\u001b[0m module \u001b[38;5;241m=\u001b[39m ctx\u001b[38;5;241m.\u001b[39mcreate_module_image(cubin)\n\u001b[1;32m 211\u001b[0m \u001b[38;5;66;03m# Load\u001b[39;00m\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/numba/cuda/codegen.py:181\u001b[0m, in \u001b[0;36mCUDACodeLibrary.get_cubin\u001b[0;34m(self, cc)\u001b[0m\n\u001b[1;32m 179\u001b[0m ptxes \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_ptxes(cc\u001b[38;5;241m=\u001b[39mcc)\n\u001b[1;32m 180\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m ptx \u001b[38;5;129;01min\u001b[39;00m ptxes:\n\u001b[0;32m--> 181\u001b[0m \u001b[43mlinker\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madd_ptx\u001b[49m\u001b[43m(\u001b[49m\u001b[43mptx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencode\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m path \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_linking_files:\n\u001b[1;32m 183\u001b[0m linker\u001b[38;5;241m.\u001b[39madd_file_guess_ext(path)\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/numba/cuda/cudadrv/driver.py:2708\u001b[0m, in \u001b[0;36mCtypesLinker.add_ptx\u001b[0;34m(self, ptx, name)\u001b[0m\n\u001b[1;32m 2705\u001b[0m driver\u001b[38;5;241m.\u001b[39mcuLinkAddData(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandle, enums\u001b[38;5;241m.\u001b[39mCU_JIT_INPUT_PTX,\n\u001b[1;32m 2706\u001b[0m ptxbuf, \u001b[38;5;28mlen\u001b[39m(ptx), namebuf, \u001b[38;5;241m0\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 2707\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m CudaAPIError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m-> 2708\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m LinkerError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m (e, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39merror_log))\n", + "\u001b[0;31mLinkerError\u001b[0m: [222] Call to cuLinkAddData results in UNKNOWN_CUDA_ERROR\nptxas application ptx input, line 9; fatal : Unsupported .version 7.8; current version is '7.7'" + ] } ], "source": [ @@ -1464,7 +1524,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "id": "7bf839e3", "metadata": {}, "outputs": [], @@ -1474,35 +1534,10 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "id": "15ccc448", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "84/84 [==============================] - 8s 40ms/step - loss: 8.7361 - recall_at_10: 0.1869 - mrr_at_10: 0.0721 - ndcg_at_10: 0.0988 - map_at_10: 0.0721 - precision_at_10: 0.0187 - regularization_loss: 0.0000e+00 - loss_batch: 
8.7682\n" - ] - }, - { - "data": { - "text/plain": [ - "{'loss': 8.73610782623291,\n", - " 'recall_at_10': 0.1859131157398224,\n", - " 'mrr_at_10': 0.07267787307500839,\n", - " 'ndcg_at_10': 0.09902743250131607,\n", - " 'map_at_10': 0.07267787307500839,\n", - " 'precision_at_10': 0.01859130710363388,\n", - " 'regularization_loss': 0.0,\n", - " 'loss_batch': 10.154594421386719}" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "model_transformer.evaluate(\n", " valid,\n", diff --git a/T4Rec_repro/reproducing_T4Rec_results_v1.ipynb b/T4Rec_repro/reproducing_T4Rec_results_v1.ipynb index 7048c3725f..03f58fac35 100644 --- a/T4Rec_repro/reproducing_T4Rec_results_v1.ipynb +++ b/T4Rec_repro/reproducing_T4Rec_results_v1.ipynb @@ -427,10 +427,59 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 1, "id": "d07aa5f1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-13 08:59:03.639356: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", + "2023-03-13 08:59:06.043292: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:59:06.043715: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:59:06.043867: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2023-03-13 08:59:06.482103: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-03-13 08:59:06.482984: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:59:06.483177: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:59:06.483311: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:59:07.217453: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:59:07.217641: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:59:07.217778: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 08:59:07.217903: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n" + ] + }, + { + "ename": "NameError", + "evalue": "name 'schema_model' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m InputBlockV2\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mtf\u001b[39;00m\n\u001b[1;32m 3\u001b[0m input_block \u001b[38;5;241m=\u001b[39m InputBlockV2(\n\u001b[0;32m----> 4\u001b[0m \u001b[43mschema_model\u001b[49m,\n\u001b[1;32m 5\u001b[0m categorical\u001b[38;5;241m=\u001b[39mmm\u001b[38;5;241m.\u001b[39mEmbeddings(\n\u001b[1;32m 6\u001b[0m schema_model\u001b[38;5;241m.\u001b[39mselect_by_tag(Tags\u001b[38;5;241m.\u001b[39mCATEGORICAL),\n\u001b[1;32m 7\u001b[0m dim\u001b[38;5;241m=\u001b[39mitem_embedding_dim,\n\u001b[1;32m 8\u001b[0m \u001b[38;5;66;03m#This is equivalent of torch.nn.init.normal_\u001b[39;00m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;66;03m# 
embeddings_initializer=tf.keras.initializers.RandomNormal(\u001b[39;00m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;66;03m# mean=0.0,\u001b[39;00m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# stddev=item_id_embeddings_init_std\u001b[39;00m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;66;03m# ),\u001b[39;00m\n\u001b[1;32m 13\u001b[0m sequence_combiner\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 14\u001b[0m ),\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m#pre=mm.StochasticSwapNoise(schema_model, replacement_prob=0.1) # This is not working with sequences transforms\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# we apply dropout and layer-norm as post-processing steps before aggregation\u001b[39;00m\n\u001b[1;32m 17\u001b[0m post\u001b[38;5;241m=\u001b[39mTabularDropout(input_dropout)\u001b[38;5;241m.\u001b[39mconnect(TabularNorm())\n\u001b[1;32m 18\u001b[0m )\n", + "\u001b[0;31mNameError\u001b[0m: name 'schema_model' is not defined" + ] + } + ], "source": [ "from merlin.models.tf import InputBlockV2\n", "import tensorflow as tf\n", diff --git a/T4Rec_repro/train_and_save_model_for_benchmarking.ipynb b/T4Rec_repro/train_and_save_model_for_benchmarking.ipynb index 0f43a5dac9..f6f757b496 100644 --- a/T4Rec_repro/train_and_save_model_for_benchmarking.ipynb +++ b/T4Rec_repro/train_and_save_model_for_benchmarking.ipynb @@ -3,7 +3,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "54d6ef61", + "id": "d062ceda", "metadata": {}, "outputs": [ { @@ -11,22 +11,31 @@ "output_type": "stream", "text": [ "From https://github.com/NVIDIA-Merlin/Models\n", - " + 20a40d72...a92bdc24 tf/transformer-api -> origin/tf/transformer-api (forced update)\n", - "Warning: you are leaving 5 commits behind, not connected to\n", - "any of your branches:\n", - "\n", - " 20a40d72 fix masking of sequence-predict-next transform\n", - " dbd2d9c8 include PR comments\n", - " 1e642e87 update example notebook with the new API\n", - " e99e7985 add support of ragged tensor to weight tying\n", - " e87913d1 implement new design of the Transformer API on top of the release-23.02 branch\n", - "\n", - "If you want to keep them by creating a new branch, this may be a good time\n", - "to do so with:\n", - "\n", - " git branch 20a40d72\n", - "\n", - "HEAD is now at a92bdc24 adjust sample_weights to targets shape\n" + " * [new branch] ci/horovod -> origin/ci/horovod\n", + " * [new branch] codespell_fix -> origin/codespell_fix\n", + " 16fb4149..c9d3baf4 fea-sok-integration-wj -> origin/fea-sok-integration-wj\n", + " * [new branch] fea-sok-load-dump -> origin/fea-sok-load-dump\n", + " 95462360..28fb60ad gh-pages -> origin/gh-pages\n", + " * [new branch] inference_benchmarking_transformers -> origin/inference_benchmarking_transformers\n", + " 835ad186..a5ac5668 main -> origin/main\n", + " * [new branch] mtl_example -> origin/mtl_example\n", + " cb431a8a..b90e9a1b release-22.12 -> origin/release-22.12\n", + " * [new branch] release-23.02 -> origin/release-23.02\n", + " * [new branch] tf/column_sampling_serialization_fix -> origin/tf/column_sampling_serialization_fix\n", + " * [new branch] tf/continuous_seq_feats_fix -> origin/tf/continuous_seq_feats_fix\n", + " * [new branch] tf/dataloader_changes -> origin/tf/dataloader_changes\n", + " * [new branch] tf/dlrm_dropout_fix -> origin/tf/dlrm_dropout_fix\n", + " * [new branch] tf/fix_broadcast_to_sequence -> origin/tf/fix_broadcast_to_sequence\n", + " * [new branch] tf/fix_training_smaller_accuracy -> origin/tf/fix_training_smaller_accuracy\n", + " * [new branch] 
tf/mtl_example_updates_v2 -> origin/tf/mtl_example_updates_v2\n", + " + 169f3df5...06eecddd tf/output-block -> origin/tf/output-block (forced update)\n", + " * [new branch] tf/process_list_to_prepare_features -> origin/tf/process_list_to_prepare_features\n", + " * [new branch] tf/quick_start_ranking -> origin/tf/quick_start_ranking\n", + " * [new branch] tf/transformer-api -> origin/tf/transformer-api\n", + " + 0a65d603...9f53e8ff update_07 -> origin/update_07 (forced update)\n", + " * [new tag] v23.02.00 -> v23.02.00\n", + "Previous HEAD position was cb431a8a Fix the serialization of `SequenceSummary` block (#927)\n", + "HEAD is now at a86201ee add masking support to SequencePredictRandom transform\n" ] }, { @@ -40,76 +49,77 @@ " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from merlin-models==0.9.0+116.ga92bdc24) (0.0.2+41.gdbf8816)\n", - "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-models==0.9.0+116.ga92bdc24) (0.9.0+54.g29c7587a)\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.3.5)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (0.56.4)\n", - "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (2022.7.1)\n", - "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (2022.5.0)\n", - "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (2022.7.1)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (22.0)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (4.64.1)\n", - "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.12.0)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (3.19.6)\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.2.5)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (8.0.0)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (2.8.2)\n", - "Requirement already satisfied: numpy>=1.17.3; platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from 
pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.22.4)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (2022.7)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (45.2.0)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (0.39.1)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (5.2.0)\n", - "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.3.0)\n", - "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (0.12.0)\n", - "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (2.2.0)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (6.0)\n", - "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (2.4.0)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.7.0)\n", - "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (6.1)\n", - "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.0.4)\n", - "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (8.1.3)\n", - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (2.2.0)\n", - "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.26.13)\n", - "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (5.9.4)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (3.1.2)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.0.0)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in 
/usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.57.0)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.3.0)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (0.4.3)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.2.0)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.14.0)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (3.11.0)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (1.0.1)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (2.1.1)\n", - "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (4.1.0)\n", - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (6.0.4)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (4.0.0)\n" + "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-models==23.2.0+7.ga86201ee) (0.10.0)\n", + "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from merlin-models==23.2.0+7.ga86201ee) (0.0.4)\n", + "Requirement already satisfied: pandas<1.4.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.3.5)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.12.0)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (8.0.0)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (3.19.6)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.5.0)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (4.64.1)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from 
merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.2.5)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.7.1)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.56.4)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.7.1)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (22.0)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.7)\n", + "Requirement already satisfied: numpy>=1.17.3; platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.22.4)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.8.2)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.57.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.4.3)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.2.0)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.1)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.2.0)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.2.0)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.0.4)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.0)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.0.0)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.4.0)\n", + "Requirement already satisfied: urllib3 in 
/usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.26.13)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (5.9.4)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.7.0)\n", + "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (8.1.3)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.12.0)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (3.1.2)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (45.2.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (5.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.39.1)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.3.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.14.0)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (4.1.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.0.4)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.0.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.1.1)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (3.11.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.0.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (4.0.0)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages 
(from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==0.9.0+116.ga92bdc24) (6.0.1)\n", "Building wheels for collected packages: merlin-models\n", " Building wheel for merlin-models (PEP 517): started\n", " Building wheel for merlin-models (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-models: filename=merlin_models-0.9.0+116.ga92bdc24-py3-none-any.whl size=374626 sha256=0b09335e9fef4f6221003e7ba9eb2e1e24b4bfdfd433c8211c5ea32aa2856aed\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-168j85q4/wheels/4d/e8/98/0493db55fff90dc9af123f55a9455b96f7f8166c912a02c8a6\n", + " Created wheel for merlin-models: filename=merlin_models-23.2.0+7.ga86201ee-py3-none-any.whl size=374647 sha256=7566d7a4a90814a6adae96ac4566fa227e750c3301334ed8ae3c852608af406f\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-naqyczcx/wheels/4d/e8/98/0493db55fff90dc9af123f55a9455b96f7f8166c912a02c8a6\n", "Successfully built merlin-models\n", "Installing collected packages: merlin-models\n", " Attempting uninstall: merlin-models\n", - " Found existing installation: merlin-models 0.9.0+114.g20a40d72\n", - " Uninstalling merlin-models-0.9.0+114.g20a40d72:\n", - " Successfully uninstalled merlin-models-0.9.0+114.g20a40d72\n", - "Successfully installed merlin-models-0.9.0+116.ga92bdc24\n" + " Found existing installation: merlin-models 0.11.0\n", + " Uninstalling merlin-models-0.11.0:\n", + " Successfully uninstalled merlin-models-0.11.0\n", + "Successfully installed merlin-models-23.2.0+7.ga86201ee\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "Already on 'main'\n" + "Previous HEAD position was 2fc6889 add schema parameter to the `repartition` method (#192)\n", + "Switched to branch 'main'\n" ] }, { @@ -124,14 +134,131 @@ "output_type": "stream", "text": [ "From https://github.com/NVIDIA-Merlin/core\n", - " * branch main -> FETCH_HEAD\n" + " * branch main -> FETCH_HEAD\n", + " cd96ca5f..2d60d237 main -> origin/main\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Already up to date.\n", + "Updating cd96ca5f..2d60d237\n", + "Fast-forward\n", + " .github/release-drafter.yml | 44 +--\n", + " .github/workflows/ISSUE_TEMPLATE/bug-report.md | 17 +-\n", + " .../ISSUE_TEMPLATE/documentation-request.md | 12 +-\n", + " .../workflows/ISSUE_TEMPLATE/feature-request.md | 5 +-\n", + " .../workflows/ISSUE_TEMPLATE/submit-question.md | 3 +-\n", + " .github/workflows/ISSUE_TEMPLATE/task.md | 5 +-\n", + " .github/workflows/cpu-ci.yml | 145 +++-------\n", + " .github/workflows/cpu-models.yml | 52 ++--\n", + " .github/workflows/cpu-nvtabular.yml | 52 ++--\n", + " .github/workflows/cpu-packages.yml | 126 +++++++++\n", + " .github/workflows/cpu-systems.yml | 52 ++--\n", + " .github/workflows/docs-preview-pr.yaml | 2 +-\n", + " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", + " .github/workflows/gpu-ci.yml | 30 +-\n", + " .github/workflows/release-drafter.yaml | 2 +-\n", + " .pre-commit-config.yaml | 55 ++--\n", + " .prettierignore | 2 +\n", + " CLA.md | 9 +-\n", + " CONTRIBUTING.md | 28 +-\n", + " README.md | 68 ++---\n", + " ci/pr.gpu.Jenkinsfile | 2 +-\n", + " docs/README.md | 49 ++--\n", + " merlin/core/compat.py | 59 +++-\n", + " merlin/core/dispatch.py | 51 +++-\n", + " merlin/dag/__init__.py | 1 +\n", + " merlin/dag/base_operator.py | 30 +-\n", + " merlin/dag/dictarray.py | 3 +-\n", + " merlin/dag/executors.py | 107 ++++---\n", + " merlin/dag/graph.py | 20 ++\n", + " merlin/dag/node.py | 2 +-\n", + " merlin/dag/utils.py | 69 +++++\n", + " 
merlin/dispatch/lazy.py | 152 ++++++++++\n", + " merlin/dtypes/__init__.py | 60 ++++\n", + " merlin/dtypes/aliases.py | 52 ++++\n", + " merlin/dtypes/base.py | 178 ++++++++++++\n", + " merlin/dtypes/mapping.py | 173 ++++++++++++\n", + " merlin/dtypes/mappings/__init__.py | 18 ++\n", + " merlin/dtypes/mappings/cudf.py | 57 ++++\n", + " merlin/dtypes/mappings/numpy.py | 52 ++++\n", + " merlin/dtypes/mappings/pandas.py | 38 +++\n", + " merlin/dtypes/mappings/python.py | 31 ++\n", + " merlin/dtypes/mappings/tf.py | 52 ++++\n", + " merlin/dtypes/mappings/torch.py | 43 +++\n", + " merlin/dtypes/mappings/triton.py | 53 ++++\n", + " merlin/dtypes/registry.py | 142 ++++++++++\n", + " merlin/dtypes/shape.py | 183 ++++++++++++\n", + " merlin/io/avro.py | 4 -\n", + " merlin/io/csv.py | 1 -\n", + " merlin/io/dask.py | 6 +-\n", + " merlin/io/dataset.py | 19 +-\n", + " merlin/io/fsspec_utils.py | 8 +-\n", + " merlin/io/parquet.py | 8 -\n", + " merlin/io/writer.py | 1 -\n", + " merlin/schema/io/tensorflow_metadata.py | 86 +++---\n", + " merlin/schema/schema.py | 312 ++++++++++++---------\n", + " merlin/schema/tags.py | 1 +\n", + " merlin/table/__init__.py | 24 ++\n", + " merlin/table/conversions.py | 135 +++++++++\n", + " merlin/table/cupy_column.py | 92 ++++++\n", + " merlin/table/numpy_column.py | 100 +++++++\n", + " merlin/table/tensor_column.py | 217 ++++++++++++++\n", + " merlin/table/tensor_table.py | 222 +++++++++++++++\n", + " merlin/table/tensorflow_column.py | 159 +++++++++++\n", + " merlin/table/torch_column.py | 124 ++++++++\n", + " requirements.txt | 5 +-\n", + " tests/conftest.py | 16 +-\n", + " tests/unit/core/test_dispatch.py | 19 ++\n", + " tests/unit/core/test_version.py | 4 +\n", + " tests/unit/dag/test_dag_utils.py | 31 ++\n", + " tests/unit/dispatch/test_lazy_dispatch.py | 61 ++++\n", + " tests/unit/dtypes/test_module.py | 48 ++++\n", + " tests/unit/dtypes/test_shape.py | 222 +++++++++++++++\n", + " tests/unit/io/test_io.py | 27 +-\n", + " tests/unit/schema/test_column_schemas.py | 142 ++++++----\n", + " tests/unit/schema/test_schema.py | 22 +-\n", + " tests/unit/schema/test_schema_io.py | 27 +-\n", + " tests/unit/table/test_convert_column.py | 75 +++++\n", + " tests/unit/table/test_tensor_column.py | 186 ++++++++++++\n", + " tests/unit/table/test_tensor_table.py | 311 ++++++++++++++++++++\n", + " tests/unit/utils/test_utils.py | 3 -\n", + " tox.ini | 4 +\n", + " 81 files changed, 4441 insertions(+), 674 deletions(-)\n", + " create mode 100644 .github/workflows/cpu-packages.yml\n", + " create mode 100644 .prettierignore\n", + " create mode 100644 merlin/dag/utils.py\n", + " create mode 100644 merlin/dispatch/lazy.py\n", + " create mode 100644 merlin/dtypes/__init__.py\n", + " create mode 100644 merlin/dtypes/aliases.py\n", + " create mode 100644 merlin/dtypes/base.py\n", + " create mode 100644 merlin/dtypes/mapping.py\n", + " create mode 100644 merlin/dtypes/mappings/__init__.py\n", + " create mode 100644 merlin/dtypes/mappings/cudf.py\n", + " create mode 100644 merlin/dtypes/mappings/numpy.py\n", + " create mode 100644 merlin/dtypes/mappings/pandas.py\n", + " create mode 100644 merlin/dtypes/mappings/python.py\n", + " create mode 100644 merlin/dtypes/mappings/tf.py\n", + " create mode 100644 merlin/dtypes/mappings/torch.py\n", + " create mode 100644 merlin/dtypes/mappings/triton.py\n", + " create mode 100644 merlin/dtypes/registry.py\n", + " create mode 100644 merlin/dtypes/shape.py\n", + " create mode 100644 merlin/table/__init__.py\n", + " create mode 100644 
merlin/table/conversions.py\n", + " create mode 100644 merlin/table/cupy_column.py\n", + " create mode 100644 merlin/table/numpy_column.py\n", + " create mode 100644 merlin/table/tensor_column.py\n", + " create mode 100644 merlin/table/tensor_table.py\n", + " create mode 100644 merlin/table/tensorflow_column.py\n", + " create mode 100644 merlin/table/torch_column.py\n", + " create mode 100644 tests/unit/dag/test_dag_utils.py\n", + " create mode 100644 tests/unit/dispatch/test_lazy_dispatch.py\n", + " create mode 100644 tests/unit/dtypes/test_module.py\n", + " create mode 100644 tests/unit/dtypes/test_shape.py\n", + " create mode 100644 tests/unit/table/test_convert_column.py\n", + " create mode 100644 tests/unit/table/test_tensor_column.py\n", + " create mode 100644 tests/unit/table/test_tensor_table.py\n", "Processing /core\n", " Installing build dependencies: started\n", " Installing build dependencies: finished with status 'done'\n", @@ -139,68 +266,76 @@ " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+54.g29c7587a) (1.12.0)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+54.g29c7587a) (3.19.6)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+54.g29c7587a) (22.0)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+54.g29c7587a) (0.56.4)\n", - "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+54.g29c7587a) (2022.7.1)\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+54.g29c7587a) (1.3.5)\n", - "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+54.g29c7587a) (2022.7.1)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+54.g29c7587a) (4.64.1)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+54.g29c7587a) (8.0.0)\n", - "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+54.g29c7587a) (2022.5.0)\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+54.g29c7587a) (1.2.5)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+54.g29c7587a) (1.57.0)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+54.g29c7587a) (1.3.0)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core==0.9.0+54.g29c7587a) (45.2.0)\n", - "Requirement already satisfied: numpy<1.24,>=1.18 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+54.g29c7587a) (1.22.4)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from 
numba>=0.54->merlin-core==0.9.0+54.g29c7587a) (0.39.1)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+54.g29c7587a) (5.2.0)\n", - "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (5.9.4)\n", - "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (0.12.0)\n", - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (2.2.0)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (6.0)\n", - "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (6.1)\n", - "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (8.1.3)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (1.7.0)\n", - "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (1.26.13)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (1.0.0)\n", - "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (2.4.0)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (3.1.2)\n", - "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (2.2.0)\n", - "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (1.0.4)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+54.g29c7587a) (2.8.2)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+54.g29c7587a) (2022.7)\n", - "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (1.3.0)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+54.g29c7587a) (0.4.3)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+54.g29c7587a) (1.2.0)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core==0.9.0+54.g29c7587a) (3.11.0)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) 
(1.0.1)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core==0.9.0+54.g29c7587a) (2.1.1)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+54.g29c7587a) (1.14.0)\n", - "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+54.g29c7587a) (4.1.0)\n", - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+54.g29c7587a) (6.0.4)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+54.g29c7587a) (4.0.0)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+54.g29c7587a) (6.0.1)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (2022.7.1)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (11.4.1)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (8.0.0)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (2022.7.1)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (4.64.1)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (22.0)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (2022.5.0)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (0.56.4)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (3.19.6)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (1.3.5)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (1.2.5)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (1.12.0)\n", + "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (2.2.0)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (1.3.0)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (0.12.0)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (6.0)\n", + "Requirement already satisfied: 
numpy>=1.16.6 in /usr/local/lib/python3.8/dist-packages (from pyarrow>=5.0.0->merlin-core==0.9.0+60.g2d60d237) (1.22.4)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (1.0.0)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (2.2.0)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (1.0.4)\n", + "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (8.1.3)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (1.7.0)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (6.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (3.1.2)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (5.9.4)\n", + "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (1.26.13)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (2.4.0)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core==0.9.0+60.g2d60d237) (45.2.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+60.g2d60d237) (5.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+60.g2d60d237) (0.39.1)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+60.g2d60d237) (2022.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+60.g2d60d237) (2.8.2)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+60.g2d60d237) (0.4.3)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+60.g2d60d237) (1.2.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+60.g2d60d237) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+60.g2d60d237) (1.57.0)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (1.0.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages 
(from jinja2->distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (2.1.1)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core==0.9.0+60.g2d60d237) (3.11.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+60.g2d60d237) (1.14.0)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+60.g2d60d237) (4.1.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+60.g2d60d237) (6.0.4)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+60.g2d60d237) (4.0.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+60.g2d60d237) (6.0.1)\n", "Building wheels for collected packages: merlin-core\n", " Building wheel for merlin-core (PEP 517): started\n", " Building wheel for merlin-core (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-core: filename=merlin_core-0.9.0+54.g29c7587a-py3-none-any.whl size=152409 sha256=cf0f970219f2ae5dcae772911442f0366c3b3400aaac27967ba709e9c9ac1a22\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-zn63nwq_/wheels/8f/da/8c/c779661788874afaa32fd10abeac6016635956e3bad9940584\n", + " Created wheel for merlin-core: filename=merlin_core-0.9.0+60.g2d60d237-py3-none-any.whl size=152708 sha256=ff70b25964dafa4162daf96e739c4866570e8eec2aa70c8b1f38049656b6b486\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-xyk5t8ph/wheels/8f/da/8c/c779661788874afaa32fd10abeac6016635956e3bad9940584\n", "Successfully built merlin-core\n", "Installing collected packages: merlin-core\n", " Attempting uninstall: merlin-core\n", - " Found existing installation: merlin-core 0.9.0+54.g29c7587a\n", - " Uninstalling merlin-core-0.9.0+54.g29c7587a:\n", - " Successfully uninstalled merlin-core-0.9.0+54.g29c7587a\n", - "Successfully installed merlin-core-0.9.0+54.g29c7587a\n" + " Found existing installation: merlin-core 0.10.0\n", + " Uninstalling merlin-core-0.10.0:\n", + " Successfully uninstalled merlin-core-0.10.0\n", + "Successfully installed merlin-core-0.9.0+60.g2d60d237\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "Already on 'main'\n" + "Previous HEAD position was 020b24b7 Fix output error occurring due to check if it is a dict or not (#1742)\n", + "Switched to branch 'main'\n" ] }, { @@ -215,14 +350,88 @@ "output_type": "stream", "text": [ "From https://github.com/NVIDIA-Merlin/NVTabular\n", - " * branch main -> FETCH_HEAD\n" + " * branch main -> FETCH_HEAD\n", + " c5bc4098..9b186ee9 main -> origin/main\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Already up to date.\n", + "Updating c5bc4098..9b186ee9\n", + "Fast-forward\n", + " .github/ISSUE_TEMPLATE/bug_report.md | 11 +-\n", + " .github/ISSUE_TEMPLATE/documentation-request.md | 3 +-\n", + " .github/ISSUE_TEMPLATE/feature_request.md | 3 +-\n", + " .github/ISSUE_TEMPLATE/operator_request.md | 14 +-\n", + " .github/ISSUE_TEMPLATE/research_question.md | 3 +-\n", + " .github/ISSUE_TEMPLATE/submit-question.md | 3 +-\n", + " .github/ISSUE_TEMPLATE/task.md | 4 +-\n", + 
" .github/release-drafter.yml | 44 ++--\n", + " .github/workflows/blossom-ci.yml | 230 ++++++++++-----------\n", + " .github/workflows/conda-env-create.yml | 30 +--\n", + " .github/workflows/cpu-ci.yml | 138 -------------\n", + " .github/workflows/cpu-packages.yml | 132 ++++++++++++\n", + " .github/workflows/cpu-tests.yml | 69 +++++++\n", + " .github/workflows/docs-preview-pr.yaml | 2 +-\n", + " .github/workflows/docs-sched-rebuild.yaml | 6 +-\n", + " .github/workflows/gpu-ci.yml | 30 ---\n", + " .github/workflows/gpu-tests.yml | 30 +++\n", + " .gitlab-ci.yml | 23 +--\n", + " .pre-commit-config.yaml | 47 +++--\n", + " .prettierignore | 2 +\n", + " CHANGELOG.md | 187 ++++++++---------\n", + " CONTRIBUTING.md | 30 +--\n", + " README.md | 48 ++---\n", + " bench/datasets/tools/train_tensorflow.py | 1 -\n", + " bench/examples/MultiGPUBench.md | 67 +++---\n", + " ci/pr.gpu.Jenkinsfile | 2 +-\n", + " conda/environments/nvtabular_aws_sagemaker.yml | 2 +-\n", + " docs/README.md | 18 +-\n", + " docs/source/core_features.md | 48 ++---\n", + " docs/source/resources/architecture.md | 17 +-\n", + " docs/source/resources/cloud_integration.md | 24 ++-\n", + " docs/source/resources/links.md | 40 ++--\n", + " docs/source/toc.yaml | 12 +-\n", + " examples/01-Getting-started.ipynb | 5 +-\n", + " examples/02-Advanced-NVTabular-workflow.ipynb | 5 +-\n", + " .../03-Running-on-multiple-GPUs-or-on-CPU.ipynb | 5 +-\n", + " examples/README.md | 1 +\n", + " nvtabular/inference/__init__.py | 4 +-\n", + " nvtabular/inference/triton/ensemble.py | 86 ++------\n", + " nvtabular/inference/triton/model/model_pt.py | 1 -\n", + " nvtabular/inference/workflow/hugectr.py | 2 +-\n", + " nvtabular/loader/backend.py | 31 +--\n", + " nvtabular/loader/tensorflow.py | 1 +\n", + " nvtabular/ops/categorify.py | 2 -\n", + " nvtabular/ops/groupby.py | 35 ++--\n", + " nvtabular/ops/join_external.py | 1 -\n", + " nvtabular/ops/join_groupby.py | 18 +-\n", + " nvtabular/ops/list_slice.py | 22 +-\n", + " nvtabular/ops/moments.py | 2 -\n", + " nvtabular/ops/reduce_dtype_size.py | 9 +-\n", + " nvtabular/ops/value_counts.py | 14 +-\n", + " nvtabular/workflow/workflow.py | 113 +++++++++-\n", + " requirements-test.txt | 2 -\n", + " requirements/test.txt | 3 +-\n", + " setup.py | 5 +\n", + " tests/conftest.py | 1 -\n", + " .../test_02-Advanced-NVTabular-workflow.py | 12 +-\n", + " tests/unit/ops/test_column_similarity.py | 1 -\n", + " tests/unit/ops/test_groupyby.py | 2 +-\n", + " tests/unit/ops/test_lambda.py | 28 ++-\n", + " tests/unit/ops/test_ops_schema.py | 25 ++-\n", + " tests/unit/ops/test_value_count.py | 2 +\n", + " tests/unit/workflow/test_workflow.py | 75 ++++++-\n", + " tox.ini | 9 +-\n", + " 64 files changed, 1056 insertions(+), 786 deletions(-)\n", + " delete mode 100644 .github/workflows/cpu-ci.yml\n", + " create mode 100644 .github/workflows/cpu-packages.yml\n", + " create mode 100644 .github/workflows/cpu-tests.yml\n", + " delete mode 100644 .github/workflows/gpu-ci.yml\n", + " create mode 100644 .github/workflows/gpu-tests.yml\n", + " create mode 100644 .prettierignore\n", + " delete mode 100644 requirements-test.txt\n", "Processing /nvtabular\n", " Installing build dependencies: started\n", " Installing build dependencies: finished with status 'done'\n", @@ -230,69 +439,70 @@ " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: merlin-core>=0.2.0 in 
/usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) (0.9.0+54.g29c7587a)\n", + "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) (0.0.4)\n", + "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) (0.9.0+60.g2d60d237)\n", "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) (1.9.3)\n", - "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) (0.0.2+41.gdbf8816)\n", "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.56.4)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (8.0.0)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.19.6)\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.2.5)\n", - "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7.1)\n", "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.5)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (22.0)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.2.5)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (11.4.1)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (4.64.1)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (8.0.0)\n", "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.5.0)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.19.6)\n", "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.12.0)\n", "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7.1)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (4.64.1)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7.1)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from 
merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (22.0)\n", "Requirement already satisfied: numpy<1.26.0,>=1.18.5 in /usr/local/lib/python3.8/dist-packages (from scipy->nvtabular==1.6.0+42.g9b186ee9) (1.22.4)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.39.1)\n", "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (45.2.0)\n", "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (5.2.0)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.39.1)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.4.3)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.8.2)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.4.3)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.2.0)\n", - "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.2.0)\n", - "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.26.13)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.57.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.0)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.0)\n", + "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.2.0)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.12.0)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) 
(5.9.4)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.1.2)\n", - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.2.0)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0)\n", - "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.12.0)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.0.0)\n", - "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.1)\n", "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.0.4)\n", "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (8.1.3)\n", - "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (5.9.4)\n", "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.4.0)\n", "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.7.0)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.8.2)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.57.0)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.0)\n", - "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.0)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.1)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.2.0)\n", + "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.26.13)\n", + "Requirement already satisfied: locket>=1.0.0 in 
/usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.0.0)\n", "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.11.0)\n", - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0.4)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.14.0)\n", "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (4.1.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0.4)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.1.1)\n", "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.0.1)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.14.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0.1)\n", "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (4.0.0)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0.1)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ "Building wheels for collected packages: nvtabular\n", " Building wheel for nvtabular (PEP 517): started\n", " Building wheel for nvtabular (PEP 517): finished with status 'done'\n", - " Created wheel for nvtabular: filename=nvtabular-1.6.0+42.g9b186ee9-cp38-cp38-linux_x86_64.whl size=258506 sha256=20845f4d83c616304250353b73943fa82e251b9514cbd62b7387b83a6d21efe8\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-dt3f85gj/wheels/df/bf/c2/9cc2a62fe6da42038c26a9c0c4e25f9767093528b102fa30a2\n", + " Created wheel for nvtabular: filename=nvtabular-1.6.0+42.g9b186ee9-cp38-cp38-linux_x86_64.whl size=258506 sha256=7731e40e8914024a9c9ea9abe993404858d29604ae832237d2a69c1675161f23\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-18ktqhn2/wheels/df/bf/c2/9cc2a62fe6da42038c26a9c0c4e25f9767093528b102fa30a2\n", "Successfully built nvtabular\n", "Installing collected packages: nvtabular\n", " Attempting uninstall: nvtabular\n", - " Found existing installation: nvtabular 1.6.0+42.g9b186ee9\n", - " Uninstalling nvtabular-1.6.0+42.g9b186ee9:\n", - " Successfully uninstalled nvtabular-1.6.0+42.g9b186ee9\n", + " Found existing installation: nvtabular 1.8.0\n", + " Uninstalling nvtabular-1.8.0:\n", 
+ " Successfully uninstalled nvtabular-1.8.0\n", "Successfully installed nvtabular-1.6.0+42.g9b186ee9\n" ] }, @@ -300,7 +510,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Already on 'main'\n" + "Previous HEAD position was feaf748 adding async tf strategy for gpu memory (#264)\n", + "Switched to branch 'main'\n" ] }, { @@ -315,14 +526,90 @@ "output_type": "stream", "text": [ "From https://github.com/NVIDIA-Merlin/systems\n", - " * branch main -> FETCH_HEAD\n" + " * branch main -> FETCH_HEAD\n", + " 20bb231..329cba4 main -> origin/main\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Already up to date.\n", + "Updating 20bb231..329cba4\n", + "Fast-forward\n", + " .github/ISSUE_TEMPLATE/bug-report.md | 17 +-\n", + " .github/ISSUE_TEMPLATE/documentation-request.md | 12 +-\n", + " .github/ISSUE_TEMPLATE/feature-request.md | 5 +-\n", + " .github/ISSUE_TEMPLATE/submit-question.md | 3 +-\n", + " .github/ISSUE_TEMPLATE/task.md | 5 +-\n", + " .github/release-drafter.yml | 44 +-\n", + " .github/workflows/cpu-ci.yml | 112 ++--\n", + " .github/workflows/docs-preview-pr.yaml | 2 +-\n", + " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", + " .github/workflows/gpu-ci.yml | 32 +-\n", + " .github/workflows/lint.yaml | 12 +-\n", + " .github/workflows/release-drafter.yml | 2 +-\n", + " .pre-commit-config.yaml | 71 +-\n", + " .prettierignore | 2 +\n", + " CLA.md | 9 +-\n", + " CONTRIBUTING.md | 2 +-\n", + " README.md | 2 +-\n", + " ci/pr.gpu.Jenkinsfile | 2 +-\n", + " docs/README.md | 53 +-\n", + " ...ing-An-Implicit-Model-With-Merlin-Systems.ipynb | 5 +-\n", + " ...ving-An-XGboost-Model-With-Merlin-Systems.ipynb | 5 +-\n", + " ...erving-Ranking-Models-With-Merlin-Systems.ipynb | 5 +-\n", + " merlin/systems/dag/dictarray.py | 4 +-\n", + " merlin/systems/dag/op_runner.py | 1 -\n", + " merlin/systems/dag/ops/__init__.py | 11 +-\n", + " merlin/systems/dag/ops/faiss.py | 4 +-\n", + " merlin/systems/dag/ops/feast.py | 80 +--\n", + " merlin/systems/dag/ops/fil.py | 4 +-\n", + " merlin/systems/dag/ops/implicit.py | 72 +-\n", + " merlin/systems/dag/ops/operator.py | 189 +-----\n", + " merlin/systems/dag/ops/pytorch.py | 4 +-\n", + " merlin/systems/dag/ops/session_filter.py | 4 +-\n", + " merlin/systems/dag/ops/softmax_sampling.py | 17 +-\n", + " merlin/systems/dag/ops/unroll_features.py | 4 +-\n", + " merlin/systems/dag/ops/workflow.py | 4 +-\n", + " merlin/systems/dag/runtimes/triton/ops/implicit.py | 185 ++++++\n", + " merlin/systems/dag/runtimes/triton/ops/operator.py | 169 ++++-\n", + " merlin/systems/dag/runtimes/triton/ops/pytorch.py | 2 +-\n", + " .../systems/dag/runtimes/triton/ops/tensorflow.py | 12 +-\n", + " merlin/systems/dag/runtimes/triton/ops/workflow.py | 141 +++-\n", + " merlin/systems/dag/runtimes/triton/runtime.py | 14 +-\n", + " merlin/systems/triton/__init__.py | 33 +-\n", + " merlin/systems/triton/export.py | 724 +--------------------\n", + " merlin/systems/triton/models/executor_model.py | 34 +-\n", + " merlin/systems/triton/models/oprunner_model.py | 32 +-\n", + " merlin/systems/triton/models/pytorch_model.py | 127 ++--\n", + " merlin/systems/triton/models/workflow_model.py | 50 +-\n", + " merlin/systems/triton/utils.py | 35 +-\n", + " tests/conftest.py | 4 +-\n", + " ...erving_an_implicit_model_with_merlin_systems.py | 4 +-\n", + " ...serving_an_xgboost_model_with_merlin_systems.py | 4 +-\n", + " tests/unit/systems/dag/ops/test_ops.py | 20 +-\n", + " .../runtimes/local/ops/nvtabular/test_ensemble.py | 2 +-\n", + " .../triton/ops/fil/test_lightgbm_triton.py 
| 4 +-\n", + " .../runtimes/triton/ops/fil/test_sklearn_triton.py | 4 +-\n", + " .../runtimes/triton/ops/fil/test_xgboost_triton.py | 4 +-\n", + " .../dag/runtimes/triton/ops/torch/test_op.py | 4 +-\n", + " .../runtimes/triton/ops/workflow/test_ensemble.py | 67 +-\n", + " .../systems/dag/runtimes/triton/test_triton.py | 4 +-\n", + " tests/unit/systems/dag/test_dict_array.py | 4 +-\n", + " tests/unit/systems/dag/test_executors.py | 4 +-\n", + " tests/unit/systems/ops/faiss/test_executor.py | 4 +-\n", + " tests/unit/systems/ops/feast/test_op.py | 46 +-\n", + " tests/unit/systems/ops/fil/test_ensemble.py | 4 +-\n", + " tests/unit/systems/ops/implicit/test_executor.py | 4 +-\n", + " tests/unit/systems/ops/implicit/test_op.py | 11 +-\n", + " tests/unit/systems/ops/tf/test_ensemble.py | 4 +-\n", + " tests/unit/systems/utils/ops.py | 7 +-\n", + " tests/unit/test_export.py | 77 ---\n", + " tox.ini | 1 -\n", + " 70 files changed, 1072 insertions(+), 1580 deletions(-)\n", + " create mode 100644 .prettierignore\n", + " create mode 100644 merlin/systems/dag/runtimes/triton/ops/implicit.py\n", + " delete mode 100644 tests/unit/test_export.py\n", "Processing /systems\n", " Installing build dependencies: started\n", " Installing build dependencies: finished with status 'done'\n", @@ -330,76 +617,77 @@ " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: treelite-runtime==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.4.0)\n", - "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (0.9.0+54.g29c7587a)\n", - "Requirement already satisfied: requests<3,>=2.10 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.28.1)\n", "Requirement already satisfied: treelite==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.4.0)\n", + "Requirement already satisfied: treelite-runtime==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.4.0)\n", "Requirement already satisfied: nvtabular>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (1.6.0+42.g9b186ee9)\n", - "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from treelite-runtime==2.4.0->merlin-systems==0.7.0+61.g329cba4) (1.9.3)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.8/dist-packages (from treelite-runtime==2.4.0->merlin-systems==0.7.0+61.g329cba4) (1.22.4)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.56.4)\n", - "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7.1)\n", - "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7.1)\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.3.5)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (22.0)\n", + 
"Requirement already satisfied: requests<3,>=2.10 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.28.1)\n", + "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (0.9.0+60.g2d60d237)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from treelite==2.4.0->merlin-systems==0.7.0+61.g329cba4) (1.9.3)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.8/dist-packages (from treelite==2.4.0->merlin-systems==0.7.0+61.g329cba4) (1.22.4)\n", + "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from nvtabular>=1.0.0->merlin-systems==0.7.0+61.g329cba4) (0.0.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2.8)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (1.26.13)\n", + "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2.1.1)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2019.11.28)\n", "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.2.5)\n", - "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.12.0)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.64.1)\n", "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.5.0)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (22.0)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7.1)\n", "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (8.0.0)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.19.6)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (1.26.13)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2019.11.28)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2.8)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2.1.1)\n", - "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from 
nvtabular>=1.0.0->merlin-systems==0.7.0+61.g329cba4) (0.0.2+41.gdbf8816)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.39.1)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (5.2.0)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (45.2.0)\n", - "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.2.0)\n", - "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.3.0)\n", - "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.12.0)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0)\n", - "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (8.1.3)\n", - "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.4.0)\n", - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.2.0)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.0)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.1.2)\n", - "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.4)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.7.0)\n", - "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (5.9.4)\n", - "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.1)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.8.2)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from 
betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.2.0)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.4.3)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.57.0)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.3.0)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.11.0)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.1)\n" + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7.1)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.12.0)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.64.1)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (11.4.1)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.3.5)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.56.4)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.19.6)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.4.3)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.2.0)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.12.0)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.4.0)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.4)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from 
distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.1)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (5.9.4)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.0)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.1.2)\n", + "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (8.1.3)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.2.0)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.7.0)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.2.0)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.3.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.57.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (45.2.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (5.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.39.1)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0.4)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.1.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.1.1)\n", + "Requirement already satisfied: heapdict in 
/usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.1)\n", "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.14.0)\n", - "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.1.0)\n", - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0.4)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.11.0)\n", "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0.1)\n", "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.0.0)\n", "Building wheels for collected packages: merlin-systems\n", " Building wheel for merlin-systems (PEP 517): started\n", " Building wheel for merlin-systems (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-systems: filename=merlin_systems-0.7.0+61.g329cba4-py3-none-any.whl size=99480 sha256=c9ed3baf0f65ac381e50f14a63222abcbac99f78a39f4f04bd7e6828a7ed9c16\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-zfooq_xi/wheels/1f/e9/71/1b0c6295aa7f4b37cb70292d96d87d9f38204674e6531bdda6\n", + " Created wheel for merlin-systems: filename=merlin_systems-0.7.0+61.g329cba4-py3-none-any.whl size=99480 sha256=ddfc752fa7ed3e5062808e4652c1d9967ac2d68ec1847cb24cfbe573a88ed6a9\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-lnbqyxql/wheels/1f/e9/71/1b0c6295aa7f4b37cb70292d96d87d9f38204674e6531bdda6\n", "Successfully built merlin-systems\n", "Installing collected packages: merlin-systems\n", " Attempting uninstall: merlin-systems\n", - " Found existing installation: merlin-systems 0.7.0+61.g329cba4\n", - " Uninstalling merlin-systems-0.7.0+61.g329cba4:\n", - " Successfully uninstalled merlin-systems-0.7.0+61.g329cba4\n", + " Found existing installation: merlin-systems 0.9.0\n", + " Uninstalling merlin-systems-0.9.0:\n", + " Successfully uninstalled merlin-systems-0.9.0\n", "Successfully installed merlin-systems-0.7.0+61.g329cba4\n" ] }, @@ -407,7 +695,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Already on 'main'\n" + "Previous HEAD position was fd5d3fc Use tf.function for list column operations (#89)\n", + "Switched to branch 'main'\n" ] }, { @@ -422,14 +711,43 @@ "output_type": "stream", "text": [ "From https://github.com/NVIDIA-Merlin/dataloader\n", - " * branch main -> FETCH_HEAD\n" + " * branch main -> FETCH_HEAD\n", + " 5b3fe46..ce2215d main -> origin/main\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Already up to date.\n", + "Updating 5b3fe46..ce2215d\n", + "Fast-forward\n", + " .github/workflows/cpu-ci.yml | 81 -----\n", + " .github/workflows/cpu-packages.yml | 125 +++++++\n", + " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", + " .pre-commit-config.yaml | 14 +-\n", + " ci/pr.gpu.Jenkinsfile | 44 
+++\n", + " docs/README.md | 28 +-\n", + " examples/01a-Getting-started-Tensorflow.ipynb | 5 +-\n", + " examples/01b-Getting-started-Pytorch.ipynb | 9 +-\n", + " .../02-Multi-GPU-Tensorflow-with-Horovod.ipynb | 371 +++++++++++++++++++++\n", + " merlin/dataloader/jax.py | 3 +\n", + " merlin/dataloader/loader_base.py | 221 ++++--------\n", + " merlin/dataloader/ops/embeddings/embedding_op.py | 4 +-\n", + " .../ops/embeddings/torch_embedding_op.py | 4 +-\n", + " merlin/dataloader/tensorflow.py | 9 +-\n", + " merlin/dataloader/torch.py | 49 ++-\n", + " merlin/dataloader/utils/tf/tf_trainer.py | 2 +-\n", + " .../test_multi_GPU_with_horovod_and_tensorflow.py | 28 ++\n", + " tests/unit/dataloader/test_tf_dataloader.py | 20 +-\n", + " tests/unit/dataloader/test_tf_embeddings.py | 24 +-\n", + " tests/unit/dataloader/test_torch_dataloader.py | 38 +++\n", + " tests/unit/dataloader/test_torch_embeddings.py | 12 +-\n", + " tox.ini | 1 +\n", + " 22 files changed, 801 insertions(+), 298 deletions(-)\n", + " create mode 100644 .github/workflows/cpu-packages.yml\n", + " create mode 100644 ci/pr.gpu.Jenkinsfile\n", + " create mode 100644 examples/02-Multi-GPU-Tensorflow-with-Horovod.ipynb\n", + " create mode 100644 tests/examples/test_multi_GPU_with_horovod_and_tensorflow.py\n", "Processing /dataloader\n", " Installing build dependencies: started\n", " Installing build dependencies: finished with status 'done'\n", @@ -437,81 +755,90 @@ " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: merlin-core>=0.8.0 in /usr/local/lib/python3.8/dist-packages (from merlin-dataloader==0.0.2+41.gdbf8816) (0.9.0+54.g29c7587a)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (8.0.0)\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.2.5)\n", - "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2022.7.1)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (0.56.4)\n", - "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2022.5.0)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (4.64.1)\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.3.5)\n", - "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2022.7.1)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (22.0)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (3.19.6)\n", - "Requirement already satisfied: tensorflow-metadata>=1.2.0 in 
/usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.12.0)\n", - "Requirement already satisfied: numpy>=1.16.6 in /usr/local/lib/python3.8/dist-packages (from pyarrow>=5.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.22.4)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (0.4.3)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.2.0)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (6.0)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.7.0)\n", - "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (5.9.4)\n", - "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.4.0)\n", - "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (0.12.0)\n", - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.2.0)\n", - "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (6.1)\n", - "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.26.13)\n", - "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.0.4)\n", - "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (8.1.3)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (3.1.2)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.0.0)\n", - "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.2.0)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (45.2.0)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (5.2.0)\n", - "Requirement already satisfied: 
llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (0.39.1)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2022.7)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.8.2)\n", - "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.3.0)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.3.0)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.57.0)\n", - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (6.0.4)\n", - "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (4.1.0)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.0.1)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (2.1.1)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (3.11.0)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (1.14.0)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (6.0.1)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+41.gdbf8816) (4.0.0)\n" + "Requirement already satisfied: merlin-core>=0.8.0 in /usr/local/lib/python3.8/dist-packages (from merlin-dataloader==0.0.2+43.gce2215d) (0.9.0+60.g2d60d237)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.3.5)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (4.64.1)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2022.7.1)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from 
merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (8.0.0)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (22.0)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (11.4.1)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (3.19.6)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2022.7.1)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (0.56.4)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.12.0)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.2.5)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2022.5.0)\n", + "Requirement already satisfied: numpy>=1.17.3; platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.22.4)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2022.7)\n", + "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (8.1.3)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2.4.0)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2.2.0)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (3.1.2)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.7.0)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.0.4)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (5.9.4)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from 
distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2.2.0)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (6.1)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (0.12.0)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (6.0)\n", + "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.26.13)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.0.0)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.3.0)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (45.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (0.39.1)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (5.2.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.57.0)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (0.4.3)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.2.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.14.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2.1.1)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.0.1)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (3.11.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from 
grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (6.0.4)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (4.1.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (6.0.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (4.0.0)\n", "Building wheels for collected packages: merlin-dataloader\n", " Building wheel for merlin-dataloader (PEP 517): started\n", " Building wheel for merlin-dataloader (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-dataloader: filename=merlin_dataloader-0.0.2+41.gdbf8816-py3-none-any.whl size=40852 sha256=60948b9af68c37dfacd1e48a9fdaaad2f9c78225e14116de0d4b643853d839bb\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-fwvmtvqd/wheels/8c/19/5b/15dc04f5a977f6a7f73ed66c91996a687b1d9e3154a4765536\n", + " Created wheel for merlin-dataloader: filename=merlin_dataloader-0.0.2+43.gce2215d-py3-none-any.whl size=40867 sha256=1448516ec061e7ef5df449df29f4896705367b7602040fb55c679508f76d85a2\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-ukzco8eb/wheels/8c/19/5b/15dc04f5a977f6a7f73ed66c91996a687b1d9e3154a4765536\n", "Successfully built merlin-dataloader\n", "Installing collected packages: merlin-dataloader\n", " Attempting uninstall: merlin-dataloader\n", - " Found existing installation: merlin-dataloader 0.0.2+41.gdbf8816\n", - " Uninstalling merlin-dataloader-0.0.2+41.gdbf8816:\n", - " Successfully uninstalled merlin-dataloader-0.0.2+41.gdbf8816\n", - "Successfully installed merlin-dataloader-0.0.2+41.gdbf8816\n", - "Requirement already satisfied: matplotlib in /usr/local/lib/python3.8/dist-packages (3.7.1)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (22.0)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (0.11.0)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (4.39.0)\n", - "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (9.4.0)\n", - "Requirement already satisfied: importlib-resources>=3.2.0; python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from matplotlib) (5.10.2)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (1.4.4)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (1.0.7)\n", + " Found existing installation: merlin-dataloader 0.0.4\n", + " Uninstalling merlin-dataloader-0.0.4:\n", + " Successfully uninstalled merlin-dataloader-0.0.4\n", + "Successfully installed merlin-dataloader-0.0.2+43.gce2215d\n", + "Collecting matplotlib\n", + " Downloading matplotlib-3.7.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (9.2 MB)\n", + "Collecting pillow>=6.2.0\n", + " Downloading Pillow-9.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)\n", "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.8/dist-packages 
(from matplotlib) (2.8.2)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (3.0.9)\n", + "Collecting kiwisolver>=1.0.1\n", + " Downloading kiwisolver-1.4.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.2 MB)\n", + "Requirement already satisfied: importlib-resources>=3.2.0; python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from matplotlib) (5.10.2)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (22.0)\n", + "Collecting contourpy>=1.0.1\n", + " Downloading contourpy-1.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (300 kB)\n", + "Collecting cycler>=0.10\n", + " Downloading cycler-0.11.0-py3-none-any.whl (6.4 kB)\n", "Requirement already satisfied: numpy>=1.20 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (1.22.4)\n", + "Collecting fonttools>=4.22.0\n", + " Downloading fonttools-4.39.0-py3-none-any.whl (1.0 MB)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (3.0.9)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7->matplotlib) (1.14.0)\n", "Requirement already satisfied: zipp>=3.1.0; python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from importlib-resources>=3.2.0; python_version < \"3.10\"->matplotlib) (3.11.0)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7->matplotlib) (1.14.0)\n" + "Installing collected packages: pillow, kiwisolver, contourpy, cycler, fonttools, matplotlib\n", + "Successfully installed contourpy-1.0.7 cycler-0.11.0 fonttools-4.39.0 kiwisolver-1.4.4 matplotlib-3.7.1 pillow-9.4.0\n" ] } ], @@ -528,26 +855,30 @@ }, { "cell_type": "code", - "execution_count": 1, - "id": "152aee86", + "execution_count": 2, + "id": "e9929dc8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: gdown in /usr/local/lib/python3.8/dist-packages (4.6.4)\n", - "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from gdown) (4.64.1)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from gdown) (3.9.0)\n", + "Collecting gdown\n", + " Downloading gdown-4.6.4-py3-none-any.whl (14 kB)\n", "Requirement already satisfied: requests[socks] in /usr/local/lib/python3.8/dist-packages (from gdown) (2.28.1)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from gdown) (4.64.1)\n", "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.8/dist-packages (from gdown) (4.11.1)\n", "Requirement already satisfied: six in /usr/lib/python3/dist-packages (from gdown) (1.14.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from gdown) (3.9.0)\n", + "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (2.1.1)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2019.11.28)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.26.13)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2.8)\n", - "Requirement already satisfied: 
certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2019.11.28)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (2.1.1)\n", - "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6; extra == \"socks\" in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.7.1)\n", - "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.8/dist-packages (from beautifulsoup4->gdown) (2.3.2.post1)\n" + "Collecting PySocks!=1.5.7,>=1.5.6; extra == \"socks\"\n", + " Downloading PySocks-1.7.1-py3-none-any.whl (16 kB)\n", + "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.8/dist-packages (from beautifulsoup4->gdown) (2.3.2.post1)\n", + "Installing collected packages: gdown, PySocks\n", + "Successfully installed PySocks-1.7.1 gdown-4.6.4\n" ] }, { @@ -556,26 +887,41 @@ "text": [ "Downloading...\n", "From: https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", - "To: /workspace/rees46_ecom_dataset_small_for_ci.zip\n", - "100%|██████████| 43.4M/43.4M [00:08<00:00, 5.42MB/s]\n" + "To: /workspace/T4Rec_repro/rees46_ecom_dataset_small_for_ci.zip\n", + "100%|██████████| 43.4M/43.4M [00:07<00:00, 6.14MB/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 InRelease\n", - "Get:2 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]\n", - "Hit:3 http://archive.ubuntu.com/ubuntu focal InRelease\n", - "Get:4 http://archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]\n", - "Get:5 http://archive.ubuntu.com/ubuntu focal-backports InRelease [108 kB]\n", - "Fetched 336 kB in 3s (129 kB/s)\n", + "Get:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 InRelease [1581 B]\n", + "Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 Packages [920 kB]\n", + "Get:3 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]\n", + "Get:4 http://archive.ubuntu.com/ubuntu focal InRelease [265 kB]\n", + "Get:5 http://security.ubuntu.com/ubuntu focal-security/universe amd64 Packages [1017 kB]\n", + "Get:6 http://archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]\n", + "Get:7 http://archive.ubuntu.com/ubuntu focal-backports InRelease [108 kB]\n", + "Get:8 http://archive.ubuntu.com/ubuntu focal/restricted amd64 Packages [33.4 kB]\n", + "Get:9 http://archive.ubuntu.com/ubuntu focal/main amd64 Packages [1275 kB]\n", + "Get:10 http://security.ubuntu.com/ubuntu focal-security/multiverse amd64 Packages [28.5 kB]\n", + "Get:11 http://security.ubuntu.com/ubuntu focal-security/main amd64 Packages [2544 kB]\n", + "Get:12 http://archive.ubuntu.com/ubuntu focal/universe amd64 Packages [11.3 MB]\n", + "Get:13 http://security.ubuntu.com/ubuntu focal-security/restricted amd64 Packages [1998 kB]\n", + "Get:14 http://archive.ubuntu.com/ubuntu focal/multiverse amd64 Packages [177 kB]\n", + "Get:15 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 Packages [3019 kB]\n", + "Get:16 http://archive.ubuntu.com/ubuntu focal-updates/restricted amd64 Packages [2134 kB]\n", + "Get:17 http://archive.ubuntu.com/ubuntu focal-updates/universe amd64 Packages [1312 kB]\n", + "Get:18 http://archive.ubuntu.com/ubuntu focal-updates/multiverse amd64 Packages [31.2 kB]\n", + "Get:19 http://archive.ubuntu.com/ubuntu focal-backports/main amd64 Packages [55.2 kB]\n", + "Get:20 
http://archive.ubuntu.com/ubuntu focal-backports/universe amd64 Packages [28.6 kB]\n", + "Fetched 26.5 MB in 10s (2574 kB/s)\n", "Reading package lists...\n", "Reading package lists...\n", "Building dependency tree...\n", "Reading state information...\n", "unzip is already the newest version (6.0-25ubuntu1.1).\n", - "0 upgraded, 0 newly installed, 0 to remove and 83 not upgraded.\n", + "0 upgraded, 0 newly installed, 0 to remove and 88 not upgraded.\n", "Archive: rees46_ecom_dataset_small_for_ci.zip\n", " creating: ecom_dataset/0001/\n", " inflating: ecom_dataset/0001/valid.parquet \n", @@ -613,10 +959,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-03-08 00:23:08.749959: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", - " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n" + "2023-03-15 06:40:18.761460: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" ] }, { @@ -630,21 +974,23 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-03-08 00:23:11.232785: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-08 00:23:11.233226: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-08 00:23:11.233386: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", + "2023-03-15 06:40:21.081059: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-15 06:40:21.081515: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-15 06:40:21.081687: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n", - "2023-03-08 00:23:11.674938: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-03-15 06:40:21.521454: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-03-08 00:23:11.675977: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-08 00:23:11.676191: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-08 00:23:11.676346: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-08 00:23:12.417852: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-08 00:23:12.418073: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-08 00:23:12.418234: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-08 00:23:12.418351: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", - "2023-03-08 00:23:12.418418: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n" + "2023-03-15 06:40:21.522384: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-15 06:40:21.522595: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-15 06:40:21.522750: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-15 06:40:22.246877: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-15 06:40:22.247075: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read 
from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-15 06:40:22.247224: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-15 06:40:22.247337: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", + "2023-03-15 06:40:22.247404: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n" ] } ], @@ -663,33 +1009,18 @@ }, { "cell_type": "code", - "execution_count": 2, - "id": "81e7f635", - "metadata": {}, - "outputs": [], - "source": [ - "# this is only temporary, we can align the functionality with the CI script later on\n", - "\n", - "DATA_FOLDER = os.environ.get(\n", - " \"DATA_FOLDER\", \n", - " 'ecom_dataset/0002'\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, + "execution_count": 33, "id": "11647dd3", "metadata": {}, "outputs": [], "source": [ - "train = Dataset(os.path.join(DATA_FOLDER, \"train.parquet\"))\n", - "valid = Dataset(os.path.join(DATA_FOLDER, \"valid.parquet\"))" + "train = Dataset(\"ecom_dataset/0001/train.parquet\")\n", + "valid = Dataset(\"ecom_dataset/0002/test.parquet\")" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "4ab4e0fb", "metadata": {}, "outputs": [], @@ -699,232 +1030,119 @@ }, { "cell_type": "code", - "execution_count": 5, - "id": "792daa9d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nametagsdtypeis_listis_raggedproperties.value_count.minproperties.value_count.max
0sess_pid_seq()DType(name='int32', element_type=<ElementType....TrueTrue0None
\n", - "
" - ], - "text/plain": [ - "[{'name': 'sess_pid_seq', 'tags': set(), 'properties': {'value_count': {'min': 0, 'max': None}}, 'dtype': DType(name='int32', element_type=, element_size=32, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=0, max=None)))), 'is_list': True, 'is_ragged': True}]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "train.schema.select_by_name('sess_pid_seq')" - ] - }, - { - "cell_type": "code", - "execution_count": 1, + "execution_count": 4, "id": "8d9903e6", "metadata": {}, "outputs": [], "source": [ - "# a couple of hyperparams I took from the CI script in T4Rec\n", + "# a couple of starter hyperparams\n", "\n", "d_model = 192\n", "n_layer = 3\n", "n_head = 16\n", "batch_size = 128\n", - "learning_rate = 0.0006667377132554976" + "learning_rate = 0.0006667377132554976\n", + "n_epoch = 1\n", + "item_embedding_dim = 448 \n", + "item_id_embeddings_init_std = 3" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "id": "a6ade14a", "metadata": {}, "outputs": [], "source": [ - "mlp_block = mm.MLPBlock(\n", - " [128,d_model],\n", - " activation='relu',\n", - " no_activation_last_layer=True,\n", - " )" + "def get_model():\n", + " mlp_block = mm.MLPBlock(\n", + " [d_model],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )\n", + "\n", + " from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n", + "\n", + " schema = TensorflowMetadata.from_proto_text_file(\n", + " './',\n", + " file_name='rees46_schema_modified.pbtxt'\n", + " ).to_merlin_schema()\n", + "\n", + " train.schema = schema\n", + " \n", + " schema_model = schema.select_by_tag(Tags.ITEM_ID)\n", + " input_block = mm.InputBlockV2(\n", + " schema_model,\n", + " categorical=mm.Embeddings(\n", + " schema_model.select_by_tag(Tags.CATEGORICAL),\n", + " dim=item_embedding_dim,\n", + " sequence_combiner=None,\n", + " )\n", + " )\n", + "\n", + " train.schema = train.schema.select_by_name('sess_pid_seq')\n", + "\n", + " xlnet_block = mm.XLNetBlock(d_model=d_model, n_head=n_head, n_layer=n_layer)\n", + "\n", + " dense_block = mm.SequentialBlock(\n", + " input_block,\n", + " mlp_block,\n", + " xlnet_block\n", + " )\n", + "\n", + " mlp_block2 = mm.MLPBlock(\n", + " [item_embedding_dim],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )\n", + "\n", + " prediction_task = mm.CategoricalOutput(\n", + " to_call=input_block[\"categorical\"][target],\n", + " )\n", + "\n", + " model_transformer = mm.Model(dense_block, mlp_block2, prediction_task)\n", + "\n", + " optimizer = tf.keras.optimizers.Adam(\n", + " learning_rate=learning_rate,\n", + " )\n", + "\n", + " model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", + " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[20])\n", + " )\n", + " return model_transformer, xlnet_block" ] }, { "cell_type": "code", - "execution_count": 8, - "id": "7f15a0a0", + "execution_count": 6, + "id": "523fe2ac", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:148: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. 
Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n" + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n", + "2023-03-15 06:40:28.698077: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" ] - } - ], - "source": [ - "from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n", - "\n", - "schema = TensorflowMetadata.from_proto_text_file(\n", - " './',\n", - " file_name='rees46_schema_modified.pbtxt'\n", - ").to_merlin_schema()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "74ccc9a9", - "metadata": {}, - "outputs": [], - "source": [ - "train.schema = schema" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "b2aa0beb", - "metadata": {}, - "outputs": [ + }, { "name": "stdout", "output_type": "stream", "text": [ - "{'sess_pid_seq': (, ), 'sess_ccid_seq': , 'sess_csid_seq': , 'sess_bid_seq': , 'sess_price_log_norm_seq': , 'sess_relative_price_to_avg_category_seq': , 'sess_prod_recency_days_log_norm_seq': , 'sess_et_hour_sin_seq': , 'sess_et_hour_cos_seq': , 'sess_et_dayofweek_sin_seq': , 'sess_et_dayofweek_cos_seq': , 'sess_etime_seq': }\n" + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" ] - } - ], - "source": [ - "from merlin.loader.tensorflow import Loader\n", - "\n", - "data = train\n", - "dataloader = Loader(data, batch_size=5)\n", - "batch = next(dataloader)\n", - "print(batch[0])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e64a9c0d", - "metadata": {}, - "outputs": [], - "source": [ - "# import nvtabular as nvt\n", - "\n", - "# ops = ['sess_pid_seq'] >> nvt.ops.Categorify()\n", - "\n", - "# wf = nvt.Workflow(ops)\n", - "# train = wf.fit_transform(train)\n", - "# valid = wf.transform(valid)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "292ef9ba", - "metadata": {}, - "outputs": [ + }, { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:148: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. 
Please use the atomic versions of these tags, like [, ].\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:436: UserWarning: Converting sparse IndexedSlices to a dense Tensor with 174720448 elements. This may consume a large amount of memory.\n", " warnings.warn(\n" ] }, @@ -932,247 +1150,756 @@ "name": "stdout", "output_type": "stream", "text": [ - "(128, None, 192)\n" + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 106s 145ms/step - loss: 7.4476 - recall_at_20: 0.1321 - mrr_at_20: 0.0713 - ndcg_at_20: 0.0847 - map_at_20: 0.0713 - precision_at_20: 0.0066 - regularization_loss: 0.0000e+00 - loss_batch: 7.4395\n", + "84/84 [==============================] - 8s 43ms/step - loss: 8.5501 - recall_at_20: 0.2267 - mrr_at_20: 0.0746 - ndcg_at_20: 0.1080 - map_at_20: 0.0746 - precision_at_20: 0.0113 - regularization_loss: 0.0000e+00 - loss_batch: 8.5600\n" ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.550110816955566,\n", + " 'recall_at_20': 0.2287944257259369,\n", + " 'mrr_at_20': 0.07337629050016403,\n", + " 'ndcg_at_20': 0.10753783583641052,\n", + " 'map_at_20': 0.07337629050016403,\n", + " 'precision_at_20': 0.011439722031354904,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 8.98563003540039}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "batch = mm.sample_batch(train, batch_size=batch_size, include_targets=False, to_ragged=True)\n", - "print(input_block(batch).shape)" + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "cd25c97a", + "metadata": {}, + "outputs": [], + "source": [ + "from nvtabular.inference.triton import export_tensorflow_ensemble\n", + "from nvtabular import Workflow\n", + "from nvtabular.ops import Categorify" ] }, { "cell_type": "code", 
"execution_count": 35, - "id": "34c739b3", + "id": "18476ff8", "metadata": {}, "outputs": [], "source": [ - "train.schema = train.schema.select_by_name('sess_pid_seq')" + "ops = ['sess_pid_seq'] >> Categorify()" ] }, { "cell_type": "code", "execution_count": 36, - "id": "5a4c7ca3", + "id": "019b49e7", "metadata": {}, "outputs": [], "source": [ - "input_block = mm.InputBlockV2(\n", - " train.schema.select_by_name('sess_pid_seq'), \n", - " embeddings=mm.Embeddings(\n", - " train.schema.select_by_name('sess_pid_seq'), \n", - " sequence_combiner=None,\n", - " dim=d_model\n", - " )\n", - ")" + "wf = Workflow(ops)" ] }, { "cell_type": "code", "execution_count": 37, - "id": "14c35b2a", + "id": "4d519e09", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sess_pid_seq
0[2350, 27483, 2350, 221, 223, 450]
1[26562, 3233, 20844, 20946]
2[20611, 9566, 3411, 6358, 8434, 1282, 1218]
3[749, 476]
4[53988, 54681, 20488, 26337, 42209, 56005, 263...
......
86548[6547, 5690]
86549[20613, 30652, 20613]
86550[6, 9, 6]
86551[2584, 6531, 16567, 5737, 6531, 19856, 2584, 1...
86552[4793, 10632]
\n", + "

86553 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " sess_pid_seq\n", + "0 [2350, 27483, 2350, 221, 223, 450]\n", + "1 [26562, 3233, 20844, 20946]\n", + "2 [20611, 9566, 3411, 6358, 8434, 1282, 1218]\n", + "3 [749, 476]\n", + "4 [53988, 54681, 20488, 26337, 42209, 56005, 263...\n", + "... ...\n", + "86548 [6547, 5690]\n", + "86549 [20613, 30652, 20613]\n", + "86550 [6, 9, 6]\n", + "86551 [2584, 6531, 16567, 5737, 6531, 19856, 2584, 1...\n", + "86552 [4793, 10632]\n", + "\n", + "[86553 rows x 1 columns]" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "xlnet_block = mm.XLNetBlock(d_model=d_model, n_head=n_head, n_layer=n_layer)" + "wf.fit_transform(train).compute()" ] }, { "cell_type": "code", - "execution_count": 38, - "id": "866f3249", + "execution_count": 48, + "id": "34f29750", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): 
Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n", + "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_predict_next_layer_call_fn, sequence_predict_next_layer_call_and_return_conditional_losses, sequence_predict_last_layer_call_fn while saving (showing 5 of 110). These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: t4rec_model/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: t4rec_model/assets\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:83: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " config[i] = tf.keras.utils.serialize_keras_object(layer)\n", + "/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:134: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " return generic_utils.serialize_keras_object(obj)\n", + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n" + ] + } + ], "source": [ - "dense_block = mm.SequentialBlock(\n", - " input_block,\n", - " mlp_block,\n", - " xlnet_block\n", - ")" + "model_transformer.save('t4rec_model')" ] }, { "cell_type": "code", - "execution_count": 39, - "id": "288d08df", + "execution_count": 49, + "id": "e8cba91e", "metadata": {}, "outputs": [], "source": [ - "mlp_block2 = mm.MLPBlock(\n", - " [128,d_model],\n", - " activation='relu',\n", - " no_activation_last_layer=True,\n", - " )" + "rm -rf " ] }, { "cell_type": "code", - "execution_count": 40, - "id": "064ea5ec", + "execution_count": 52, + "id": "b45e6f51", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. 
Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n" + ] + } + ], "source": [ - "prediction_task = mm.CategoricalOutput(\n", - " to_call=input_block[\"categorical\"][target],\n", - ")" + "import merlin.models.tf as mm\n", + "import tensorflow as tf\n", + "tf_model_path = os.path.join('t4rec_model')\n", + "\n", + "model = tf.keras.models.load_model(tf_model_path)" ] }, { "cell_type": "code", - "execution_count": 41, - "id": "6c008e16", + "execution_count": 60, + "id": "ff70e763", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nametagsdtypeis_listis_raggedproperties.value_count.minproperties.value_count.max
0sess_pid_seq()DType(name='int32', element_type=<ElementType....TrueTrue0None
\n", + "
" + ], + "text/plain": [ + "[{'name': 'sess_pid_seq', 'tags': set(), 'properties': {'value_count': {'min': 0, 'max': None}}, 'dtype': DType(name='int32', element_type=, element_size=32, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=0, max=None)))), 'is_list': True, 'is_ragged': True}]" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "model_transformer = mm.Model(dense_block, mlp_block2, prediction_task)" + "wf.input_schema" ] }, { "cell_type": "code", - "execution_count": 42, - "id": "49b12d31", + "execution_count": 61, + "id": "fc365607", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nametagsdtypeis_listis_raggedproperties.num_bucketsproperties.freq_thresholdproperties.max_sizeproperties.start_indexproperties.cat_pathproperties.domain.minproperties.domain.maxproperties.domain.nameproperties.embedding_sizes.cardinalityproperties.embedding_sizes.dimensionproperties.value_count.minproperties.value_count.max
0sess_pid_seq(Tags.CATEGORICAL)DType(name='int64', element_type=<ElementType....TrueTrueNone000.//categories/unique.sess_pid_seq.parquet056582sess_pid_seq565835120None
\n", + "
" + ], + "text/plain": [ + "[{'name': 'sess_pid_seq', 'tags': {}, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'start_index': 0, 'cat_path': './/categories/unique.sess_pid_seq.parquet', 'domain': {'min': 0, 'max': 56582, 'name': 'sess_pid_seq'}, 'embedding_sizes': {'cardinality': 56583, 'dimension': 512}, 'value_count': {'min': 0, 'max': None}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=0, max=None)))), 'is_list': True, 'is_ragged': True}]" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "optimizer = tf.keras.optimizers.Adam(\n", - " learning_rate=learning_rate,\n", - ")" + "wf.output_schema" ] }, { "cell_type": "code", - "execution_count": 43, - "id": "502ef8a3", + "execution_count": 62, + "id": "f7a09453", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nametagsdtypeis_listis_raggedproperties.domain.minproperties.domain.maxproperties.domain.nameproperties.value_count.minproperties.value_count.max
0sess_pid_seq(Tags.CATEGORICAL, Tags.LIST, Tags.ITEM_ID, Ta...DType(name='int64', element_type=<ElementType....TrueTrue1390000sess_pid_seq2.0None
\n", + "
" + ], + "text/plain": [ + "[{'name': 'sess_pid_seq', 'tags': {, , , , }, 'properties': {'domain': {'min': 1, 'max': 390000, 'name': 'sess_pid_seq'}, 'value_count': {'min': 2.0, 'max': None}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0.0, max=None), Dimension(min=2.0, max=None)))), 'is_list': True, 'is_ragged': True}]" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "n_epoch = 1" + "model.input_schema" ] }, { "cell_type": "code", - "execution_count": 44, - "id": "d84a30d3", + "execution_count": 80, + "id": "88942e7d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "List(\n", + " (0): 'sess_pid_seq/categorical_output'\n", + ")" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", - " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[4])\n", - " )" + "model.output_names" ] }, { "cell_type": "code", - "execution_count": null, - "id": "9a9611ab", + "execution_count": 68, + "id": "5bd66ba8", "metadata": {}, "outputs": [], "source": [ - "# model_transformer.fit(\n", - "# train,\n", - "# batch_size=batch_size,\n", - "# epochs=n_epoch,\n", - "# pre=mm.SequencePredictRandom(schema=train.schema, target=target, transformer=xlnet_block)\n", - "# )" + "from merlin.systems.dag.ops.workflow import TransformWorkflow\n", + "from merlin.systems.dag.ops.tensorflow import PredictTensorflow\n", + "\n", + "serving_operators = wf.input_schema.column_names >> TransformWorkflow(wf) >> PredictTensorflow(model)\n", + "\n", + "# serving_operators = model.input_schema.column_names >> PredictTensorflow(model)" ] }, { "cell_type": "code", - "execution_count": 46, - "id": "e7474131", + "execution_count": 82, + "id": "3cc747e5", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model_2/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_2/sequential_block_12/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_2/sequential_block_12/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_2/sequential_block_12/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. 
This may consume a large amount of memory.\n", - " warnings.warn(\n" + "ename": "ImportError", + "evalue": "cannot import name 'PredictTensorflowTriton' from 'merlin.systems.dag.ops.tensorflow' (/usr/local/lib/python3.8/dist-packages/merlin/systems/dag/ops/tensorflow.py)", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[82], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msystems\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdag\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mops\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtensorflow\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m PredictTensorflowTriton\n", + "\u001b[0;31mImportError\u001b[0m: cannot import name 'PredictTensorflowTriton' from 'merlin.systems.dag.ops.tensorflow' (/usr/local/lib/python3.8/dist-packages/merlin/systems/dag/ops/tensorflow.py)" ] - }, + } + ], + "source": [ + "from merlin.systems.dag.ops.tensorflow import PredictTensorflowTriton" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "b66ab2b8", + "metadata": {}, + "outputs": [], + "source": [ + "predict_op = PredictTensorflow(model)" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "id": "a26dca88", + "metadata": {}, + "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "665/665 [==============================] - 74s 107ms/step - loss: 8.9015 - recall_at_4: 0.0224 - mrr_at_4: 0.0129 - ndcg_at_4: 0.0153 - map_at_4: 0.0129 - precision_at_4: 0.0056 - regularization_loss: 0.0000e+00 - loss_batch: 8.8957\n" + "ename": "NotImplementedError", + "evalue": "Exporting an operator to run in a particular context (i.e. Triton) only makres sense when a runtime is specified. 
To select an operator for the appropriate runtime, replace PredictTensorflowwith a runtime-specific operator class, possibly PredictTensorflowTriton", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[81], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mpredict_op\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexport\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m/workspace/models_for_benchmarking\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minput_schema\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minput_schema\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/systems/dag/ops/operator.py:107\u001b[0m, in \u001b[0;36mInferenceOperator.export\u001b[0;34m(self, path, input_schema, output_schema, params, node_id, version, backend)\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;129m@abstractmethod\u001b[39m\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mexport\u001b[39m(\n\u001b[1;32m 73\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 80\u001b[0m backend: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mensemble\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 81\u001b[0m ):\n\u001b[1;32m 82\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 83\u001b[0m \u001b[38;5;124;03m Export the class object as a config and all related files to the user defined path.\u001b[39;00m\n\u001b[1;32m 84\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 105\u001b[0m \u001b[38;5;124;03m A list of individual configs for each step (operator) in graph.\u001b[39;00m\n\u001b[1;32m 106\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 107\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\n\u001b[1;32m 108\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExporting an operator to run in a particular context (i.e. Triton)\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 109\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m only makres sense when a runtime is specified. 
To select an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124moperator for the appropriate runtime, replace \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwith a runtime-specific operator class, possibly \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124mTriton\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 112\u001b[0m )\n", + "\u001b[0;31mNotImplementedError\u001b[0m: Exporting an operator to run in a particular context (i.e. Triton) only makres sense when a runtime is specified. To select an operator for the appropriate runtime, replace PredictTensorflowwith a runtime-specific operator class, possibly PredictTensorflowTriton" ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ - "model_transformer.fit(\n", - " train,\n", - " batch_size=batch_size,\n", - " epochs=n_epoch,\n", - " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", - ")" + "predict_op.export('/workspace/models_for_benchmarking', model.input_schema, model.input_schema)" ] }, { "cell_type": "code", - "execution_count": 47, - "id": "7bf839e3", + "execution_count": 69, + "id": "cd23a561", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ValueError", + "evalue": "Output column 'sess_pid_seq' not detected in any child inputs for 'TransformWorkflow'.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[69], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msystems\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdag\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mensemble\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Ensemble\n\u001b[0;32m----> 3\u001b[0m ensemble \u001b[38;5;241m=\u001b[39m \u001b[43mEnsemble\u001b[49m\u001b[43m(\u001b[49m\u001b[43mserving_operators\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minput_schema\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m ens_conf, node_confs \u001b[38;5;241m=\u001b[39m ensemble\u001b[38;5;241m.\u001b[39mexport(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/workspace/models_for_benchmarking/ensemble\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/systems/dag/ensemble.py:48\u001b[0m, in \u001b[0;36mEnsemble.__init__\u001b[0;34m(self, ops, schema, label_columns)\u001b[0m\n\u001b[1;32m 36\u001b[0m \u001b[38;5;124;03m\"\"\"Construct a systems ensemble.\u001b[39;00m\n\u001b[1;32m 37\u001b[0m \n\u001b[1;32m 38\u001b[0m \u001b[38;5;124;03mParameters\u001b[39;00m\n\u001b[0;32m 
(...)\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[38;5;124;03m List of strings representing label columns, by default None\u001b[39;00m\n\u001b[1;32m 46\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgraph \u001b[38;5;241m=\u001b[39m Graph(ops)\n\u001b[0;32m---> 48\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgraph\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconstruct_schema\u001b[49m\u001b[43m(\u001b[49m\u001b[43mschema\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlabel_columns \u001b[38;5;241m=\u001b[39m label_columns \u001b[38;5;129;01mor\u001b[39;00m []\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/dag/graph.py:110\u001b[0m, in \u001b[0;36mGraph.construct_schema\u001b[0;34m(self, root_schema, preserve_dtypes)\u001b[0m\n\u001b[1;32m 107\u001b[0m nodes \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(postorder_iter_nodes(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moutput_node))\n\u001b[1;32m 109\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compute_node_schemas(root_schema, nodes, preserve_dtypes)\n\u001b[0;32m--> 110\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate_node_schemas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mroot_schema\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnodes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpreserve_dtypes\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/dag/graph.py:120\u001b[0m, in \u001b[0;36mGraph._validate_node_schemas\u001b[0;34m(self, root_schema, nodes, strict_dtypes)\u001b[0m\n\u001b[1;32m 118\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_validate_node_schemas\u001b[39m(\u001b[38;5;28mself\u001b[39m, root_schema, nodes, strict_dtypes\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[1;32m 119\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m node \u001b[38;5;129;01min\u001b[39;00m nodes:\n\u001b[0;32m--> 120\u001b[0m \u001b[43mnode\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate_schemas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mroot_schema\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstrict_dtypes\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstrict_dtypes\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/systems/dag/node.py:120\u001b[0m, in \u001b[0;36mInferenceNode.validate_schemas\u001b[0;34m(self, root_schema, strict_dtypes)\u001b[0m\n\u001b[1;32m 117\u001b[0m sink_col_schema \u001b[38;5;241m=\u001b[39m childrens_schema\u001b[38;5;241m.\u001b[39mget(col_name)\n\u001b[1;32m 119\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m sink_col_schema:\n\u001b[0;32m--> 120\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 121\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOutput column \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcol_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m not detected in any \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 122\u001b[0m 
\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mchild inputs for \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mop\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 123\u001b[0m )\n", + "\u001b[0;31mValueError\u001b[0m: Output column 'sess_pid_seq' not detected in any child inputs for 'TransformWorkflow'." + ] + } + ], "source": [ - "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)" + "from merlin.systems.dag.ensemble import Ensemble\n", + "\n", + "ensemble = Ensemble(serving_operators, wf.input_schema)\n", + "\n", + "ens_conf, node_confs = ensemble.export(\"/workspace/models_for_benchmarking/ensemble\")" ] }, { "cell_type": "code", - "execution_count": 49, - "id": "15ccc448", + "execution_count": 73, + "id": "90637fc4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "84/84 [==============================] - 8s 40ms/step - loss: 8.8326 - recall_at_4: 0.0502 - mrr_at_4: 0.0319 - ndcg_at_4: 0.0365 - map_at_4: 0.0319 - precision_at_4: 0.0126 - regularization_loss: 0.0000e+00 - loss_batch: 8.8396\n" + "\u001b[0m\u001b[01;34massets\u001b[0m/ keras_metadata.pb saved_model.pb \u001b[01;34mvariables\u001b[0m/\r\n" ] - }, - { - "data": { - "text/plain": [ - "{'loss': 8.832579612731934,\n", - " 'recall_at_4': 0.05087455362081528,\n", - " 'mrr_at_4': 0.030891483649611473,\n", - " 'ndcg_at_4': 0.0359138660132885,\n", - " 'map_at_4': 0.030891483649611473,\n", - " 'precision_at_4': 0.01271863840520382,\n", - " 'regularization_loss': 0.0,\n", - " 'loss_batch': 9.142295837402344}" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ - "model_transformer.evaluate(\n", - " valid,\n", - " batch_size=batch_size,\n", - " pre=predict_last,\n", - " return_dict=True\n", - ")" + "ls t4rec_model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7998b835", + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile /workspace/models_for_benchmarking/t4r_pytorch_pt/config.pbtxt\n", + "\n", + "name: \"t4r_pytorch_pt\"\n", + "input {\n", + " name: \"sess_pid_seq__values\"\n", + " data_type: TYPE_INT64\n", + " dims: -1\n", + " dims: 1\n", + "}\n", + "input {\n", + " name: \"sess_pid_seq__nnzs\"\n", + " data_type: TYPE_INT64\n", + " dims: -1\n", + " dims: 1\n", + "}\n", + "output {\n", + " name: \"output\"\n", + " data_type: TYPE_FP32\n", + " dims: -1\n", + " dims: 20\n", + "}\n", + "backend: \"python\"" ] } ], From 60e11afbc582760cfbcab8df8f07f707f8e7b5da Mon Sep 17 00:00:00 2001 From: Radek Osmulski Date: Thu, 13 Apr 2023 17:55:27 +1000 Subject: [PATCH 09/15] update --- ...rain_and_save_model_for_benchmarking.ipynb | 1385 ++++++++--------- 1 file changed, 665 insertions(+), 720 deletions(-) diff --git a/T4Rec_repro/train_and_save_model_for_benchmarking.ipynb b/T4Rec_repro/train_and_save_model_for_benchmarking.ipynb index f6f757b496..9babe53629 100644 --- a/T4Rec_repro/train_and_save_model_for_benchmarking.ipynb +++ b/T4Rec_repro/train_and_save_model_for_benchmarking.ipynb @@ -11,27 +11,36 @@ "output_type": "stream", "text": [ "From https://github.com/NVIDIA-Merlin/Models\n", + " * [new branch] asvdb_metric_tracking -> origin/asvdb_metric_tracking\n", " * [new 
branch] ci/horovod -> origin/ci/horovod\n", " * [new branch] codespell_fix -> origin/codespell_fix\n", - " 16fb4149..c9d3baf4 fea-sok-integration-wj -> origin/fea-sok-integration-wj\n", + " 16fb4149..fcaefc3e fea-sok-integration-wj -> origin/fea-sok-integration-wj\n", " * [new branch] fea-sok-load-dump -> origin/fea-sok-load-dump\n", - " 95462360..28fb60ad gh-pages -> origin/gh-pages\n", + " * [new branch] fix_datetime_issue_add_inference_on_TIS -> origin/fix_datetime_issue_add_inference_on_TIS\n", + " 95462360..d8d85835 gh-pages -> origin/gh-pages\n", " * [new branch] inference_benchmarking_transformers -> origin/inference_benchmarking_transformers\n", - " 835ad186..a5ac5668 main -> origin/main\n", + " 835ad186..572a7b4d main -> origin/main\n", " * [new branch] mtl_example -> origin/mtl_example\n", " cb431a8a..b90e9a1b release-22.12 -> origin/release-22.12\n", " * [new branch] release-23.02 -> origin/release-23.02\n", + " * [new branch] tf/batch_predict_fix -> origin/tf/batch_predict_fix\n", " * [new branch] tf/column_sampling_serialization_fix -> origin/tf/column_sampling_serialization_fix\n", " * [new branch] tf/continuous_seq_feats_fix -> origin/tf/continuous_seq_feats_fix\n", " * [new branch] tf/dataloader_changes -> origin/tf/dataloader_changes\n", " * [new branch] tf/dlrm_dropout_fix -> origin/tf/dlrm_dropout_fix\n", " * [new branch] tf/fix_broadcast_to_sequence -> origin/tf/fix_broadcast_to_sequence\n", + " * [new branch] tf/fix_logq_correction -> origin/tf/fix_logq_correction\n", " * [new branch] tf/fix_training_smaller_accuracy -> origin/tf/fix_training_smaller_accuracy\n", " * [new branch] tf/mtl_example_updates_v2 -> origin/tf/mtl_example_updates_v2\n", " + 169f3df5...06eecddd tf/output-block -> origin/tf/output-block (forced update)\n", " * [new branch] tf/process_list_to_prepare_features -> origin/tf/process_list_to_prepare_features\n", " * [new branch] tf/quick_start_ranking -> origin/tf/quick_start_ranking\n", " * [new branch] tf/transformer-api -> origin/tf/transformer-api\n", + " * [new branch] torch/dev -> origin/torch/dev\n", + " * [new branch] torch/masking -> origin/torch/masking\n", + " * [new branch] torch/remove-t4r-code -> origin/torch/remove-t4r-code\n", + " * [new branch] tox_github_actions_fix -> origin/tox_github_actions_fix\n", + " * [new branch] transformer-api -> origin/transformer-api\n", " + 0a65d603...9f53e8ff update_07 -> origin/update_07 (forced update)\n", " * [new tag] v23.02.00 -> v23.02.00\n", "Previous HEAD position was cb431a8a Fix the serialization of `SequenceSummary` block (#927)\n", @@ -51,49 +60,49 @@ " Preparing wheel metadata: finished with status 'done'\n", "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-models==23.2.0+7.ga86201ee) (0.10.0)\n", "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from merlin-models==23.2.0+7.ga86201ee) (0.0.4)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.7.1)\n", "Requirement already satisfied: pandas<1.4.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.3.5)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.5.0)\n", + "Requirement already satisfied: betterproto<2.0.0 in 
/usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.2.5)\n", "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.12.0)\n", "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (8.0.0)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (3.19.6)\n", - "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.5.0)\n", "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (4.64.1)\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.2.5)\n", - "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.7.1)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.56.4)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (3.19.6)\n", "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.7.1)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (22.0)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.7)\n", - "Requirement already satisfied: numpy>=1.17.3; platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.22.4)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.8.2)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.3.0)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.57.0)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.4.3)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.2.0)\n", - "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) 
(6.1)\n", - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.2.0)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.56.4)\n", "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.2.0)\n", - "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.0.4)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.0)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.0.0)\n", - "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.4.0)\n", - "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.26.13)\n", - "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (5.9.4)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.7.0)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.1)\n", "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (8.1.3)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.7.0)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.4.0)\n", "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.12.0)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.2.0)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (3.1.2)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (5.9.4)\n", + "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.26.13)\n", + "Requirement already satisfied: 
locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.0.0)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.0.4)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.8.2)\n", + "Requirement already satisfied: numpy>=1.17.3; platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.22.4)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.7)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.2.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.4.3)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.57.0)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.3.0)\n", "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (45.2.0)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (5.2.0)\n", "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.39.1)\n", - "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.3.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (5.2.0)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.0.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.1.1)\n", "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from 
python-dateutil>=2.7.3->pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.14.0)\n", "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (4.1.0)\n", "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.0.4)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.0.1)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.1.1)\n", "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (3.11.0)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.0.1)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (4.0.0)\n" + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (4.0.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.0.1)\n" ] }, { @@ -103,8 +112,8 @@ "Building wheels for collected packages: merlin-models\n", " Building wheel for merlin-models (PEP 517): started\n", " Building wheel for merlin-models (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-models: filename=merlin_models-23.2.0+7.ga86201ee-py3-none-any.whl size=374647 sha256=7566d7a4a90814a6adae96ac4566fa227e750c3301334ed8ae3c852608af406f\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-naqyczcx/wheels/4d/e8/98/0493db55fff90dc9af123f55a9455b96f7f8166c912a02c8a6\n", + " Created wheel for merlin-models: filename=merlin_models-23.2.0+7.ga86201ee-py3-none-any.whl size=374647 sha256=2159f40a60054f9efe2c6fd8630406a3e376fe5efb1ade89e5a7f7a59c35b281\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-4g3nhusu/wheels/4d/e8/98/0493db55fff90dc9af123f55a9455b96f7f8166c912a02c8a6\n", "Successfully built merlin-models\n", "Installing collected packages: merlin-models\n", " Attempting uninstall: merlin-models\n", @@ -135,14 +144,14 @@ "text": [ "From https://github.com/NVIDIA-Merlin/core\n", " * branch main -> FETCH_HEAD\n", - " cd96ca5f..2d60d237 main -> origin/main\n" + " cd96ca5f..dd98a436 main -> origin/main\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Updating cd96ca5f..2d60d237\n", + "Updating cd96ca5f..dd98a436\n", "Fast-forward\n", " .github/release-drafter.yml | 44 +--\n", " .github/workflows/ISSUE_TEMPLATE/bug-report.md | 17 +-\n", @@ -150,37 +159,44 @@ " .../workflows/ISSUE_TEMPLATE/feature-request.md | 5 +-\n", " .../workflows/ISSUE_TEMPLATE/submit-question.md | 3 +-\n", " .github/workflows/ISSUE_TEMPLATE/task.md 
| 5 +-\n", - " .github/workflows/cpu-ci.yml | 145 +++-------\n", - " .github/workflows/cpu-models.yml | 52 ++--\n", - " .github/workflows/cpu-nvtabular.yml | 52 ++--\n", - " .github/workflows/cpu-packages.yml | 126 +++++++++\n", - " .github/workflows/cpu-systems.yml | 52 ++--\n", + " .github/workflows/cpu-ci.yml | 136 +--------\n", + " .github/workflows/cpu-models.yml | 44 ---\n", + " .github/workflows/cpu-nvtabular.yml | 44 ---\n", + " .github/workflows/cpu-packages.yml | 126 ++++++++\n", + " .github/workflows/cpu-systems.yml | 44 ---\n", " .github/workflows/docs-preview-pr.yaml | 2 +-\n", " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", - " .github/workflows/gpu-ci.yml | 30 +-\n", + " .github/workflows/gpu-ci.yml | 48 ++-\n", + " .github/workflows/merlin.yml | 35 +++\n", " .github/workflows/release-drafter.yaml | 2 +-\n", + " .github/workflows/tox.yml | 38 +++\n", " .pre-commit-config.yaml | 55 ++--\n", " .prettierignore | 2 +\n", " CLA.md | 9 +-\n", " CONTRIBUTING.md | 28 +-\n", " README.md | 68 ++---\n", " ci/pr.gpu.Jenkinsfile | 2 +-\n", - " docs/README.md | 49 ++--\n", - " merlin/core/compat.py | 59 +++-\n", - " merlin/core/dispatch.py | 51 +++-\n", + " docs/README.md | 49 +--\n", + " merlin/core/compat/__init__.py | 143 +++++++++\n", + " merlin/core/compat/tensorflow.py | 92 ++++++\n", + " merlin/core/compat/torch.py | 22 ++\n", + " merlin/core/dispatch.py | 243 ++++++++++-----\n", + " merlin/core/has_gpu.py | 46 +++\n", + " merlin/core/utils.py | 88 +-----\n", " merlin/dag/__init__.py | 1 +\n", " merlin/dag/base_operator.py | 30 +-\n", " merlin/dag/dictarray.py | 3 +-\n", - " merlin/dag/executors.py | 107 ++++---\n", + " merlin/dag/executors.py | 119 +++++---\n", " merlin/dag/graph.py | 20 ++\n", " merlin/dag/node.py | 2 +-\n", + " merlin/dag/selector.py | 3 +\n", " merlin/dag/utils.py | 69 +++++\n", - " merlin/dispatch/lazy.py | 152 ++++++++++\n", - " merlin/dtypes/__init__.py | 60 ++++\n", + " merlin/dispatch/lazy.py | 156 ++++++++++\n", + " merlin/dtypes/__init__.py | 61 ++++\n", " merlin/dtypes/aliases.py | 52 ++++\n", - " merlin/dtypes/base.py | 178 ++++++++++++\n", - " merlin/dtypes/mapping.py | 173 ++++++++++++\n", - " merlin/dtypes/mappings/__init__.py | 18 ++\n", + " merlin/dtypes/base.py | 178 +++++++++++\n", + " merlin/dtypes/mapping.py | 173 +++++++++++\n", + " .../compat.py => dtypes/mappings/__init__.py} | 17 +-\n", " merlin/dtypes/mappings/cudf.py | 57 ++++\n", " merlin/dtypes/mappings/numpy.py | 52 ++++\n", " merlin/dtypes/mappings/pandas.py | 38 +++\n", @@ -188,53 +204,71 @@ " merlin/dtypes/mappings/tf.py | 52 ++++\n", " merlin/dtypes/mappings/torch.py | 43 +++\n", " merlin/dtypes/mappings/triton.py | 53 ++++\n", - " merlin/dtypes/registry.py | 142 ++++++++++\n", - " merlin/dtypes/shape.py | 183 ++++++++++++\n", - " merlin/io/avro.py | 4 -\n", - " merlin/io/csv.py | 1 -\n", - " merlin/io/dask.py | 6 +-\n", - " merlin/io/dataset.py | 19 +-\n", - " merlin/io/fsspec_utils.py | 8 +-\n", - " merlin/io/parquet.py | 8 -\n", - " merlin/io/writer.py | 1 -\n", - " merlin/schema/io/tensorflow_metadata.py | 86 +++---\n", - " merlin/schema/schema.py | 312 ++++++++++++---------\n", + " merlin/dtypes/registry.py | 142 +++++++++\n", + " merlin/dtypes/shape.py | 189 ++++++++++++\n", + " merlin/io/__init__.py | 2 +-\n", + " merlin/io/avro.py | 6 +-\n", + " merlin/io/csv.py | 9 +-\n", + " merlin/io/dask.py | 74 ++++-\n", + " merlin/io/dataframe_engine.py | 6 +-\n", + " merlin/io/dataset.py | 111 +++++--\n", + " merlin/io/fsspec_utils.py | 16 +-\n", + " merlin/io/parquet.py | 25 
+-\n", + " merlin/io/shuffle.py | 13 +-\n", + " merlin/io/worker.py | 7 +-\n", + " merlin/io/writer.py | 7 +-\n", + " merlin/io/writer_factory.py | 10 +-\n", + " merlin/schema/io/tensorflow_metadata.py | 115 +++++---\n", + " merlin/schema/schema.py | 327 +++++++++++++--------\n", " merlin/schema/tags.py | 1 +\n", " merlin/table/__init__.py | 24 ++\n", - " merlin/table/conversions.py | 135 +++++++++\n", - " merlin/table/cupy_column.py | 92 ++++++\n", - " merlin/table/numpy_column.py | 100 +++++++\n", - " merlin/table/tensor_column.py | 217 ++++++++++++++\n", - " merlin/table/tensor_table.py | 222 +++++++++++++++\n", - " merlin/table/tensorflow_column.py | 159 +++++++++++\n", - " merlin/table/torch_column.py | 124 ++++++++\n", - " requirements.txt | 5 +-\n", - " tests/conftest.py | 16 +-\n", - " tests/unit/core/test_dispatch.py | 19 ++\n", - " tests/unit/core/test_version.py | 4 +\n", + " merlin/table/conversions.py | 208 +++++++++++++\n", + " merlin/table/cupy_column.py | 108 +++++++\n", + " merlin/table/numpy_column.py | 116 ++++++++\n", + " merlin/table/tensor_column.py | 261 ++++++++++++++++\n", + " merlin/table/tensor_table.py | 226 ++++++++++++++\n", + " merlin/table/tensorflow_column.py | 173 +++++++++++\n", + " merlin/table/torch_column.py | 133 +++++++++\n", + " requirements-gpu.txt | 2 +-\n", + " requirements.txt | 13 +-\n", + " tests/conftest.py | 35 ++-\n", + " tests/unit/core/test_dispatch.py | 43 ++-\n", + " tests/unit/core/test_protocols.py | 10 +-\n", + " tests/unit/core/test_version.py | 2 +\n", " tests/unit/dag/test_dag_utils.py | 31 ++\n", " tests/unit/dispatch/test_lazy_dispatch.py | 61 ++++\n", - " tests/unit/dtypes/test_module.py | 48 ++++\n", - " tests/unit/dtypes/test_shape.py | 222 +++++++++++++++\n", - " tests/unit/io/test_io.py | 27 +-\n", - " tests/unit/schema/test_column_schemas.py | 142 ++++++----\n", - " tests/unit/schema/test_schema.py | 22 +-\n", + " tests/unit/dtypes/test_module.py | 48 +++\n", + " tests/unit/dtypes/test_shape.py | 222 ++++++++++++++\n", + " tests/unit/io/test_avro.py | 8 +-\n", + " tests/unit/io/test_dataset.py | 51 ++++\n", + " tests/unit/io/test_io.py | 98 ++++--\n", + " tests/unit/schema/test_column_schemas.py | 142 ++++++---\n", + " tests/unit/schema/test_schema.py | 60 +++-\n", " tests/unit/schema/test_schema_io.py | 27 +-\n", - " tests/unit/table/test_convert_column.py | 75 +++++\n", - " tests/unit/table/test_tensor_column.py | 186 ++++++++++++\n", - " tests/unit/table/test_tensor_table.py | 311 ++++++++++++++++++++\n", - " tests/unit/utils/test_utils.py | 3 -\n", - " tox.ini | 4 +\n", - " 81 files changed, 4441 insertions(+), 674 deletions(-)\n", + " tests/unit/table/test_convert_column.py | 164 +++++++++++\n", + " tests/unit/table/test_tensor_column.py | 262 +++++++++++++++++\n", + " tests/unit/table/test_tensor_table.py | 313 ++++++++++++++++++++\n", + " tests/unit/utils/test_utils.py | 16 +-\n", + " tox.ini | 46 ++-\n", + " 97 files changed, 5624 insertions(+), 1008 deletions(-)\n", + " delete mode 100644 .github/workflows/cpu-models.yml\n", + " delete mode 100644 .github/workflows/cpu-nvtabular.yml\n", " create mode 100644 .github/workflows/cpu-packages.yml\n", + " delete mode 100644 .github/workflows/cpu-systems.yml\n", + " create mode 100644 .github/workflows/merlin.yml\n", + " create mode 100644 .github/workflows/tox.yml\n", " create mode 100644 .prettierignore\n", + " create mode 100644 merlin/core/compat/__init__.py\n", + " create mode 100644 merlin/core/compat/tensorflow.py\n", + " create mode 100644 
merlin/core/compat/torch.py\n", + " create mode 100644 merlin/core/has_gpu.py\n", " create mode 100644 merlin/dag/utils.py\n", " create mode 100644 merlin/dispatch/lazy.py\n", " create mode 100644 merlin/dtypes/__init__.py\n", " create mode 100644 merlin/dtypes/aliases.py\n", " create mode 100644 merlin/dtypes/base.py\n", " create mode 100644 merlin/dtypes/mapping.py\n", - " create mode 100644 merlin/dtypes/mappings/__init__.py\n", + " rename merlin/{core/compat.py => dtypes/mappings/__init__.py} (60%)\n", " create mode 100644 merlin/dtypes/mappings/cudf.py\n", " create mode 100644 merlin/dtypes/mappings/numpy.py\n", " create mode 100644 merlin/dtypes/mappings/pandas.py\n", @@ -255,7 +289,14 @@ " create mode 100644 tests/unit/dag/test_dag_utils.py\n", " create mode 100644 tests/unit/dispatch/test_lazy_dispatch.py\n", " create mode 100644 tests/unit/dtypes/test_module.py\n", - " create mode 100644 tests/unit/dtypes/test_shape.py\n", + " create mode 100644 tests/unit/dtypes/test_shape.py\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " create mode 100644 tests/unit/io/test_dataset.py\n", " create mode 100644 tests/unit/table/test_convert_column.py\n", " create mode 100644 tests/unit/table/test_tensor_column.py\n", " create mode 100644 tests/unit/table/test_tensor_table.py\n", @@ -266,68 +307,101 @@ " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (2022.7.1)\n", - "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (11.4.1)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (8.0.0)\n", - "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (2022.7.1)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (4.64.1)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (22.0)\n", - "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (2022.5.0)\n" + "Collecting distributed>=2022.11.1\n", + " Downloading distributed-2023.3.2.1-py3-none-any.whl (957 kB)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+96.gdd98a436) (1.3.5)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+96.gdd98a436) (1.12.0)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+96.gdd98a436) (0.56.4)\n", + "Requirement already satisfied: numpy>=1.22.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+96.gdd98a436) (1.22.4)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+96.gdd98a436) (1.2.5)\n", + "Collecting dask-cuda>=22.12.0\n", + " Downloading dask_cuda-23.4.0-py3-none-any.whl (125 kB)\n", + "Requirement already satisfied: protobuf>=3.0.0 in 
/usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+96.gdd98a436) (3.19.6)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+96.gdd98a436) (11.4.1)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+96.gdd98a436) (8.0.0)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+96.gdd98a436) (22.0)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+96.gdd98a436) (4.64.1)\n", + "Collecting fsspec>=2022.7.1\n", + " Downloading fsspec-2023.4.0-py3-none-any.whl (153 kB)\n", + "Collecting dask>=2022.11.1\n", + " Downloading dask-2023.3.2-py3-none-any.whl (1.2 MB)\n", + "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+96.gdd98a436) (0.12.0)\n", + "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+96.gdd98a436) (8.1.3)\n", + "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+96.gdd98a436) (1.0.4)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+96.gdd98a436) (1.7.0)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+96.gdd98a436) (6.0)\n", + "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+96.gdd98a436) (6.1)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+96.gdd98a436) (1.0.0)\n", + "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+96.gdd98a436) (2.4.0)\n", + "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+96.gdd98a436) (3.1.2)\n", + "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+96.gdd98a436) (1.26.13)\n", + "Requirement already satisfied: zict>=2.1.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+96.gdd98a436) (2.2.0)\n", + "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+96.gdd98a436) (5.9.4)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+96.gdd98a436) (2.2.0)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+96.gdd98a436) (2022.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+96.gdd98a436) (2.8.2)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+96.gdd98a436) (1.57.0)\n", + "Requirement 
already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+96.gdd98a436) (1.3.0)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core==0.9.0+96.gdd98a436) (45.2.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+96.gdd98a436) (5.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+96.gdd98a436) (0.39.1)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+96.gdd98a436) (1.2.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+96.gdd98a436) (0.4.3)\n", + "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core==0.9.0+96.gdd98a436) (1.3.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core==0.9.0+96.gdd98a436) (2.1.1)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=2.1.0->distributed>=2022.11.1->merlin-core==0.9.0+96.gdd98a436) (1.0.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+96.gdd98a436) (1.14.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core==0.9.0+96.gdd98a436) (3.11.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+96.gdd98a436) (6.0.4)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+96.gdd98a436) (4.1.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+96.gdd98a436) (6.0.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+96.gdd98a436) (4.0.0)\n", + "Building wheels for collected packages: merlin-core\n", + " Building wheel for merlin-core (PEP 517): started\n", + " Building wheel for merlin-core (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-core: filename=merlin_core-0.9.0+96.gdd98a436-py3-none-any.whl size=159239 sha256=6329058125b220de2e191f83e787b13b391b0d612f1bd595baa8476dd2ec1646\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-ooeyjgqt/wheels/8f/da/8c/c779661788874afaa32fd10abeac6016635956e3bad9940584\n", + "Successfully built merlin-core\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ERROR: dask-cudf 22.8.0a0+304.g6ca81bbc78.dirty requires cupy-cuda118<12,>=9.5.0, which is not installed.\n", + "ERROR: cudf 22.8.0a0+304.g6ca81bbc78.dirty requires cupy-cuda118<12,>=9.5.0, which is not installed.\n", + "ERROR: dask-cudf 22.8.0a0+304.g6ca81bbc78.dirty has requirement dask==2022.7.1, but you'll have dask 2023.3.2 which is incompatible.\n", + "ERROR: 
dask-cudf 22.8.0a0+304.g6ca81bbc78.dirty has requirement distributed==2022.7.1, but you'll have distributed 2023.3.2.1 which is incompatible.\n", + "ERROR: cudf 22.8.0a0+304.g6ca81bbc78.dirty has requirement cuda-python<11.7.1,>=11.5, but you'll have cuda-python 11.8.1 which is incompatible.\n", + "ERROR: cudf 22.8.0a0+304.g6ca81bbc78.dirty has requirement protobuf<3.21.0a0,>=3.20.1, but you'll have protobuf 3.19.6 which is incompatible.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (0.56.4)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (3.19.6)\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (1.3.5)\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (1.2.5)\n", - "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+60.g2d60d237) (1.12.0)\n", - "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (2.2.0)\n", - "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (1.3.0)\n", - "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (0.12.0)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (6.0)\n", - "Requirement already satisfied: numpy>=1.16.6 in /usr/local/lib/python3.8/dist-packages (from pyarrow>=5.0.0->merlin-core==0.9.0+60.g2d60d237) (1.22.4)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (1.0.0)\n", - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (2.2.0)\n", - "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (1.0.4)\n", - "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (8.1.3)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (1.7.0)\n", - "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (6.1)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (3.1.2)\n", - "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (5.9.4)\n", - "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (1.26.13)\n", - "Requirement already satisfied: 
sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (2.4.0)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core==0.9.0+60.g2d60d237) (45.2.0)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+60.g2d60d237) (5.2.0)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+60.g2d60d237) (0.39.1)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+60.g2d60d237) (2022.7)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+60.g2d60d237) (2.8.2)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+60.g2d60d237) (0.4.3)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+60.g2d60d237) (1.2.0)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+60.g2d60d237) (1.3.0)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+60.g2d60d237) (1.57.0)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (1.0.1)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core==0.9.0+60.g2d60d237) (2.1.1)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core==0.9.0+60.g2d60d237) (3.11.0)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+60.g2d60d237) (1.14.0)\n", - "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+60.g2d60d237) (4.1.0)\n", - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+60.g2d60d237) (6.0.4)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+60.g2d60d237) (4.0.0)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+60.g2d60d237) (6.0.1)\n", - "Building wheels for collected packages: merlin-core\n", - " Building wheel for merlin-core (PEP 517): started\n", - " Building wheel for merlin-core (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-core: filename=merlin_core-0.9.0+60.g2d60d237-py3-none-any.whl size=152708 sha256=ff70b25964dafa4162daf96e739c4866570e8eec2aa70c8b1f38049656b6b486\n", - " Stored in directory: 
/tmp/pip-ephem-wheel-cache-xyk5t8ph/wheels/8f/da/8c/c779661788874afaa32fd10abeac6016635956e3bad9940584\n", - "Successfully built merlin-core\n", - "Installing collected packages: merlin-core\n", + "Installing collected packages: fsspec, dask, distributed, dask-cuda, merlin-core\n", + " Attempting uninstall: fsspec\n", + " Found existing installation: fsspec 2022.5.0\n", + " Uninstalling fsspec-2022.5.0:\n", + " Successfully uninstalled fsspec-2022.5.0\n", + " Attempting uninstall: dask\n", + " Found existing installation: dask 2022.7.1\n", + " Uninstalling dask-2022.7.1:\n", + " Successfully uninstalled dask-2022.7.1\n", + " Attempting uninstall: distributed\n", + " Found existing installation: distributed 2022.7.1\n", + " Uninstalling distributed-2022.7.1:\n", + " Successfully uninstalled distributed-2022.7.1\n", + " Attempting uninstall: dask-cuda\n", + " Found existing installation: dask-cuda 22.8.0a0+36.g9860cad\n", + " Uninstalling dask-cuda-22.8.0a0+36.g9860cad:\n", + " Successfully uninstalled dask-cuda-22.8.0a0+36.g9860cad\n", " Attempting uninstall: merlin-core\n", " Found existing installation: merlin-core 0.10.0\n", " Uninstalling merlin-core-0.10.0:\n", " Successfully uninstalled merlin-core-0.10.0\n", - "Successfully installed merlin-core-0.9.0+60.g2d60d237\n" + "Successfully installed dask-2023.3.2 dask-cuda-23.4.0 distributed-2023.3.2.1 fsspec-2023.4.0 merlin-core-0.9.0+96.gdd98a436\n" ] }, { @@ -351,14 +425,14 @@ "text": [ "From https://github.com/NVIDIA-Merlin/NVTabular\n", " * branch main -> FETCH_HEAD\n", - " c5bc4098..9b186ee9 main -> origin/main\n" + " c5bc4098..ae580ada main -> origin/main\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Updating c5bc4098..9b186ee9\n", + "Updating c5bc4098..ae580ada\n", "Fast-forward\n", " .github/ISSUE_TEMPLATE/bug_report.md | 11 +-\n", " .github/ISSUE_TEMPLATE/documentation-request.md | 3 +-\n", @@ -371,10 +445,10 @@ " .github/workflows/blossom-ci.yml | 230 ++++++++++-----------\n", " .github/workflows/conda-env-create.yml | 30 +--\n", " .github/workflows/cpu-ci.yml | 138 -------------\n", - " .github/workflows/cpu-packages.yml | 132 ++++++++++++\n", - " .github/workflows/cpu-tests.yml | 69 +++++++\n", - " .github/workflows/docs-preview-pr.yaml | 2 +-\n", - " .github/workflows/docs-sched-rebuild.yaml | 6 +-\n", + " .github/workflows/cpu-packages.yml | 166 +++++++++++++++\n", + " .github/workflows/cpu-tests.yml | 71 +++++++\n", + " .github/workflows/docs-preview-pr.yaml | 4 +-\n", + " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", " .github/workflows/gpu-ci.yml | 30 ---\n", " .github/workflows/gpu-tests.yml | 30 +++\n", " .gitlab-ci.yml | 23 +--\n", @@ -383,11 +457,14 @@ " CHANGELOG.md | 187 ++++++++---------\n", " CONTRIBUTING.md | 30 +--\n", " README.md | 48 ++---\n", + " bench/datasets/tools/nvt_etl.py | 4 +-\n", " bench/datasets/tools/train_tensorflow.py | 1 -\n", " bench/examples/MultiGPUBench.md | 67 +++---\n", + " bench/examples/dask-nvtabular-criteo-benchmark.py | 4 +-\n", " ci/pr.gpu.Jenkinsfile | 2 +-\n", " conda/environments/nvtabular_aws_sagemaker.yml | 2 +-\n", - " docs/README.md | 18 +-\n", + " cpp/nvtabular/inference/categorify.cc | 10 +\n", + " docs/README.md | 29 ++-\n", " docs/source/core_features.md | 48 ++---\n", " docs/source/resources/architecture.md | 17 +-\n", " docs/source/resources/cloud_integration.md | 24 ++-\n", @@ -395,36 +472,55 @@ " docs/source/toc.yaml | 12 +-\n", " examples/01-Getting-started.ipynb | 5 +-\n", " examples/02-Advanced-NVTabular-workflow.ipynb | 5 +-\n", - " 
.../03-Running-on-multiple-GPUs-or-on-CPU.ipynb | 5 +-\n", + " .../03-Running-on-multiple-GPUs-or-on-CPU.ipynb | 24 ++-\n", " examples/README.md | 1 +\n", + " .../tensorflow/tfrecords_to_parquet.py | 9 +-\n", " nvtabular/inference/__init__.py | 4 +-\n", + " nvtabular/inference/triton/data_conversions.py | 24 +--\n", " nvtabular/inference/triton/ensemble.py | 86 ++------\n", " nvtabular/inference/triton/model/model_pt.py | 1 -\n", " nvtabular/inference/workflow/hugectr.py | 2 +-\n", " nvtabular/loader/backend.py | 31 +--\n", " nvtabular/loader/tensorflow.py | 1 +\n", - " nvtabular/ops/categorify.py | 2 -\n", + " nvtabular/ops/categorify.py | 4 +-\n", + " nvtabular/ops/column_similarity.py | 40 ++--\n", " nvtabular/ops/groupby.py | 35 ++--\n", - " nvtabular/ops/join_external.py | 1 -\n", + " nvtabular/ops/join_external.py | 7 +-\n", " nvtabular/ops/join_groupby.py | 18 +-\n", " nvtabular/ops/list_slice.py | 22 +-\n", " nvtabular/ops/moments.py | 2 -\n", " nvtabular/ops/reduce_dtype_size.py | 9 +-\n", " nvtabular/ops/value_counts.py | 14 +-\n", - " nvtabular/workflow/workflow.py | 113 +++++++++-\n", + " nvtabular/tools/data_gen.py | 31 ++-\n", + " nvtabular/utils.py | 2 +-\n", + " nvtabular/workflow/workflow.py | 169 +++++++++++++--\n", " requirements-test.txt | 2 -\n", " requirements/test.txt | 3 +-\n", " setup.py | 5 +\n", - " tests/conftest.py | 1 -\n", - " .../test_02-Advanced-NVTabular-workflow.py | 12 +-\n", - " tests/unit/ops/test_column_similarity.py | 1 -\n", - " tests/unit/ops/test_groupyby.py | 2 +-\n", + " tests/conftest.py | 33 ++-\n", + " .../test_02-Advanced-NVTabular-workflow.py | 17 +-\n", + " .../test_03-Running-on-multiple-GPUs-or-on-CPU.py | 11 +-\n", + " tests/unit/loader/test_tf_dataloader.py | 206 +++---------------\n", + " tests/unit/loader/test_torch_dataloader.py | 73 ++-----\n", + " tests/unit/ops/test_categorify.py | 36 +++-\n", + " tests/unit/ops/test_column_similarity.py | 3 +-\n", + " tests/unit/ops/test_drop_low_cardinality.py | 7 +-\n", + " tests/unit/ops/test_groupyby.py | 9 +-\n", + " tests/unit/ops/test_join.py | 11 +-\n", " tests/unit/ops/test_lambda.py | 28 ++-\n", + " tests/unit/ops/test_ops.py | 12 +-\n", " tests/unit/ops/test_ops_schema.py | 25 ++-\n", + " tests/unit/ops/test_reduce_dtype_size.py | 7 +-\n", + " tests/unit/ops/test_target_encode.py | 11 +-\n", " tests/unit/ops/test_value_count.py | 2 +\n", - " tests/unit/workflow/test_workflow.py | 75 ++++++-\n", - " tox.ini | 9 +-\n", - " 64 files changed, 1056 insertions(+), 786 deletions(-)\n", + " tests/unit/test_dask_nvt.py | 5 +-\n", + " tests/unit/test_s3.py | 8 +-\n", + " tests/unit/test_tf4rec.py | 11 +-\n", + " tests/unit/test_triton_inference.py | 3 +-\n", + " tests/unit/workflow/test_cpu_workflow.py | 6 +-\n", + " tests/unit/workflow/test_workflow.py | 92 ++++++++-\n", + " tox.ini | 10 +-\n", + " 86 files changed, 1393 insertions(+), 1177 deletions(-)\n", " delete mode 100644 .github/workflows/cpu-ci.yml\n", " create mode 100644 .github/workflows/cpu-packages.yml\n", " create mode 100644 .github/workflows/cpu-tests.yml\n", @@ -439,71 +535,72 @@ " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) (0.0.4)\n", - "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) 
(0.9.0+60.g2d60d237)\n", - "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+42.g9b186ee9) (1.9.3)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.56.4)\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.5)\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.2.5)\n", - "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (11.4.1)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (4.64.1)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (8.0.0)\n", - "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.5.0)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.19.6)\n", - "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.12.0)\n", - "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7.1)\n", - "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7.1)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (22.0)\n", - "Requirement already satisfied: numpy<1.26.0,>=1.18.5 in /usr/local/lib/python3.8/dist-packages (from scipy->nvtabular==1.6.0+42.g9b186ee9) (1.22.4)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.39.1)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (45.2.0)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (5.2.0)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2022.7)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.8.2)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.4.3)\n" + "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+58.gae580ada) (1.9.3)\n", + "Requirement already satisfied: 
merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+58.gae580ada) (0.9.0+96.gdd98a436)\n", + "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+58.gae580ada) (0.0.4)\n", + "Requirement already satisfied: numpy<1.26.0,>=1.18.5 in /usr/local/lib/python3.8/dist-packages (from scipy->nvtabular==1.6.0+58.gae580ada) (1.22.4)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (1.3.5)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (11.4.1)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (0.56.4)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (4.64.1)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (3.19.6)\n", + "Requirement already satisfied: dask>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (2023.3.2)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (1.2.5)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (1.12.0)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (22.0)\n", + "Requirement already satisfied: dask-cuda>=22.12.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (23.4.0)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.2.0)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.57.0)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.0)\n", - "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.3.0)\n", - "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.2.0)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0)\n", - "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (0.12.0)\n", - "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from 
distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (5.9.4)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.1.2)\n", - "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.0.4)\n", - "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (8.1.3)\n", - "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.4.0)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.7.0)\n", - "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.1)\n", - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.2.0)\n", - "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.26.13)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.0.0)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (3.11.0)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.14.0)\n", - "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (4.1.0)\n", - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0.4)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (2.1.1)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (1.0.1)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (6.0.1)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+42.g9b186ee9) (4.0.0)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (8.0.0)\n", + "Requirement already satisfied: distributed>=2022.11.1 in 
/usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (2023.3.2.1)\n", + "Requirement already satisfied: fsspec>=2022.7.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (2023.4.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (2022.7)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (5.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (0.39.1)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (45.2.0)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (0.12.0)\n", + "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (8.1.3)\n", + "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (2.2.0)\n", + "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (1.3.0)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (6.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (0.4.3)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (1.2.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (1.57.0)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from dask-cuda>=22.12.0->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (2.2.0)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (1.0.0)\n", + "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (3.1.2)\n", + "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from 
distributed>=2022.11.1->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (2.4.0)\n", + "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (6.1)\n", + "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (1.26.13)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (1.7.0)\n", + "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (1.0.4)\n", + "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (5.9.4)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (1.14.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (3.11.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (6.0.4)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (4.1.0)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->dask-cuda>=22.12.0->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (1.0.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (2.1.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (4.0.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->nvtabular==1.6.0+58.gae580ada) (6.0.1)\n", "Building wheels for collected packages: nvtabular\n", " Building wheel for nvtabular (PEP 517): started\n", " Building wheel for nvtabular (PEP 517): finished with status 'done'\n", - " Created wheel for nvtabular: filename=nvtabular-1.6.0+42.g9b186ee9-cp38-cp38-linux_x86_64.whl size=258506 sha256=7731e40e8914024a9c9ea9abe993404858d29604ae832237d2a69c1675161f23\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-18ktqhn2/wheels/df/bf/c2/9cc2a62fe6da42038c26a9c0c4e25f9767093528b102fa30a2\n", + " Created wheel for nvtabular: filename=nvtabular-1.6.0+58.gae580ada-cp38-cp38-linux_x86_64.whl size=259843 sha256=2adce1586183203caf25c45be5ae665301f512c8ef53b0c8a2f5ac712da8065b\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-3pmnk263/wheels/df/bf/c2/9cc2a62fe6da42038c26a9c0c4e25f9767093528b102fa30a2\n", "Successfully built nvtabular\n", "Installing collected packages: nvtabular\n", " Attempting uninstall: nvtabular\n", " Found existing installation: 
nvtabular 1.8.0\n", " Uninstalling nvtabular-1.8.0:\n", " Successfully uninstalled nvtabular-1.8.0\n", - "Successfully installed nvtabular-1.6.0+42.g9b186ee9\n" + "Successfully installed nvtabular-1.6.0+58.gae580ada\n" ] }, { @@ -527,14 +624,14 @@ "text": [ "From https://github.com/NVIDIA-Merlin/systems\n", " * branch main -> FETCH_HEAD\n", - " 20bb231..329cba4 main -> origin/main\n" + " 20bb231..f8d8808 main -> origin/main\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Updating 20bb231..329cba4\n", + "Updating 20bb231..f8d8808\n", "Fast-forward\n", " .github/ISSUE_TEMPLATE/bug-report.md | 17 +-\n", " .github/ISSUE_TEMPLATE/documentation-request.md | 12 +-\n", @@ -542,11 +639,13 @@ " .github/ISSUE_TEMPLATE/submit-question.md | 3 +-\n", " .github/ISSUE_TEMPLATE/task.md | 5 +-\n", " .github/release-drafter.yml | 44 +-\n", - " .github/workflows/cpu-ci.yml | 112 ++--\n", + " .github/workflows/cpu-ci.yml | 133 ++--\n", " .github/workflows/docs-preview-pr.yaml | 2 +-\n", " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", - " .github/workflows/gpu-ci.yml | 32 +-\n", + " .github/workflows/gpu-ci.yml | 36 +-\n", " .github/workflows/lint.yaml | 12 +-\n", + " .github/workflows/postmerge-cpu.yml | 96 +++\n", + " .github/workflows/postmerge-gpu.yml | 27 +\n", " .github/workflows/release-drafter.yml | 2 +-\n", " .pre-commit-config.yaml | 71 +-\n", " .prettierignore | 2 +\n", @@ -558,57 +657,91 @@ " ...ing-An-Implicit-Model-With-Merlin-Systems.ipynb | 5 +-\n", " ...ving-An-XGboost-Model-With-Merlin-Systems.ipynb | 5 +-\n", " ...erving-Ranking-Models-With-Merlin-Systems.ipynb | 5 +-\n", - " merlin/systems/dag/dictarray.py | 4 +-\n", - " merlin/systems/dag/op_runner.py | 1 -\n", - " merlin/systems/dag/ops/__init__.py | 11 +-\n", - " merlin/systems/dag/ops/faiss.py | 4 +-\n", - " merlin/systems/dag/ops/feast.py | 80 +--\n", - " merlin/systems/dag/ops/fil.py | 4 +-\n", - " merlin/systems/dag/ops/implicit.py | 72 +-\n", - " merlin/systems/dag/ops/operator.py | 189 +-----\n", - " merlin/systems/dag/ops/pytorch.py | 4 +-\n", - " merlin/systems/dag/ops/session_filter.py | 4 +-\n", - " merlin/systems/dag/ops/softmax_sampling.py | 17 +-\n", - " merlin/systems/dag/ops/unroll_features.py | 4 +-\n", - " merlin/systems/dag/ops/workflow.py | 4 +-\n", - " merlin/systems/dag/runtimes/triton/ops/implicit.py | 185 ++++++\n", - " merlin/systems/dag/runtimes/triton/ops/operator.py | 169 ++++-\n", - " merlin/systems/dag/runtimes/triton/ops/pytorch.py | 2 +-\n", - " .../systems/dag/runtimes/triton/ops/tensorflow.py | 12 +-\n", - " merlin/systems/dag/runtimes/triton/ops/workflow.py | 141 +++-\n", - " merlin/systems/dag/runtimes/triton/runtime.py | 14 +-\n", - " merlin/systems/triton/__init__.py | 33 +-\n", - " merlin/systems/triton/export.py | 724 +--------------------\n", - " merlin/systems/triton/models/executor_model.py | 34 +-\n", - " merlin/systems/triton/models/oprunner_model.py | 32 +-\n", - " merlin/systems/triton/models/pytorch_model.py | 127 ++--\n", - " merlin/systems/triton/models/workflow_model.py | 50 +-\n", - " merlin/systems/triton/utils.py | 35 +-\n", - " tests/conftest.py | 4 +-\n", - " ...erving_an_implicit_model_with_merlin_systems.py | 4 +-\n", + " merlin/systems/dag/__init__.py | 2 -\n", + " merlin/systems/dag/dictarray.py | 345 ----------\n", + " merlin/systems/dag/ensemble.py | 2 +-\n", + " merlin/systems/dag/node.py | 29 +-\n", + " merlin/systems/dag/op_runner.py | 68 --\n", + " merlin/systems/dag/ops/__init__.py | 22 +-\n", + " merlin/systems/dag/ops/faiss.py | 116 +---\n", + " 
merlin/systems/dag/ops/feast.py | 110 +---\n", + " merlin/systems/dag/ops/fil.py | 74 +--\n", + " merlin/systems/dag/ops/implicit.py | 84 +--\n", + " merlin/systems/dag/ops/operator.py | 216 +-----\n", + " merlin/systems/dag/ops/pytorch.py | 24 +-\n", + " merlin/systems/dag/ops/session_filter.py | 72 +-\n", + " merlin/systems/dag/ops/softmax_sampling.py | 61 +-\n", + " merlin/systems/dag/ops/tensorflow.py | 140 ++--\n", + " merlin/systems/dag/ops/unroll_features.py | 36 +-\n", + " merlin/systems/dag/ops/workflow.py | 29 +-\n", + " merlin/systems/dag/runtimes/triton/ops/fil.py | 43 +-\n", + " merlin/systems/dag/runtimes/triton/ops/operator.py | 84 ++-\n", + " merlin/systems/dag/runtimes/triton/ops/pytorch.py | 18 +-\n", + " .../systems/dag/runtimes/triton/ops/tensorflow.py | 32 +-\n", + " merlin/systems/dag/runtimes/triton/ops/workflow.py | 143 +++-\n", + " merlin/systems/dag/runtimes/triton/runtime.py | 36 +-\n", + " merlin/systems/triton/__init__.py | 118 ++--\n", + " merlin/systems/triton/conversions.py | 89 ++-\n", + " merlin/systems/triton/export.py | 729 +--------------------\n", + " merlin/systems/triton/models/executor_model.py | 38 +-\n", + " merlin/systems/triton/models/oprunner_model.py | 129 ----\n", + " merlin/systems/triton/models/pytorch_model.py | 139 ++--\n", + " merlin/systems/triton/models/workflow_model.py | 64 +-\n", + " merlin/systems/triton/utils.py | 50 +-\n", + " merlin/systems/workflow/base.py | 26 +-\n", + " merlin/systems/workflow/hugectr.py | 87 ---\n", + " merlin/systems/workflow/pytorch.py | 46 --\n", + " merlin/systems/workflow/tensorflow.py | 68 --\n", + " pytest.ini | 7 +-\n", + " tests/conftest.py | 36 +-\n", + " ...erving_an_implicit_model_with_merlin_systems.py | 12 +-\n", " ...serving_an_xgboost_model_with_merlin_systems.py | 4 +-\n", - " tests/unit/systems/dag/ops/test_ops.py | 20 +-\n", - " .../runtimes/local/ops/nvtabular/test_ensemble.py | 2 +-\n", - " .../triton/ops/fil/test_lightgbm_triton.py | 4 +-\n", + " tests/integration/tf/test_transformer_model.py | 103 +++\n", + " .../systems/dag/test_column.py => test_passing.py} | 15 +-\n", + " tests/unit/systems/dag/ops/test_ops.py | 101 ++-\n", + " .../dag/runtimes/local/ops/fil/test_lightgbm.py | 15 +-\n", + " .../dag/runtimes/local/ops/fil/test_sklearn.py | 15 +-\n", + " .../dag/runtimes/local/ops/fil/test_xgboost.py | 18 +-\n", + " .../runtimes/local/ops/nvtabular/test_ensemble.py | 10 +-\n", + " .../runtimes/local/ops/tensorflow/test_ensemble.py | 35 +-\n", + " .../dag/runtimes/local/ops/torch/test_op.py | 6 +-\n", + " .../triton/ops/fil/test_lightgbm_triton.py | 11 +-\n", " .../runtimes/triton/ops/fil/test_sklearn_triton.py | 4 +-\n", - " .../runtimes/triton/ops/fil/test_xgboost_triton.py | 4 +-\n", + " .../runtimes/triton/ops/fil/test_xgboost_triton.py | 7 +-\n", " .../dag/runtimes/triton/ops/torch/test_op.py | 4 +-\n", - " .../runtimes/triton/ops/workflow/test_ensemble.py | 67 +-\n", - " .../systems/dag/runtimes/triton/test_triton.py | 4 +-\n", - " tests/unit/systems/dag/test_dict_array.py | 4 +-\n", - " tests/unit/systems/dag/test_executors.py | 4 +-\n", - " tests/unit/systems/ops/faiss/test_executor.py | 4 +-\n", - " tests/unit/systems/ops/feast/test_op.py | 46 +-\n", - " tests/unit/systems/ops/fil/test_ensemble.py | 4 +-\n", + " .../runtimes/triton/ops/workflow/test_ensemble.py | 69 +-\n", + " .../systems/dag/runtimes/triton/test_triton.py | 21 +-\n", + " tests/unit/systems/dag/test_dict_array.py | 76 ---\n", + " tests/unit/systems/dag/test_ensemble.py | 4 +-\n", + " 
tests/unit/systems/dag/test_executors.py | 12 +-\n", + " tests/unit/systems/dag/test_op_runner.py | 210 ------\n", + " tests/unit/systems/ops/faiss/test_executor.py | 25 +-\n", + " tests/unit/systems/ops/feast/test_op.py | 76 +--\n", + " tests/unit/systems/ops/fil/test_ensemble.py | 21 +-\n", + " tests/unit/systems/ops/fil/test_forest.py | 47 +-\n", + " tests/unit/systems/ops/fil/test_op.py | 106 ++-\n", " tests/unit/systems/ops/implicit/test_executor.py | 4 +-\n", - " tests/unit/systems/ops/implicit/test_op.py | 11 +-\n", - " tests/unit/systems/ops/tf/test_ensemble.py | 4 +-\n", - " tests/unit/systems/utils/ops.py | 7 +-\n", + " tests/unit/systems/ops/implicit/test_op.py | 51 +-\n", + " tests/unit/systems/ops/tf/test_ensemble.py | 15 +-\n", + " tests/unit/systems/ops/tf/test_op.py | 6 +-\n", + " tests/unit/systems/utils/ops.py | 13 +-\n", + " tests/unit/systems/utils/tf.py | 65 +-\n", " tests/unit/test_export.py | 77 ---\n", - " tox.ini | 1 -\n", - " 70 files changed, 1072 insertions(+), 1580 deletions(-)\n", + " tox.ini | 42 +-\n", + " 95 files changed, 1738 insertions(+), 3531 deletions(-)\n", + " create mode 100644 .github/workflows/postmerge-cpu.yml\n", + " create mode 100644 .github/workflows/postmerge-gpu.yml\n", " create mode 100644 .prettierignore\n", - " create mode 100644 merlin/systems/dag/runtimes/triton/ops/implicit.py\n", + " delete mode 100644 merlin/systems/dag/dictarray.py\n", + " delete mode 100644 merlin/systems/dag/op_runner.py\n", + " delete mode 100644 merlin/systems/triton/models/oprunner_model.py\n", + " delete mode 100644 merlin/systems/workflow/hugectr.py\n", + " delete mode 100644 merlin/systems/workflow/pytorch.py\n", + " delete mode 100644 merlin/systems/workflow/tensorflow.py\n", + " create mode 100644 tests/integration/tf/test_transformer_model.py\n", + " rename tests/{unit/systems/dag/test_column.py => test_passing.py} (66%)\n", + " delete mode 100644 tests/unit/systems/dag/test_dict_array.py\n", + " delete mode 100644 tests/unit/systems/dag/test_op_runner.py\n", " delete mode 100644 tests/unit/test_export.py\n", "Processing /systems\n", " Installing build dependencies: started\n", @@ -617,78 +750,85 @@ " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: treelite==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.4.0)\n", - "Requirement already satisfied: treelite-runtime==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.4.0)\n", - "Requirement already satisfied: nvtabular>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (1.6.0+42.g9b186ee9)\n", - "Requirement already satisfied: requests<3,>=2.10 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (2.28.1)\n", - "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+61.g329cba4) (0.9.0+60.g2d60d237)\n", - "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from treelite==2.4.0->merlin-systems==0.7.0+61.g329cba4) (1.9.3)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.8/dist-packages (from treelite==2.4.0->merlin-systems==0.7.0+61.g329cba4) (1.22.4)\n", - "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from 
nvtabular>=1.0.0->merlin-systems==0.7.0+61.g329cba4) (0.0.4)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2.8)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (1.26.13)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2.1.1)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+61.g329cba4) (2019.11.28)\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.2.5)\n", - "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.5.0)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (22.0)\n", - "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7.1)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (8.0.0)\n", - "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7.1)\n", - "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.12.0)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.64.1)\n", - "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (11.4.1)\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.3.5)\n" + "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+85.gf8d8808) (0.9.0+96.gdd98a436)\n", + "Requirement already satisfied: requests<3,>=2.10 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+85.gf8d8808) (2.28.1)\n", + "Requirement already satisfied: treelite==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+85.gf8d8808) (2.4.0)\n", + "Requirement already satisfied: nvtabular>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+85.gf8d8808) (1.6.0+58.gae580ada)\n", + "Requirement already satisfied: treelite-runtime==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+85.gf8d8808) (2.4.0)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (1.12.0)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (3.19.6)\n", + "Requirement already satisfied: 
betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (1.2.5)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: distributed>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (2023.3.2.1)\n", + "Requirement already satisfied: dask>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (2023.3.2)\n", + "Requirement already satisfied: numpy>=1.22.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (1.22.4)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (8.0.0)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (4.64.1)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (11.4.1)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (1.3.5)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (0.56.4)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (22.0)\n", + "Requirement already satisfied: dask-cuda>=22.12.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (23.4.0)\n", + "Requirement already satisfied: fsspec>=2022.7.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (2023.4.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+85.gf8d8808) (2019.11.28)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+85.gf8d8808) (1.26.13)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+85.gf8d8808) (2.8)\n", + "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+85.gf8d8808) (2.1.1)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from treelite==2.4.0->merlin-systems==0.7.0+85.gf8d8808) (1.9.3)\n", + "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from nvtabular>=1.0.0->merlin-systems==0.7.0+85.gf8d8808) (0.0.4)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (1.57.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (1.3.0)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from 
betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (1.2.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (0.4.3)\n", + "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (3.1.2)\n", + "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (1.0.4)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (1.0.0)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (6.0)\n", + "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (5.9.4)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (2.2.0)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (1.7.0)\n", + "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (2.4.0)\n", + "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (6.1)\n", + "Requirement already satisfied: zict>=2.1.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (2.2.0)\n", + "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (0.12.0)\n", + "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (8.1.3)\n", + "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (1.3.0)\n", + "Requirement already satisfied: importlib-metadata>=4.13.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (5.2.0)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (2022.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (2.8.2)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (0.39.1)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from 
numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (45.2.0)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (4.1.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (6.0.4)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (2.1.1)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=2.1.0->distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (1.0.1)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata>=4.13.0->dask>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (3.11.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (1.14.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (6.0.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+85.gf8d8808) (4.0.0)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.56.4)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.19.6)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.4.3)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.2.0)\n", - "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.12.0)\n", - "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.4.0)\n", - "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.4)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0)\n", - "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.1)\n", - "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (5.9.4)\n", - 
"Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.0)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.1.2)\n", - "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (8.1.3)\n", - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.2.0)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.7.0)\n", - "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.2.0)\n", - "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.3.0)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.3.0)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.57.0)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.8.2)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2022.7)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (45.2.0)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (5.2.0)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (0.39.1)\n", - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0.4)\n", - "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.1.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (2.1.1)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.0.1)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from 
python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (1.14.0)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (3.11.0)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (6.0.1)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+61.g329cba4) (4.0.0)\n", "Building wheels for collected packages: merlin-systems\n", " Building wheel for merlin-systems (PEP 517): started\n", " Building wheel for merlin-systems (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-systems: filename=merlin_systems-0.7.0+61.g329cba4-py3-none-any.whl size=99480 sha256=ddfc752fa7ed3e5062808e4652c1d9967ac2d68ec1847cb24cfbe573a88ed6a9\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-lnbqyxql/wheels/1f/e9/71/1b0c6295aa7f4b37cb70292d96d87d9f38204674e6531bdda6\n", + " Created wheel for merlin-systems: filename=merlin_systems-0.7.0+85.gf8d8808-py3-none-any.whl size=82291 sha256=8c5627527a9d78da90574f8395d44aeae5cc2e8b3b312c1661ed142177d4c5a2\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-ti8uwtr9/wheels/1f/e9/71/1b0c6295aa7f4b37cb70292d96d87d9f38204674e6531bdda6\n", "Successfully built merlin-systems\n", "Installing collected packages: merlin-systems\n", " Attempting uninstall: merlin-systems\n", " Found existing installation: merlin-systems 0.9.0\n", " Uninstalling merlin-systems-0.9.0:\n", " Successfully uninstalled merlin-systems-0.9.0\n", - "Successfully installed merlin-systems-0.7.0+61.g329cba4\n" + "Successfully installed merlin-systems-0.7.0+85.gf8d8808\n" ] }, { @@ -712,42 +852,70 @@ "text": [ "From https://github.com/NVIDIA-Merlin/dataloader\n", " * branch main -> FETCH_HEAD\n", - " 5b3fe46..ce2215d main -> origin/main\n" + " 5b3fe46..8782c9d main -> origin/main\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Updating 5b3fe46..ce2215d\n", + "Updating 5b3fe46..8782c9d\n", "Fast-forward\n", - " .github/workflows/cpu-ci.yml | 81 -----\n", + " .github/workflows/cpu-ci.yml | 81 ----\n", " .github/workflows/cpu-packages.yml | 125 +++++++\n", " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", + " .github/workflows/models.yml | 43 +++\n", + " .github/workflows/nvtabular.yml | 43 +++\n", + " .github/workflows/systems.yml | 43 +++\n", + " .github/workflows/transformers4rec.yml | 43 +++\n", " .pre-commit-config.yaml | 14 +-\n", " ci/pr.gpu.Jenkinsfile | 44 +++\n", " docs/README.md | 28 +-\n", " examples/01a-Getting-started-Tensorflow.ipynb | 5 +-\n", - " examples/01b-Getting-started-Pytorch.ipynb | 9 +-\n", - " .../02-Multi-GPU-Tensorflow-with-Horovod.ipynb | 371 +++++++++++++++++++++\n", - " merlin/dataloader/jax.py | 3 +\n", - " merlin/dataloader/loader_base.py | 221 ++++--------\n", - " merlin/dataloader/ops/embeddings/embedding_op.py | 4 +-\n", - " .../ops/embeddings/torch_embedding_op.py | 4 +-\n", - " merlin/dataloader/tensorflow.py | 9 +-\n", - " merlin/dataloader/torch.py | 49 ++-\n", - " merlin/dataloader/utils/tf/tf_trainer.py | 2 +-\n", + " examples/01b-Getting-started-Pytorch.ipynb | 5 +-\n", + " .../02-Multi-GPU-Tensorflow-with-Horovod.ipynb | 371 
+++++++++++++++++++\n", + " merlin/dataloader/jax.py | 52 +--\n", + " merlin/dataloader/loader_base.py | 412 +++++++++------------\n", + " .../{embeddings/embedding_op.py => embeddings.py} | 44 +--\n", + " merlin/dataloader/ops/embeddings/__init__.py | 15 -\n", + " .../dataloader/ops/embeddings/tf_embedding_op.py | 101 -----\n", + " .../ops/embeddings/torch_embedding_op.py | 106 ------\n", + " merlin/dataloader/ops/padding.py | 88 +++++\n", + " merlin/dataloader/tensorflow.py | 320 ++++------------\n", + " merlin/dataloader/torch.py | 219 +++++------\n", + " merlin/dataloader/utils/tf/tf_trainer.py | 13 +-\n", + " tests/conftest.py | 11 +-\n", " .../test_multi_GPU_with_horovod_and_tensorflow.py | 28 ++\n", - " tests/unit/dataloader/test_tf_dataloader.py | 20 +-\n", - " tests/unit/dataloader/test_tf_embeddings.py | 24 +-\n", - " tests/unit/dataloader/test_torch_dataloader.py | 38 +++\n", - " tests/unit/dataloader/test_torch_embeddings.py | 12 +-\n", - " tox.ini | 1 +\n", - " 22 files changed, 801 insertions(+), 298 deletions(-)\n", + " tests/unit/dataloader/test_array_dataloader.py | 54 +++\n", + " tests/unit/dataloader/test_array_to_tensorflow.py | 54 +++\n", + " tests/unit/dataloader/test_array_to_torch.py | 69 ++++\n", + " .../{test_tf_embeddings.py => test_embeddings.py} | 109 +++---\n", + " tests/unit/dataloader/test_jax_dataloader.py | 29 +-\n", + " tests/unit/dataloader/test_padding.py | 46 +++\n", + " tests/unit/dataloader/test_tf_dataloader.py | 330 ++++++++---------\n", + " tests/unit/dataloader/test_torch_dataloader.py | 233 +++++++++---\n", + " tests/unit/dataloader/test_torch_embeddings.py | 242 ------------\n", + " tox.ini | 55 +++\n", + " 35 files changed, 1950 insertions(+), 1532 deletions(-)\n", " create mode 100644 .github/workflows/cpu-packages.yml\n", + " create mode 100644 .github/workflows/models.yml\n", + " create mode 100644 .github/workflows/nvtabular.yml\n", + " create mode 100644 .github/workflows/systems.yml\n", + " create mode 100644 .github/workflows/transformers4rec.yml\n", " create mode 100644 ci/pr.gpu.Jenkinsfile\n", " create mode 100644 examples/02-Multi-GPU-Tensorflow-with-Horovod.ipynb\n", + " rename merlin/dataloader/ops/{embeddings/embedding_op.py => embeddings.py} (85%)\n", + " delete mode 100644 merlin/dataloader/ops/embeddings/__init__.py\n", + " delete mode 100644 merlin/dataloader/ops/embeddings/tf_embedding_op.py\n", + " delete mode 100644 merlin/dataloader/ops/embeddings/torch_embedding_op.py\n", + " create mode 100644 merlin/dataloader/ops/padding.py\n", " create mode 100644 tests/examples/test_multi_GPU_with_horovod_and_tensorflow.py\n", + " create mode 100644 tests/unit/dataloader/test_array_dataloader.py\n", + " create mode 100644 tests/unit/dataloader/test_array_to_tensorflow.py\n", + " create mode 100644 tests/unit/dataloader/test_array_to_torch.py\n", + " rename tests/unit/dataloader/{test_tf_embeddings.py => test_embeddings.py} (65%)\n", + " create mode 100644 tests/unit/dataloader/test_padding.py\n", + " delete mode 100644 tests/unit/dataloader/test_torch_embeddings.py\n", "Processing /dataloader\n", " Installing build dependencies: started\n", " Installing build dependencies: finished with status 'done'\n", @@ -755,90 +923,91 @@ " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: merlin-core>=0.8.0 in /usr/local/lib/python3.8/dist-packages (from merlin-dataloader==0.0.2+43.gce2215d) 
(0.9.0+60.g2d60d237)\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.3.5)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (4.64.1)\n", - "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2022.7.1)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (8.0.0)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (22.0)\n", - "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (11.4.1)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (3.19.6)\n", - "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2022.7.1)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (0.56.4)\n", - "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.12.0)\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.2.5)\n", - "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2022.5.0)\n", - "Requirement already satisfied: numpy>=1.17.3; platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.22.4)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2.8.2)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2022.7)\n", - "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (8.1.3)\n", - "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2.4.0)\n", - "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2.2.0)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (3.1.2)\n", - "Requirement already 
satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.7.0)\n", - "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.0.4)\n", - "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (5.9.4)\n", - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2.2.0)\n", - "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (6.1)\n", - "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (0.12.0)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (6.0)\n", - "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.26.13)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.0.0)\n", - "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.3.0)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (45.2.0)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (0.39.1)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (5.2.0)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.3.0)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.57.0)\n" + "Requirement already satisfied: merlin-core>=0.8.0 in /usr/local/lib/python3.8/dist-packages (from merlin-dataloader==0.0.2+61.g8782c9d) (0.9.0+96.gdd98a436)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (0.56.4)\n", + "Requirement already satisfied: fsspec>=2022.7.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (2023.4.0)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (8.0.0)\n", + "Requirement already satisfied: 
dask-cuda>=22.12.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (23.4.0)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (1.3.5)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (1.2.5)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (11.4.1)\n", + "Requirement already satisfied: distributed>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (2023.3.2.1)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (22.0)\n", + "Requirement already satisfied: numpy>=1.22.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (1.22.4)\n", + "Requirement already satisfied: dask>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (2023.3.2)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (1.12.0)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (4.64.1)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (3.19.6)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (5.2.0)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (45.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (0.39.1)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from dask-cuda>=22.12.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (2.2.0)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (2022.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (2.8.2)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (0.4.3)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (1.2.0)\n", + "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (3.1.2)\n", + 
"Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (0.12.0)\n", + "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (2.4.0)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (0.4.3)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.2.0)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.14.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (2.1.1)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (1.0.1)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (3.11.0)\n", - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (6.0.4)\n", - "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (4.1.0)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (6.0.1)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+43.gce2215d) (4.0.0)\n", + "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (1.0.4)\n", + "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (5.9.4)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (2.2.0)\n", + "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (6.1)\n", + "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (8.1.3)\n", + "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from 
distributed>=2022.11.1->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (1.26.13)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (6.0)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (1.0.0)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (1.7.0)\n", + "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (1.3.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (1.57.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (3.11.0)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->dask-cuda>=22.12.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (1.0.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (1.14.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (6.0.4)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (4.1.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (2.1.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (4.0.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.8.0->merlin-dataloader==0.0.2+61.g8782c9d) (6.0.1)\n", "Building wheels for collected packages: merlin-dataloader\n", " Building wheel for merlin-dataloader (PEP 517): started\n", " Building wheel for merlin-dataloader (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-dataloader: filename=merlin_dataloader-0.0.2+43.gce2215d-py3-none-any.whl size=40867 sha256=1448516ec061e7ef5df449df29f4896705367b7602040fb55c679508f76d85a2\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-ukzco8eb/wheels/8c/19/5b/15dc04f5a977f6a7f73ed66c91996a687b1d9e3154a4765536\n", + " Created wheel for merlin-dataloader: filename=merlin_dataloader-0.0.2+61.g8782c9d-py3-none-any.whl size=35106 
sha256=fa1f42bb96e28202a3d7f568445715105e51cd88e96e2baed0f0d3e0981bf5a3\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-i0cb09xq/wheels/8c/19/5b/15dc04f5a977f6a7f73ed66c91996a687b1d9e3154a4765536\n", "Successfully built merlin-dataloader\n", "Installing collected packages: merlin-dataloader\n", " Attempting uninstall: merlin-dataloader\n", " Found existing installation: merlin-dataloader 0.0.4\n", " Uninstalling merlin-dataloader-0.0.4:\n", " Successfully uninstalled merlin-dataloader-0.0.4\n", - "Successfully installed merlin-dataloader-0.0.2+43.gce2215d\n", + "Successfully installed merlin-dataloader-0.0.2+61.g8782c9d\n", "Collecting matplotlib\n", " Downloading matplotlib-3.7.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (9.2 MB)\n", - "Collecting pillow>=6.2.0\n", - " Downloading Pillow-9.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)\n", "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (2.8.2)\n", - "Collecting kiwisolver>=1.0.1\n", - " Downloading kiwisolver-1.4.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.2 MB)\n", + "Collecting fonttools>=4.22.0\n", + " Downloading fonttools-4.39.3-py3-none-any.whl (1.0 MB)\n", "Requirement already satisfied: importlib-resources>=3.2.0; python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from matplotlib) (5.10.2)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (22.0)\n", - "Collecting contourpy>=1.0.1\n", - " Downloading contourpy-1.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (300 kB)\n", "Collecting cycler>=0.10\n", " Downloading cycler-0.11.0-py3-none-any.whl (6.4 kB)\n", "Requirement already satisfied: numpy>=1.20 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (1.22.4)\n", - "Collecting fonttools>=4.22.0\n", - " Downloading fonttools-4.39.0-py3-none-any.whl (1.0 MB)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (22.0)\n", + "Collecting contourpy>=1.0.1\n", + " Downloading contourpy-1.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (300 kB)\n", + "Collecting pillow>=6.2.0\n", + " Downloading Pillow-9.5.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)\n", + "Collecting kiwisolver>=1.0.1\n", + " Downloading kiwisolver-1.4.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.2 MB)\n", "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.8/dist-packages (from matplotlib) (3.0.9)\n", "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7->matplotlib) (1.14.0)\n", "Requirement already satisfied: zipp>=3.1.0; python_version < \"3.10\" in /usr/local/lib/python3.8/dist-packages (from importlib-resources>=3.2.0; python_version < \"3.10\"->matplotlib) (3.11.0)\n", - "Installing collected packages: pillow, kiwisolver, contourpy, cycler, fonttools, matplotlib\n", - "Successfully installed contourpy-1.0.7 cycler-0.11.0 fonttools-4.39.0 kiwisolver-1.4.4 matplotlib-3.7.1 pillow-9.4.0\n" + "Installing collected packages: fonttools, cycler, contourpy, pillow, kiwisolver, matplotlib\n", + "Successfully installed contourpy-1.0.7 cycler-0.11.0 fonttools-4.39.3 kiwisolver-1.4.4 matplotlib-3.7.1 pillow-9.5.0\n" ] } ], @@ -864,21 +1033,21 @@ "output_type": "stream", "text": [ "Collecting gdown\n", - " Downloading gdown-4.6.4-py3-none-any.whl (14 kB)\n", + " Downloading 
gdown-4.7.1-py3-none-any.whl (15 kB)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from gdown) (3.9.0)\n", "Requirement already satisfied: requests[socks] in /usr/local/lib/python3.8/dist-packages (from gdown) (2.28.1)\n", + "Requirement already satisfied: six in /usr/lib/python3/dist-packages (from gdown) (1.14.0)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from gdown) (4.64.1)\n", "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.8/dist-packages (from gdown) (4.11.1)\n", - "Requirement already satisfied: six in /usr/lib/python3/dist-packages (from gdown) (1.14.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from gdown) (3.9.0)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (2.1.1)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2019.11.28)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.26.13)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2019.11.28)\n", + "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (2.1.1)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2.8)\n", "Collecting PySocks!=1.5.7,>=1.5.6; extra == \"socks\"\n", " Downloading PySocks-1.7.1-py3-none-any.whl (16 kB)\n", "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.8/dist-packages (from beautifulsoup4->gdown) (2.3.2.post1)\n", "Installing collected packages: gdown, PySocks\n", - "Successfully installed PySocks-1.7.1 gdown-4.6.4\n" + "Successfully installed PySocks-1.7.1 gdown-4.7.1\n" ] }, { @@ -886,9 +1055,10 @@ "output_type": "stream", "text": [ "Downloading...\n", - "From: https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", + "From (uriginal): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", + "From (redirected): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV&confirm=t&uuid=522dba81-f22d-40ea-baec-dc798d7feb51\n", "To: /workspace/T4Rec_repro/rees46_ecom_dataset_small_for_ci.zip\n", - "100%|██████████| 43.4M/43.4M [00:07<00:00, 6.14MB/s]\n" + "100%|██████████| 43.4M/43.4M [00:07<00:00, 6.15MB/s]\n" ] }, { @@ -896,32 +1066,32 @@ "output_type": "stream", "text": [ "Get:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 InRelease [1581 B]\n", - "Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 Packages [920 kB]\n", + "Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 Packages [973 kB]\n", "Get:3 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]\n", "Get:4 http://archive.ubuntu.com/ubuntu focal InRelease [265 kB]\n", - "Get:5 http://security.ubuntu.com/ubuntu focal-security/universe amd64 Packages [1017 kB]\n", - "Get:6 http://archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]\n", - "Get:7 http://archive.ubuntu.com/ubuntu focal-backports InRelease [108 kB]\n", - "Get:8 http://archive.ubuntu.com/ubuntu focal/restricted amd64 Packages [33.4 kB]\n", - "Get:9 http://archive.ubuntu.com/ubuntu focal/main amd64 Packages [1275 kB]\n", 
- "Get:10 http://security.ubuntu.com/ubuntu focal-security/multiverse amd64 Packages [28.5 kB]\n", - "Get:11 http://security.ubuntu.com/ubuntu focal-security/main amd64 Packages [2544 kB]\n", - "Get:12 http://archive.ubuntu.com/ubuntu focal/universe amd64 Packages [11.3 MB]\n", - "Get:13 http://security.ubuntu.com/ubuntu focal-security/restricted amd64 Packages [1998 kB]\n", - "Get:14 http://archive.ubuntu.com/ubuntu focal/multiverse amd64 Packages [177 kB]\n", - "Get:15 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 Packages [3019 kB]\n", - "Get:16 http://archive.ubuntu.com/ubuntu focal-updates/restricted amd64 Packages [2134 kB]\n", - "Get:17 http://archive.ubuntu.com/ubuntu focal-updates/universe amd64 Packages [1312 kB]\n", - "Get:18 http://archive.ubuntu.com/ubuntu focal-updates/multiverse amd64 Packages [31.2 kB]\n", + "Get:5 http://security.ubuntu.com/ubuntu focal-security/multiverse amd64 Packages [28.5 kB]\n", + "Get:6 http://security.ubuntu.com/ubuntu focal-security/main amd64 Packages [2593 kB]\n", + "Get:7 http://archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]\n", + "Get:8 http://archive.ubuntu.com/ubuntu focal-backports InRelease [108 kB]\n", + "Get:9 http://archive.ubuntu.com/ubuntu focal/multiverse amd64 Packages [177 kB]\n", + "Get:10 http://archive.ubuntu.com/ubuntu focal/universe amd64 Packages [11.3 MB]\n", + "Get:11 http://security.ubuntu.com/ubuntu focal-security/restricted amd64 Packages [2065 kB]\n", + "Get:12 http://security.ubuntu.com/ubuntu focal-security/universe amd64 Packages [1028 kB]\n", + "Get:13 http://archive.ubuntu.com/ubuntu focal/main amd64 Packages [1275 kB]\n", + "Get:14 http://archive.ubuntu.com/ubuntu focal/restricted amd64 Packages [33.4 kB]\n", + "Get:15 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 Packages [3075 kB]\n", + "Get:16 http://archive.ubuntu.com/ubuntu focal-updates/multiverse amd64 Packages [31.2 kB]\n", + "Get:17 http://archive.ubuntu.com/ubuntu focal-updates/restricted amd64 Packages [2203 kB]\n", + "Get:18 http://archive.ubuntu.com/ubuntu focal-updates/universe amd64 Packages [1324 kB]\n", "Get:19 http://archive.ubuntu.com/ubuntu focal-backports/main amd64 Packages [55.2 kB]\n", "Get:20 http://archive.ubuntu.com/ubuntu focal-backports/universe amd64 Packages [28.6 kB]\n", - "Fetched 26.5 MB in 10s (2574 kB/s)\n", + "Fetched 26.8 MB in 9s (3067 kB/s)\n", "Reading package lists...\n", "Reading package lists...\n", "Building dependency tree...\n", "Reading state information...\n", "unzip is already the newest version (6.0-25ubuntu1.1).\n", - "0 upgraded, 0 newly installed, 0 to remove and 88 not upgraded.\n", + "0 upgraded, 0 newly installed, 0 to remove and 98 not upgraded.\n", "Archive: rees46_ecom_dataset_small_for_ci.zip\n", " creating: ecom_dataset/0001/\n", " inflating: ecom_dataset/0001/valid.parquet \n", @@ -951,7 +1121,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "id": "ceb3ae93", "metadata": {}, "outputs": [ @@ -959,7 +1129,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-03-15 06:40:18.761460: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-04-13 07:03:01.943949: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in 
performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" ] }, @@ -976,21 +1146,21 @@ "text": [ "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", - "2023-03-15 06:40:21.081059: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-15 06:40:21.081515: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-15 06:40:21.081687: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 07:03:04.162345: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 07:03:04.162771: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 07:03:04.162913: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n", - "2023-03-15 06:40:21.521454: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-04-13 07:03:04.732148: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-03-15 06:40:21.522384: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-15 06:40:21.522595: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-15 06:40:21.522750: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-15 06:40:22.246877: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-15 06:40:22.247075: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-15 06:40:22.247224: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-15 06:40:22.247337: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", - "2023-03-15 06:40:22.247404: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n" + "2023-04-13 07:03:04.733142: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 07:03:04.733326: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 07:03:04.733458: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 07:03:05.442807: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 07:03:05.442992: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read 
from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 07:03:05.443126: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 07:03:05.443238: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", + "2023-04-13 07:03:05.443295: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n" ] } ], @@ -1009,7 +1179,28 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 3, + "id": "574b955a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/cudf/utils/gpu_utils.py:148: UserWarning: No NVIDIA GPU detected\n", + " warnings.warn(\"No NVIDIA GPU detected\")\n" + ] + } + ], + "source": [ + "# import cudf\n", + "\n", + "# cudf.read_parquet('ecom_dataset/0001/train.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, "id": "11647dd3", "metadata": {}, "outputs": [], @@ -1020,7 +1211,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "id": "4ab4e0fb", "metadata": {}, "outputs": [], @@ -1030,7 +1221,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "id": "8d9903e6", "metadata": {}, "outputs": [], @@ -1049,7 +1240,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "id": "a6ade14a", "metadata": {}, "outputs": [], @@ -1114,7 +1305,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "id": "523fe2ac", "metadata": {}, "outputs": [ @@ -1123,54 +1314,44 @@ "output_type": "stream", "text": [ "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", - " warnings.warn(\n", - "2023-03-15 06:40:28.698077: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:436: UserWarning: Converting sparse IndexedSlices to a dense Tensor with 174720448 elements. This may consume a large amount of memory.\n", " warnings.warn(\n" ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 106s 145ms/step - loss: 7.4476 - recall_at_20: 0.1321 - mrr_at_20: 0.0713 - ndcg_at_20: 0.0847 - map_at_20: 0.0713 - precision_at_20: 0.0066 - regularization_loss: 0.0000e+00 - loss_batch: 7.4395\n", - "84/84 [==============================] - 8s 43ms/step - loss: 8.5501 - recall_at_20: 0.2267 - mrr_at_20: 0.0746 - ndcg_at_20: 0.1080 - map_at_20: 0.0746 - precision_at_20: 0.0113 - regularization_loss: 0.0000e+00 - loss_batch: 8.5600\n" + "ename": "LinkerError", + "evalue": "[222] Call to cuLinkAddData results in UNKNOWN_CUDA_ERROR\nptxas application ptx input, line 9; fatal : Unsupported .version 7.8; current version is '7.7'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mLinkerError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[8], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m model_transformer, xlnet_block \u001b[38;5;241m=\u001b[39m get_model()\n\u001b[0;32m----> 2\u001b[0m \u001b[43mmodel_transformer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrain\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbatch_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mepochs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_epoch\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mpre\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSequencePredictNext\u001b[49m\u001b[43m(\u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mschema\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtransformer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mxlnet_block\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 9\u001b[0m predict_last \u001b[38;5;241m=\u001b[39m mm\u001b[38;5;241m.\u001b[39mSequencePredictLast(schema\u001b[38;5;241m=\u001b[39mvalid\u001b[38;5;241m.\u001b[39mschema, target\u001b[38;5;241m=\u001b[39mtarget, transformer\u001b[38;5;241m=\u001b[39mxlnet_block)\n\u001b[1;32m 10\u001b[0m model_transformer\u001b[38;5;241m.\u001b[39mevaluate(\n\u001b[1;32m 11\u001b[0m valid,\n\u001b[1;32m 12\u001b[0m batch_size\u001b[38;5;241m=\u001b[39mbatch_size,\n\u001b[1;32m 13\u001b[0m pre\u001b[38;5;241m=\u001b[39mpredict_last,\n\u001b[1;32m 14\u001b[0m return_dict\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 15\u001b[0m )\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/models/tf/models/base.py:1363\u001b[0m, in \u001b[0;36mBaseModel.fit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing, train_metrics_steps, pre, **kwargs)\u001b[0m\n\u001b[1;32m 1360\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtrain_pre, SequenceTransform):\n\u001b[1;32m 1361\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtrain_pre\u001b[38;5;241m.\u001b[39mconfigure_for_train()\n\u001b[0;32m-> 1363\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mfit_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1365\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pre:\n\u001b[1;32m 1366\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtrain_pre\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py:70\u001b[0m, in \u001b[0;36mfilter_traceback..error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 67\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n\u001b[1;32m 68\u001b[0m \u001b[38;5;66;03m# To get the full stack trace, call:\u001b[39;00m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;66;03m# `tf.debugging.disable_traceback_filtering()`\u001b[39;00m\n\u001b[0;32m---> 70\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\u001b[38;5;241m.\u001b[39mwith_traceback(filtered_tb) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28mNone\u001b[39m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m filtered_tb\n", + "File 
\u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/dataloader/tensorflow.py:78\u001b[0m, in \u001b[0;36mLoader.__getitem__\u001b[0;34m(self, index)\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, index):\n\u001b[1;32m 71\u001b[0m \u001b[38;5;124;03m\"\"\"Gets batch at position `index`.\u001b[39;00m\n\u001b[1;32m 72\u001b[0m \n\u001b[1;32m 73\u001b[0m \u001b[38;5;124;03m Note: This returns the next batch in the iterator.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 76\u001b[0m \u001b[38;5;124;03m don't currently support fetching a batch by index.\u001b[39;00m\n\u001b[1;32m 77\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 78\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__next__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/dataloader/tensorflow.py:82\u001b[0m, in \u001b[0;36mLoader.__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__next__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 81\u001b[0m \u001b[38;5;124;03m\"\"\"Get the next batch from the dataloader\"\"\"\u001b[39;00m\n\u001b[0;32m---> 82\u001b[0m converted_batch \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconvert_batch(\u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__next__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 83\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m map_fn \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_map_fns:\n\u001b[1;32m 84\u001b[0m converted_batch \u001b[38;5;241m=\u001b[39m map_fn(\u001b[38;5;241m*\u001b[39mconverted_batch)\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/dataloader/loader_base.py:261\u001b[0m, in \u001b[0;36mLoaderBase.__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 259\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__next__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 260\u001b[0m \u001b[38;5;124;03m\"\"\"Get the next batch.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 261\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_next_batch\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/dataloader/loader_base.py:332\u001b[0m, in \u001b[0;36mLoaderBase._get_next_batch\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 330\u001b[0m \u001b[38;5;66;03m# try to iterate through existing batches\u001b[39;00m\n\u001b[1;32m 331\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 332\u001b[0m batch \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mnext\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_batch_itr)\n\u001b[1;32m 333\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m:\n\u001b[1;32m 334\u001b[0m \u001b[38;5;66;03m# anticipate any more chunks getting created\u001b[39;00m\n\u001b[1;32m 335\u001b[0m \u001b[38;5;66;03m# if not, raise the StopIteration\u001b[39;00m\n\u001b[1;32m 336\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_working \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_buff\u001b[38;5;241m.\u001b[39mempty:\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/dataloader/loader_base.py:369\u001b[0m, in \u001b[0;36mLoaderBase.make_tensors\u001b[0;34m(self, gdf, use_row_lengths)\u001b[0m\n\u001b[1;32m 352\u001b[0m \u001b[38;5;129m@annotate\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmake_tensors\u001b[39m\u001b[38;5;124m\"\u001b[39m, color\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdarkgreen\u001b[39m\u001b[38;5;124m\"\u001b[39m, domain\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmerlin_dataloader\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 353\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmake_tensors\u001b[39m(\u001b[38;5;28mself\u001b[39m, gdf, use_row_lengths\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[1;32m 354\u001b[0m \u001b[38;5;124;03m\"\"\"Yields batches of tensors from a dataframe\u001b[39;00m\n\u001b[1;32m 355\u001b[0m \n\u001b[1;32m 356\u001b[0m \u001b[38;5;124;03m Parameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 367\u001b[0m \n\u001b[1;32m 368\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 369\u001b[0m tensors_by_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_convert_df_to_tensors\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgdf\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 370\u001b[0m rows_per_batch \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_rows_per_batch(\u001b[38;5;28mlen\u001b[39m(gdf))\n\u001b[1;32m 372\u001b[0m tensor_batches \u001b[38;5;241m=\u001b[39m {}\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/nvtx/nvtx.py:101\u001b[0m, in \u001b[0;36mannotate.__call__..inner\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(func)\n\u001b[1;32m 99\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 100\u001b[0m libnvtx_push_range(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mattributes, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdomain\u001b[38;5;241m.\u001b[39mhandle)\n\u001b[0;32m--> 101\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 102\u001b[0m libnvtx_pop_range(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdomain\u001b[38;5;241m.\u001b[39mhandle)\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/dataloader/loader_base.py:549\u001b[0m, in \u001b[0;36mLoaderBase._convert_df_to_tensors\u001b[0;34m(self, gdf)\u001b[0m\n\u001b[1;32m 545\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[1;32m 547\u001b[0m leaves, col_offsets \u001b[38;5;241m=\u001b[39m pull_apart_list(column, device\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdevice)\n\u001b[0;32m--> 549\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m 
\u001b[38;5;28misinstance\u001b[39m(\u001b[43mleaves\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m, \u001b[38;5;28mlist\u001b[39m):\n\u001b[1;32m 550\u001b[0m leaves, nest_offsets \u001b[38;5;241m=\u001b[39m pull_apart_list(leaves, device\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdevice)\n\u001b[1;32m 551\u001b[0m col_offsets \u001b[38;5;241m=\u001b[39m nest_offsets\u001b[38;5;241m.\u001b[39miloc[col_offsets[:]]\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/nvtx/nvtx.py:101\u001b[0m, in \u001b[0;36mannotate.__call__..inner\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(func)\n\u001b[1;32m 99\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 100\u001b[0m libnvtx_push_range(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mattributes, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdomain\u001b[38;5;241m.\u001b[39mhandle)\n\u001b[0;32m--> 101\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 102\u001b[0m libnvtx_pop_range(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdomain\u001b[38;5;241m.\u001b[39mhandle)\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/cudf/core/series.py:1171\u001b[0m, in \u001b[0;36mSeries.__getitem__\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m 1169\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39miloc[arg]\n\u001b[1;32m 1170\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1171\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloc\u001b[49m\u001b[43m[\u001b[49m\u001b[43marg\u001b[49m\u001b[43m]\u001b[49m\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/nvtx/nvtx.py:101\u001b[0m, in \u001b[0;36mannotate.__call__..inner\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(func)\n\u001b[1;32m 99\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 100\u001b[0m libnvtx_push_range(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mattributes, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdomain\u001b[38;5;241m.\u001b[39mhandle)\n\u001b[0;32m--> 101\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 102\u001b[0m libnvtx_pop_range(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdomain\u001b[38;5;241m.\u001b[39mhandle)\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/cudf/core/series.py:255\u001b[0m, in 
\u001b[0;36m_SeriesLocIndexer.__getitem__\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m 253\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n\u001b[1;32m 254\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 255\u001b[0m arg \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_loc_to_iloc(arg)\n\u001b[1;32m 256\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mKeyError\u001b[39;00m, \u001b[38;5;167;01mIndexError\u001b[39;00m, \u001b[38;5;167;01mValueError\u001b[39;00m):\n\u001b[1;32m 257\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(arg)\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/cudf/core/series.py:294\u001b[0m, in \u001b[0;36m_SeriesLocIndexer._loc_to_iloc\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m 292\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m found_index\n\u001b[1;32m 293\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 294\u001b[0m found_index \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_frame\u001b[38;5;241m.\u001b[39mindex\u001b[38;5;241m.\u001b[39m_values\u001b[38;5;241m.\u001b[39mfind_first_value(\n\u001b[1;32m 295\u001b[0m arg, closest\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 296\u001b[0m )\n\u001b[1;32m 297\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m found_index\n\u001b[1;32m 298\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mKeyError\u001b[39;00m, \u001b[38;5;167;01mIndexError\u001b[39;00m, \u001b[38;5;167;01mValueError\u001b[39;00m):\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/cudf/core/column/numerical.py:566\u001b[0m, in \u001b[0;36mNumericalColumn.find_first_value\u001b[0;34m(self, value, closest)\u001b[0m\n\u001b[1;32m 564\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m value \u001b[38;5;241m>\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax():\n\u001b[1;32m 565\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m)\n\u001b[0;32m--> 566\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_find_value\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mclosest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcudautils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfind_first\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mgt\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/cudf/core/column/numerical.py:534\u001b[0m, in \u001b[0;36mNumericalColumn._find_value\u001b[0;34m(self, value, closest, find, compare)\u001b[0m\n\u001b[1;32m 532\u001b[0m found \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 533\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 534\u001b[0m found \u001b[38;5;241m=\u001b[39m \u001b[43mfind\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 535\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata_array_view\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 536\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 537\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 538\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 539\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m found \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 540\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mis_monotonic_increasing \u001b[38;5;129;01mand\u001b[39;00m closest:\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/cudf/utils/cudautils.py:114\u001b[0m, in \u001b[0;36mfind_first\u001b[0;34m(arr, val, mask, compare)\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfind_first\u001b[39m(arr, val, mask\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, compare\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124meq\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 101\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;124;03m Returns the index of the first occurrence of *val* in *arr*..\u001b[39;00m\n\u001b[1;32m 103\u001b[0m \u001b[38;5;124;03m Or the first occurrence of *arr* *compare* *val*, if *compare* is not eq\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03m compare: str ('gt', 'lt', or 'eq' (default))\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 114\u001b[0m found_col \u001b[38;5;241m=\u001b[39m \u001b[43mfind_index_of_val\u001b[49m\u001b[43m(\u001b[49m\u001b[43marr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mval\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcompare\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompare\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 115\u001b[0m found_col \u001b[38;5;241m=\u001b[39m found_col\u001b[38;5;241m.\u001b[39mfind_and_replace([arr\u001b[38;5;241m.\u001b[39msize], [\u001b[38;5;28;01mNone\u001b[39;00m], \u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 117\u001b[0m min_index \u001b[38;5;241m=\u001b[39m found_col\u001b[38;5;241m.\u001b[39mmin()\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/cudf/utils/cudautils.py:93\u001b[0m, in \u001b[0;36mfind_index_of_val\u001b[0;34m(arr, val, mask, compare)\u001b[0m\n\u001b[1;32m 89\u001b[0m gpu_mark_found_float\u001b[38;5;241m.\u001b[39mforall(found\u001b[38;5;241m.\u001b[39msize)(\n\u001b[1;32m 90\u001b[0m arr, val, found, arr\u001b[38;5;241m.\u001b[39msize\n\u001b[1;32m 91\u001b[0m )\n\u001b[1;32m 92\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m---> 93\u001b[0m \u001b[43mgpu_mark_found_int\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforall\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfound\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msize\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 94\u001b[0m \u001b[43m \u001b[49m\u001b[43marr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mval\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfound\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43marr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msize\u001b[49m\n\u001b[1;32m 95\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cudf\u001b[38;5;241m.\u001b[39mcore\u001b[38;5;241m.\u001b[39mcolumn\u001b[38;5;241m.\u001b[39mcolumn\u001b[38;5;241m.\u001b[39mas_column(found)\u001b[38;5;241m.\u001b[39mset_mask(mask)\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/numba/cuda/dispatcher.py:438\u001b[0m, in \u001b[0;36mForAll.__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 436\u001b[0m specialized \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdispatcher\n\u001b[1;32m 437\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 438\u001b[0m specialized \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdispatcher\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mspecialize\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 439\u001b[0m blockdim \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compute_thread_per_block(specialized)\n\u001b[1;32m 440\u001b[0m griddim \u001b[38;5;241m=\u001b[39m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mntasks \u001b[38;5;241m+\u001b[39m blockdim \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m) \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m blockdim\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/numba/cuda/dispatcher.py:667\u001b[0m, in \u001b[0;36mCUDADispatcher.specialize\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 664\u001b[0m targetoptions \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtargetoptions\n\u001b[1;32m 665\u001b[0m specialization \u001b[38;5;241m=\u001b[39m CUDADispatcher(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpy_func,\n\u001b[1;32m 666\u001b[0m targetoptions\u001b[38;5;241m=\u001b[39mtargetoptions)\n\u001b[0;32m--> 667\u001b[0m \u001b[43mspecialization\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompile\u001b[49m\u001b[43m(\u001b[49m\u001b[43margtypes\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 668\u001b[0m specialization\u001b[38;5;241m.\u001b[39mdisable_compile()\n\u001b[1;32m 669\u001b[0m specialization\u001b[38;5;241m.\u001b[39m_specialized \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/numba/cuda/dispatcher.py:796\u001b[0m, in \u001b[0;36mCUDADispatcher.compile\u001b[0;34m(self, sig)\u001b[0m\n\u001b[1;32m 794\u001b[0m kernel \u001b[38;5;241m=\u001b[39m _Kernel(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpy_func, argtypes, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtargetoptions)\n\u001b[1;32m 795\u001b[0m \u001b[38;5;66;03m# We call bind to force codegen, so that there is a cubin to cache\u001b[39;00m\n\u001b[0;32m--> 796\u001b[0m \u001b[43mkernel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbind\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 797\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_cache\u001b[38;5;241m.\u001b[39msave_overload(sig, kernel)\n\u001b[1;32m 799\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39madd_overload(kernel, argtypes)\n", + "File 
\u001b[0;32m/usr/local/lib/python3.8/dist-packages/numba/cuda/dispatcher.py:178\u001b[0m, in \u001b[0;36m_Kernel.bind\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mbind\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 175\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 176\u001b[0m \u001b[38;5;124;03m Force binding to current CUDA context\u001b[39;00m\n\u001b[1;32m 177\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 178\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_codelibrary\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_cufunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/numba/cuda/codegen.py:208\u001b[0m, in \u001b[0;36mCUDACodeLibrary.get_cufunc\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 205\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m cufunc:\n\u001b[1;32m 206\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cufunc\n\u001b[0;32m--> 208\u001b[0m cubin \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_cubin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdevice\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompute_capability\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 209\u001b[0m module \u001b[38;5;241m=\u001b[39m ctx\u001b[38;5;241m.\u001b[39mcreate_module_image(cubin)\n\u001b[1;32m 211\u001b[0m \u001b[38;5;66;03m# Load\u001b[39;00m\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/numba/cuda/codegen.py:181\u001b[0m, in \u001b[0;36mCUDACodeLibrary.get_cubin\u001b[0;34m(self, cc)\u001b[0m\n\u001b[1;32m 179\u001b[0m ptxes \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_ptxes(cc\u001b[38;5;241m=\u001b[39mcc)\n\u001b[1;32m 180\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m ptx \u001b[38;5;129;01min\u001b[39;00m ptxes:\n\u001b[0;32m--> 181\u001b[0m \u001b[43mlinker\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madd_ptx\u001b[49m\u001b[43m(\u001b[49m\u001b[43mptx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencode\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m path \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_linking_files:\n\u001b[1;32m 183\u001b[0m linker\u001b[38;5;241m.\u001b[39madd_file_guess_ext(path)\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/numba/cuda/cudadrv/driver.py:2708\u001b[0m, in \u001b[0;36mCtypesLinker.add_ptx\u001b[0;34m(self, ptx, name)\u001b[0m\n\u001b[1;32m 2705\u001b[0m driver\u001b[38;5;241m.\u001b[39mcuLinkAddData(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandle, enums\u001b[38;5;241m.\u001b[39mCU_JIT_INPUT_PTX,\n\u001b[1;32m 2706\u001b[0m ptxbuf, \u001b[38;5;28mlen\u001b[39m(ptx), namebuf, \u001b[38;5;241m0\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 2707\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m CudaAPIError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m-> 2708\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m LinkerError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m (e, 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39merror_log))\n", + "\u001b[0;31mLinkerError\u001b[0m: [222] Call to cuLinkAddData results in UNKNOWN_CUDA_ERROR\nptxas application ptx input, line 9; fatal : Unsupported .version 7.8; current version is '7.7'" ] - }, - { - "data": { - "text/plain": [ - "{'loss': 8.550110816955566,\n", - " 'recall_at_20': 0.2287944257259369,\n", - " 'mrr_at_20': 0.07337629050016403,\n", - " 'ndcg_at_20': 0.10753783583641052,\n", - " 'map_at_20': 0.07337629050016403,\n", - " 'precision_at_20': 0.011439722031354904,\n", - " 'regularization_loss': 0.0,\n", - " 'loss_batch': 8.98563003540039}" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -1193,7 +1374,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "id": "cd25c97a", "metadata": {}, "outputs": [], @@ -1205,7 +1386,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "id": "18476ff8", "metadata": {}, "outputs": [], @@ -1215,7 +1396,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "id": "019b49e7", "metadata": {}, "outputs": [], @@ -1225,267 +1406,31 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "id": "4d519e09", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
 [pandas HTML table markup lost in extraction: DataFrame with a single `sess_pid_seq` list column, 86553 rows × 1 columns; the same data is shown in the text/plain output that follows]
" - ], - "text/plain": [ - " sess_pid_seq\n", - "0 [2350, 27483, 2350, 221, 223, 450]\n", - "1 [26562, 3233, 20844, 20946]\n", - "2 [20611, 9566, 3411, 6358, 8434, 1282, 1218]\n", - "3 [749, 476]\n", - "4 [53988, 54681, 20488, 26337, 42209, 56005, 263...\n", - "... ...\n", - "86548 [6547, 5690]\n", - "86549 [20613, 30652, 20613]\n", - "86550 [6, 9, 6]\n", - "86551 [2584, 6531, 16567, 5737, 6531, 19856, 2584, 1...\n", - "86552 [4793, 10632]\n", - "\n", - "[86553 rows x 1 columns]" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "wf.fit_transform(train).compute()" ] }, { "cell_type": "code", - "execution_count": 48, + "execution_count": null, "id": "34f29750", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " 
(sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n", - "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_predict_next_layer_call_fn, sequence_predict_next_layer_call_and_return_conditional_losses, sequence_predict_last_layer_call_fn while saving (showing 5 of 110). These functions will not be directly callable after loading.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: t4rec_model/assets\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: t4rec_model/assets\n", - "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:83: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n", - "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " config[i] = tf.keras.utils.serialize_keras_object(layer)\n", - "/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:134: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " return generic_utils.serialize_keras_object(obj)\n", - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). 
Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "model_transformer.save('t4rec_model')" ] }, { "cell_type": "code", - "execution_count": 49, - "id": "e8cba91e", + "execution_count": null, + "id": "47c206ca", "metadata": {}, "outputs": [], - "source": [ - "rm -rf " - ] + "source": [] }, { "cell_type": "code", From 60054c0a98a953a6844ae3e15a547e8ffb613b27 Mon Sep 17 00:00:00 2001 From: Radek Osmulski Date: Thu, 13 Apr 2023 21:29:35 +1000 Subject: [PATCH 10/15] update --- examples/usecases/transformers-next-item-prediction.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/usecases/transformers-next-item-prediction.ipynb b/examples/usecases/transformers-next-item-prediction.ipynb index e864685a81..090453979d 100644 --- a/examples/usecases/transformers-next-item-prediction.ipynb +++ b/examples/usecases/transformers-next-item-prediction.ipynb @@ -1016,7 +1016,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.2" + "version": "3.8.10" } }, "nbformat": 4, From 7d42d4dee1b75c62c131d8ee66bbeaae992cbc99 Mon Sep 17 00:00:00 2001 From: Radek Osmulski Date: Fri, 14 Apr 2023 14:14:09 +1000 Subject: [PATCH 11/15] update --- ...nd_save_model_for_benchmarking-Copy1.ipynb | 1975 +++++++++++++++++ 1 file changed, 1975 insertions(+) create mode 100644 T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb diff --git a/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb b/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb new file mode 100644 index 0000000000..74b19fa9d3 --- /dev/null +++ b/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb @@ -0,0 +1,1975 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "d062ceda", + "metadata": {}, + "outputs": [], + "source": [ + "# %%bash\n", + "\n", + "# # cd /models && git fetch origin && git checkout origin/tf/transformer-api && pip install .\n", + "# cd /models && git checkout main && git pull origin main && pip install .\n", + "# cd /core && git checkout main && git pull origin main && pip install .\n", + "# cd /nvtabular && git checkout main && git pull origin main && pip install .\n", + "# cd /systems && git checkout main && git pull origin main && pip install .\n", + "# cd /dataloader && git checkout main && git pull origin main && pip install .\n", + "\n", + "# ---\n", + "# pip install matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e9929dc8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: gdown in /usr/local/lib/python3.8/dist-packages (4.7.1)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from gdown) (4.64.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from gdown) (3.9.0)\n", + "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.8/dist-packages (from gdown) (4.11.1)\n", + "Requirement already satisfied: six in /usr/lib/python3/dist-packages (from gdown) (1.14.0)\n", + "Requirement already satisfied: requests[socks] in /usr/local/lib/python3.8/dist-packages (from gdown) (2.28.1)\n", + "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.8/dist-packages (from beautifulsoup4->gdown) (2.3.2.post1)\n", + "Requirement already satisfied: 
certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2019.11.28)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.26.13)\n", + "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (2.1.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2.8)\n", + "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6; extra == \"socks\" in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.7.1)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Downloading...\n", + "From (uriginal): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", + "From (redirected): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV&confirm=t&uuid=0dd96474-79af-47bb-9148-b96d64204e14\n", + "To: /workspace/T4Rec_repro/rees46_ecom_dataset_small_for_ci.zip\n", + "100%|██████████| 43.4M/43.4M [00:12<00:00, 3.62MB/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 InRelease\n", + "Hit:2 http://archive.ubuntu.com/ubuntu focal InRelease\n", + "Hit:3 http://security.ubuntu.com/ubuntu focal-security InRelease\n", + "Hit:4 http://archive.ubuntu.com/ubuntu focal-updates InRelease\n", + "Hit:5 http://archive.ubuntu.com/ubuntu focal-backports InRelease\n", + "Reading package lists...\n", + "Reading package lists...\n", + "Building dependency tree...\n", + "Reading state information...\n", + "unzip is already the newest version (6.0-25ubuntu1.1).\n", + "0 upgraded, 0 newly installed, 0 to remove and 98 not upgraded.\n", + "Archive: rees46_ecom_dataset_small_for_ci.zip\n", + " creating: ecom_dataset/0001/\n", + " inflating: ecom_dataset/0001/valid.parquet \n", + " extracting: ecom_dataset/0001/.zip \n", + " inflating: ecom_dataset/0001/train.parquet \n", + " inflating: ecom_dataset/0001/test.parquet \n", + " creating: ecom_dataset/0002/\n", + " inflating: ecom_dataset/0002/valid.parquet \n", + " inflating: ecom_dataset/0002/train.parquet \n", + " inflating: ecom_dataset/0002/test.parquet \n" + ] + } + ], + "source": [ + "%%bash\n", + "\n", + "rm -rf ecom_dataset\n", + "mkdir -p ecom_dataset\n", + "\n", + "pip install gdown\n", + "# gdown https://drive.google.com/uc?id=1BvCHc4eXComuNK93bKhRM6cbg9y5p350 # <-- full dataset\n", + "gdown https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", + "apt-get update -y\n", + "apt-get install unzip -y\n", + "unzip -d ecom_dataset \"rees46_ecom_dataset_small_for_ci.zip\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4a0105a7", + "metadata": {}, + "outputs": [], + "source": [ + "# !cd /dataloader && git checkout main && git pull origin main && git checkout ce2215d8f871d0fb8c71900f7b914a226aea7c24 && pip install ." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8101aa27", + "metadata": {}, + "outputs": [], + "source": [ + "# !cd /core && git checkout main && git pull origin main && pip install ." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "0f799172", + "metadata": {}, + "outputs": [], + "source": [ + "# %%writefile /core/merlin/dag/graph.py\n", + "\n", + "# #\n", + "# # Copyright (c) 2022, NVIDIA CORPORATION.\n", + "# #\n", + "# # Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# # you may not use this file except in compliance with the License.\n", + "# # You may obtain a copy of the License at\n", + "# #\n", + "# # http://www.apache.org/licenses/LICENSE-2.0\n", + "# #\n", + "# # Unless required by applicable law or agreed to in writing, software\n", + "# # distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# # See the License for the specific language governing permissions and\n", + "# # limitations under the License.\n", + "# #\n", + "\n", + "# import logging\n", + "# from collections import deque\n", + "# from typing import Dict, Optional\n", + "\n", + "# from merlin.dag.node import (\n", + "# Node,\n", + "# _combine_schemas,\n", + "# iter_nodes,\n", + "# postorder_iter_nodes,\n", + "# preorder_iter_nodes,\n", + "# )\n", + "# from merlin.schema import Schema\n", + "\n", + "# LOG = logging.getLogger(\"merlin\")\n", + "\n", + "\n", + "# class Graph:\n", + "# \"\"\"\n", + "# Represents an DAG composed of Nodes, each of which contains an operator that\n", + "# transforms dataframes or dataframe-like data\n", + "# \"\"\"\n", + "\n", + "# def __init__(self, output_node: Node, subgraphs: Optional[Dict[str, Node]] = None):\n", + "# self.output_node = output_node\n", + "# self.subgraphs = subgraphs or {}\n", + "\n", + "# parents_with_deps = self.output_node.parents_with_dependencies\n", + "# parents_with_deps.append(output_node)\n", + "\n", + "# for name, sg in self.subgraphs.items():\n", + "# if sg not in parents_with_deps:\n", + "# raise ValueError(\n", + "# f\"The output node of subgraph {name} does not exist in the provided graph.\"\n", + "# )\n", + "\n", + "# def subgraph(self, name: str) -> \"Graph\":\n", + "# if name not in self.subgraphs.keys():\n", + "# raise ValueError(f\"No subgraph named {name}. 
Options are: {self.subgraphs.keys()}\")\n", + "# return Graph(self.subgraphs[name])\n", + "\n", + "# @property\n", + "# def input_dtypes(self):\n", + "# if self.input_schema:\n", + "# return {\n", + "# name: col_schema.dtype\n", + "# for name, col_schema in self.input_schema.column_schemas.items()\n", + "# }\n", + "# else:\n", + "# return {}\n", + "\n", + "# @property\n", + "# def output_dtypes(self):\n", + "# if self.output_schema:\n", + "# return {\n", + "# name: col_schema.dtype\n", + "# for name, col_schema in self.output_schema.column_schemas.items()\n", + "# }\n", + "# else:\n", + "# return {}\n", + "\n", + "# @property\n", + "# def column_mapping(self):\n", + "# nodes = preorder_iter_nodes(self.output_node)\n", + "# column_mapping = self.output_node.column_mapping\n", + "# for node in list(nodes)[1:]:\n", + "# node_map = node.column_mapping\n", + "# for output_col, input_cols in column_mapping.items():\n", + "# early_inputs = []\n", + "# for input_col in input_cols:\n", + "# early_inputs += node_map.get(input_col, [input_col])\n", + "# column_mapping[output_col] = early_inputs\n", + "\n", + "# return column_mapping\n", + "\n", + "# def construct_schema(self, root_schema: Schema, preserve_dtypes=False) -> \"Graph\":\n", + "# \"\"\"\n", + "# Given the schema of a dataset to transform, determine the output schema of the graph\n", + "\n", + "# Parameters\n", + "# ----------\n", + "# root_schema : Schema\n", + "# The schema of a dataset to be transformed with this DAG\n", + "# preserve_dtypes : bool, optional\n", + "# Whether to keep any dtypes that may already be present in the schemas, by default False\n", + "\n", + "# Returns\n", + "# -------\n", + "# Graph\n", + "# This DAG after the schemas have been filled in\n", + "# \"\"\"\n", + "# nodes = list(postorder_iter_nodes(self.output_node))\n", + "\n", + "# self._compute_node_schemas(root_schema, nodes, preserve_dtypes)\n", + "# # self._validate_node_schemas(root_schema, nodes, preserve_dtypes)\n", + "\n", + "# return self\n", + "\n", + "# def _compute_node_schemas(self, root_schema, nodes, preserve_dtypes=False):\n", + "# for node in nodes:\n", + "# node.compute_schemas(root_schema, preserve_dtypes=preserve_dtypes)\n", + "\n", + "# def _validate_node_schemas(self, root_schema, nodes, strict_dtypes=False):\n", + "# for node in nodes:\n", + "# node.validate_schemas(root_schema, strict_dtypes=strict_dtypes)\n", + "\n", + "# @property\n", + "# def input_schema(self):\n", + "# # leaf_node input and output schemas are the same (aka selection)\n", + "# return _combine_schemas(self.leaf_nodes)\n", + "\n", + "# @property\n", + "# def leaf_nodes(self):\n", + "# return [node for node in postorder_iter_nodes(self.output_node) if not node.parents]\n", + "\n", + "# @property\n", + "# def output_schema(self):\n", + "# return self.output_node.output_schema\n", + "\n", + "# def _input_columns(self):\n", + "# input_cols = []\n", + "# for node in iter_nodes([self.output_node]):\n", + "# upstream_output_cols = []\n", + "\n", + "# for upstream_node in node.parents_with_dependencies:\n", + "# upstream_output_cols += upstream_node.output_columns.names\n", + "\n", + "# upstream_output_cols = _get_unique(upstream_output_cols)\n", + "# input_cols += list(set(node.input_columns.names) - set(upstream_output_cols))\n", + "\n", + "# return _get_unique(input_cols)\n", + "\n", + "# def remove_inputs(self, to_remove):\n", + "# \"\"\"\n", + "# Removes columns from a Graph\n", + "\n", + "# Starting at the leaf nodes, trickle down looking for columns to remove,\n", + "# 
when found remove but then must propagate the removal of any other\n", + "# output columns derived from that column.\n", + "\n", + "# Parameters\n", + "# -----------\n", + "# graph : Graph\n", + "# The graph to remove columns from\n", + "# to_remove : array_like\n", + "# A list of input column names to remove from the graph\n", + "\n", + "# Returns\n", + "# -------\n", + "# Graph\n", + "# The same graph with columns removed\n", + "# \"\"\"\n", + "# nodes_to_process = deque([(node, to_remove) for node in self.leaf_nodes])\n", + "\n", + "# while nodes_to_process:\n", + "# node, columns_to_remove = nodes_to_process.popleft()\n", + "# if node.input_schema and len(node.input_schema):\n", + "# output_columns_to_remove = node.remove_inputs(columns_to_remove)\n", + "\n", + "# for child in node.children:\n", + "# nodes_to_process.append(\n", + "# (child, list(set(to_remove + output_columns_to_remove)))\n", + "# )\n", + "\n", + "# if not len(node.input_schema):\n", + "# node.remove_child(child)\n", + "\n", + "# # remove any dependencies that do not have an output schema\n", + "# node.dependencies = [\n", + "# dep for dep in node.dependencies if dep.output_schema and len(dep.output_schema)\n", + "# ]\n", + "\n", + "# if not node.input_schema or not len(node.input_schema):\n", + "# for parent in node.parents:\n", + "# parent.remove_child(node)\n", + "# for dependency in node.dependencies:\n", + "# dependency.remove_child(node)\n", + "# del node\n", + "\n", + "# return self\n", + "\n", + "# @classmethod\n", + "# def get_nodes_by_op_type(cls, nodes, op_type):\n", + "# return set(node for node in iter_nodes(nodes) if isinstance(node.op, op_type))\n", + "\n", + "\n", + "# def _get_schemaless_nodes(nodes):\n", + "# schemaless_nodes = []\n", + "# for node in iter_nodes(nodes):\n", + "# if node.input_schema is None:\n", + "# schemaless_nodes.append(node)\n", + "\n", + "# return set(schemaless_nodes)\n", + "\n", + "\n", + "# def _get_unique(cols):\n", + "# # Need to preserve order in unique-column list\n", + "# return list({x: x for x in cols}.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ab4f272d", + "metadata": {}, + "outputs": [], + "source": [ + "# !cd /core && pip install ." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "ceb3ae93", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-04-13 11:21:28.090236: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. 
The old module will be deleted in version 2.11.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", + "2023-04-13 11:21:30.471061: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:21:30.471514: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:21:30.471678: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[INFO]: sparse_operation_kit is imported\n", + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11.\n", + "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.1.4-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n", + "[SOK INFO] Initialize finished, communication tool: horovod\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-04-13 11:21:30.757567: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-04-13 11:21:30.758435: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:21:30.758639: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:21:30.758792: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:21:31.508591: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:21:31.508802: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:21:31.508961: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:21:31.509071: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:42] Overriding orig_value setting because 
the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", + "2023-04-13 11:21:31.509079: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", + "2023-04-13 11:21:31.509140: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", + "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import os\n", + "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", + "import gc\n", + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "\n", + "from merlin.schema.tags import Tags\n", + "from merlin.io.dataset import Dataset\n", + "import merlin.models.tf as mm" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "11647dd3", + "metadata": {}, + "outputs": [], + "source": [ + "train = Dataset(\"ecom_dataset/0001/train.parquet\")\n", + "valid = Dataset(\"ecom_dataset/0002/test.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "4ab4e0fb", + "metadata": {}, + "outputs": [], + "source": [ + "target = 'sess_pid_seq'\n", + "seq_name = target" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "8d9903e6", + "metadata": {}, + "outputs": [], + "source": [ + "# a couple of starter hyperparams\n", + "\n", + "d_model = 192\n", + "n_layer = 3\n", + "n_head = 16\n", + "batch_size = 128\n", + "learning_rate = 0.0006667377132554976\n", + "n_epoch = 1\n", + "item_embedding_dim = 448 \n", + "item_id_embeddings_init_std = 3" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "410ea223", + "metadata": {}, + "outputs": [], + "source": [ + "# seq_name = 'seq'\n", + "# target = seq_name" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "4328f03a", + "metadata": {}, + "outputs": [], + "source": [ + "from nvtabular.inference.triton import export_tensorflow_ensemble\n", + "from nvtabular import Workflow\n", + "from nvtabular.ops import Categorify, Rename" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4571b92b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "d5a9dd50", + "metadata": {}, + "outputs": [], + "source": [ + "ops = ['sess_pid_seq'] >> Categorify(dtype=np.int32) #>> Rename(name=seq_name)\n", + "\n", + "wf = Workflow(ops)\n", + "\n", + "train = wf.fit_transform(train)\n", + "valid = wf.transform(valid)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "3116726e", + "metadata": {}, + "outputs": [], + "source": [ + "# cat rees46_schema_modified.pbtxt" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "69e8f95c", + "metadata": {}, + "outputs": [], + "source": [ + "# %%writefile rees46_schema_modified_2.pbtxt\n", + "\n", + "# feature {\n", + "# name: \"seq\"\n", + "# value_count {\n", + "# min: 2\n", + "# }\n", + "# type: INT\n", + "# int_domain {\n", + "# name: \"seq\"\n", + "# min: 1\n", + "# max: 390000\n", + "# is_categorical: true\n", + "# }\n", + "# annotation {\n", + "# tag: \"item_id\"\n", + "# tag: \"list\"\n", + "# tag: \"categorical\"\n", + "# tag: \"item\"\n", + "# }\n", + "# 
}" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "a6ade14a", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n", + "\n", + "def get_model():\n", + " mlp_block = mm.MLPBlock(\n", + " [d_model],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )\n", + "\n", + " schema = TensorflowMetadata.from_proto_text_file(\n", + " './',\n", + " file_name='rees46_schema_modified.pbtxt'\n", + " ).to_merlin_schema()\n", + "\n", + " train.schema = schema\n", + " \n", + " schema_model = schema.select_by_tag(Tags.ITEM_ID)\n", + " input_block = mm.InputBlockV2(\n", + " schema_model,\n", + " categorical=mm.Embeddings(\n", + " schema_model.select_by_tag(Tags.CATEGORICAL),\n", + " dim=item_embedding_dim,\n", + " sequence_combiner=None,\n", + " )\n", + " )\n", + "\n", + " train.schema = train.schema.select_by_name(seq_name)\n", + "\n", + " xlnet_block = mm.XLNetBlock(d_model=d_model, n_head=n_head, n_layer=n_layer)\n", + "\n", + " dense_block = mm.SequentialBlock(\n", + " input_block,\n", + " mlp_block,\n", + " xlnet_block\n", + " )\n", + "\n", + " mlp_block2 = mm.MLPBlock(\n", + " [item_embedding_dim],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )\n", + "\n", + " prediction_task = mm.CategoricalOutput(\n", + " to_call=input_block[\"categorical\"][target],\n", + " )\n", + "\n", + " model_transformer = mm.Model(dense_block, mlp_block2, prediction_task)\n", + "\n", + " optimizer = tf.keras.optimizers.Adam(\n", + " learning_rate=learning_rate,\n", + " )\n", + "\n", + " model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", + " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[20])\n", + " )\n", + " return model_transformer, xlnet_block" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "523fe2ac", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n", + "2023-04-13 11:21:38.342588: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:436: UserWarning: Converting sparse IndexedSlices to a dense Tensor with 174720448 elements. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 106s 144ms/step - loss: 7.3129 - recall_at_20: 0.1424 - mrr_at_20: 0.0802 - ndcg_at_20: 0.0939 - map_at_20: 0.0802 - precision_at_20: 0.0071 - regularization_loss: 0.0000e+00 - loss_batch: 7.3149\n", + "84/84 [==============================] - 4s 27ms/step - loss: 8.5848 - recall_at_20: 0.2229 - mrr_at_20: 0.0736 - ndcg_at_20: 0.1066 - map_at_20: 0.0736 - precision_at_20: 0.0111 - regularization_loss: 0.0000e+00 - loss_batch: 8.5971\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.584781646728516,\n", + " 'recall_at_20': 0.2308632731437683,\n", + " 'mrr_at_20': 0.07471762597560883,\n", + " 'ndcg_at_20': 0.10908268392086029,\n", + " 'map_at_20': 0.07471762597560883,\n", + " 'precision_at_20': 0.011543160304427147,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 9.130510330200195}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")\n", + "\n", + "# model_transformer.save('t4rec_model')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "5bd66ba8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): 
TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n", + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n", + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n", + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_predict_next_layer_call_fn, sequence_predict_next_layer_call_and_return_conditional_losses, sequence_predict_last_layer_call_fn while saving (showing 5 of 114). These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmpkph1f3_r/model.savedmodel/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmpkph1f3_r/model.savedmodel/assets\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:100: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " config[i] = tf.keras.utils.serialize_keras_object(layer)\n", + "/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:134: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " return generic_utils.serialize_keras_object(obj)\n", + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). 
Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n", + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" + ] + } + ], + "source": [ + "from merlin.systems.dag.ops.workflow import TransformWorkflow\n", + "from merlin.systems.dag.ops.tensorflow import PredictTensorflow\n", + "\n", + "serving_operators = [seq_name] >> TransformWorkflow(wf) >> PredictTensorflow(model_transformer)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "3ef1e5fc", + "metadata": {}, + "outputs": [], + "source": [ + "# import merlin.models.tf as mm\n", + "# import tensorflow as tf\n", + "# tf_model_path = os.path.join('t4rec_model')\n", + "\n", + "# model = tf.keras.models.load_model(tf_model_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "e2a7b6ee", + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "\n", + "rm -rf /workspace/models_for_benchmarking\n", + "mkdir -p /workspace/models_for_benchmarking" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "55ad012c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 [pandas HTML table markup lost in extraction: one-row view of the `sess_pid_seq` column schema (name, tags, dtype, is_list, is_ragged, domain min 1 / max 390000, value_count min 2); the same data is shown in the text/plain output that follows]
" + ], + "text/plain": [ + "[{'name': 'sess_pid_seq', 'tags': {, , , , }, 'properties': {'domain': {'min': 1, 'max': 390000, 'name': 'sess_pid_seq'}, 'value_count': {'min': 2, 'max': None}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=2, max=None)))), 'is_list': True, 'is_ragged': True}]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.schema.select_by_name('sess_pid_seq')" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "1a39b4f8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n", + "WARNING:absl:Found untraced 
functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_predict_next_layer_call_fn, sequence_predict_next_layer_call_and_return_conditional_losses, sequence_predict_last_layer_call_fn while saving (showing 5 of 114). These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel/assets\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:100: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " config[i] = tf.keras.utils.serialize_keras_object(layer)\n", + "/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:134: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " return generic_utils.serialize_keras_object(obj)\n", + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. 
Compile it manually.\n" + ] + } + ], + "source": [ + "from merlin.systems.dag.ensemble import Ensemble\n", + "\n", + "ensemble = Ensemble(serving_operators, wf.input_schema)\n", + "ens_conf, node_confs = ensemble.export(\"/workspace/models_for_benchmarking\")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "1720a5af", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ls: cannot access '/workspace/models_for_benchmarking/1': No such file or directory\r\n" + ] + } + ], + "source": [ + "ls /workspace/models_for_benchmarking/1" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "d7cdc6cc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "I0413 11:24:28.716029 1527 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x7f7f2a000000' with size 268435456\n", + "I0413 11:24:28.716361 1527 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 67108864\n", + "I0413 11:24:28.718446 1527 model_lifecycle.cc:459] loading: 0_transformworkflowtriton:1\n", + "I0413 11:24:28.718465 1527 model_lifecycle.cc:459] loading: 1_predicttensorflowtriton:1\n", + "I0413 11:24:28.718478 1527 model_lifecycle.cc:459] loading: executor_model:1\n", + "I0413 11:24:28.924940 1527 tensorflow.cc:2536] TRITONBACKEND_Initialize: tensorflow\n", + "I0413 11:24:28.924955 1527 tensorflow.cc:2546] Triton TRITONBACKEND API version: 1.10\n", + "I0413 11:24:28.924960 1527 tensorflow.cc:2552] 'tensorflow' TRITONBACKEND API version: 1.10\n", + "I0413 11:24:28.924962 1527 tensorflow.cc:2576] backend configuration:\n", + "{\"cmdline\":{\"auto-complete-config\":\"true\",\"min-compute-capability\":\"6.000000\",\"backend-directory\":\"/opt/tritonserver/backends\",\"default-max-batch-size\":\"4\"}}\n", + "2023-04-13 11:24:30.207841: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-04-13 11:24:32.085748: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:32.086174: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:32.086365: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", + "I0413 11:24:33.803267 1527 python_be.cc:1856] TRITONBACKEND_ModelInstanceInitialize: executor_model_0 (GPU device 0)\n", + "2023-04-13 11:24:35.316462: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized 
with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-04-13 11:24:37.126873: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:37.127251: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:37.127427: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "I0413 11:24:37.157059 1527 tensorflow.cc:2642] TRITONBACKEND_ModelInitialize: 1_predicttensorflowtriton (version 1)\n", + "I0413 11:24:37.157179 1527 model_lifecycle.cc:694] successfully loaded 'executor_model' version 1\n", + "2023-04-13 11:24:37.157805: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-04-13 11:24:37.178699: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }\n", + "2023-04-13 11:24:37.178742: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-04-13 11:24:37.178876: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-04-13 11:24:37.179781: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:37.196068: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:37.196289: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:37.196570: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:37.196747: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:37.196909: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:37.197031: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", + "2023-04-13 
11:24:37.203975: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 38618 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", + "2023-04-13 11:24:37.262568: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled\n", + "2023-04-13 11:24:37.271889: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.\n", + "2023-04-13 11:24:37.678751: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-04-13 11:24:37.745105: I tensorflow/cc/saved_model/loader.cc:325] SavedModel load for tags { serve }; Status: success: OK. Took 587310 microseconds.\n", + "2023-04-13 11:24:39.105154: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-04-13 11:24:40.997532: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:40.997994: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:40.998186: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", + "I0413 11:24:42.684588 1527 tensorflow.cc:2691] TRITONBACKEND_ModelInstanceInitialize: 1_predicttensorflowtriton_0 (GPU device 0)\n", + "2023-04-13 11:24:42.684902: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-04-13 11:24:42.702205: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }\n", + "2023-04-13 11:24:42.702239: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-04-13 11:24:42.702447: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:42.702659: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:42.702822: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:42.703025: I 
tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:42.703189: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:42.703311: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 38618 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", + "2023-04-13 11:24:42.742722: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-04-13 11:24:43.330311: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-04-13 11:24:43.395816: I tensorflow/cc/saved_model/loader.cc:325] SavedModel load for tags { serve }; Status: success: OK. Took 710922 microseconds.\n", + "I0413 11:24:43.395921 1527 python_be.cc:1856] TRITONBACKEND_ModelInstanceInitialize: 0_transformworkflowtriton_0 (GPU device 0)\n", + "I0413 11:24:43.396107 1527 model_lifecycle.cc:694] successfully loaded '1_predicttensorflowtriton' version 1\n", + "2023-04-13 11:24:44.668497: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-04-13 11:24:46.525315: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:46.525768: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:46.525978: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "I0413 11:24:46.583396 1527 model_lifecycle.cc:694] successfully loaded '0_transformworkflowtriton' version 1\n", + "I0413 11:24:46.583508 1527 server.cc:563] \n", + "+------------------+------+\n", + "| Repository Agent | Path |\n", + "+------------------+------+\n", + "+------------------+------+\n", + "\n", + "I0413 11:24:46.583587 1527 server.cc:590] \n", + "+------------+-----------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "| Backend | Path | Config |\n", + "+------------+-----------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "| python | /opt/tritonserver/backends/python/libtriton_python.so | 
{\"cmdline\":{\"auto-complete-config\":\"true\",\"min-compute-capability\":\"6.000000\",\"backend-directory\":\"/opt/tritonserver/backends\",\"default-max-batch-size\":\"4\"}} |\n", + "| tensorflow | /opt/tritonserver/backends/tensorflow2/libtriton_tensorflow2.so | {\"cmdline\":{\"auto-complete-config\":\"true\",\"min-compute-capability\":\"6.000000\",\"backend-directory\":\"/opt/tritonserver/backends\",\"default-max-batch-size\":\"4\"}} |\n", + "+------------+-----------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n", + "I0413 11:24:46.583634 1527 server.cc:633] \n", + "+---------------------------+---------+--------+\n", + "| Model | Version | Status |\n", + "+---------------------------+---------+--------+\n", + "| 0_transformworkflowtriton | 1 | READY |\n", + "| 1_predicttensorflowtriton | 1 | READY |\n", + "| executor_model | 1 | READY |\n", + "+---------------------------+---------+--------+\n", + "\n", + "I0413 11:24:46.610538 1527 metrics.cc:864] Collecting metrics for GPU 0: Quadro RTX 8000\n", + "I0413 11:24:46.610778 1527 metrics.cc:757] Collecting CPU metrics\n", + "I0413 11:24:46.610913 1527 tritonserver.cc:2264] \n", + "+----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "| Option | Value |\n", + "+----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "| server_id | triton |\n", + "| server_version | 2.28.0 |\n", + "| server_extensions | classification sequence model_repository model_repository(unload_dependents) schedule_policy model_configuration system_shared_memory cuda_shared_memory binary_tensor_data statistics trace logging |\n", + "| model_repository_path[0] | /workspace/models_for_benchmarking/ |\n", + "| model_control_mode | MODE_NONE |\n", + "| strict_model_config | 0 |\n", + "| rate_limit | OFF |\n", + "| pinned_memory_pool_byte_size | 268435456 |\n", + "| cuda_memory_pool_byte_size{0} | 67108864 |\n", + "| response_cache_byte_size | 0 |\n", + "| min_supported_compute_capability | 6.0 |\n", + "| strict_readiness | 1 |\n", + "| exit_timeout | 30 |\n", + "+----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n", + "I0413 11:24:46.611676 1527 grpc_server.cc:4819] Started GRPCInferenceService at 0.0.0.0:8001\n", + "I0413 11:24:46.611833 1527 http_server.cc:3477] Started HTTPService at 0.0.0.0:8000\n", + "I0413 11:24:46.652586 1527 http_server.cc:184] Started Metrics Service at 0.0.0.0:8002\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-04-13 11:25:37.504455: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" + ] + } + ], + "source": [ + "import nvtabular.inference.triton as nvt_triton\n", + "import tritonclient.grpc as grpcclient\n", + "import subprocess\n", + "\n", + "subprocess.Popen(['tritonserver', '--model-repository=/workspace/models_for_benchmarking/'])" + ] + }, + 
{ + "cell_type": "code", + "execution_count": null, + "id": "6f63b425", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a772eeb", + "metadata": {}, + "outputs": [], + "source": [ + "# !pkill triton" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f6ed7b5a", + "metadata": {}, + "outputs": [], + "source": [ + "import tritonhttpclient\n", + "try:\n", + " triton_client = tritonhttpclient.InferenceServerClient(url=\"localhost:8000\", verbose=True)\n", + " print(\"client created.\")\n", + "except Exception as e:\n", + " print(\"channel creation failed: \" + str(e))\n", + "triton_client.is_server_live()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10c2a62e", + "metadata": {}, + "outputs": [], + "source": [ + "validation_data.iloc[]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c2723e9", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.systems.triton import convert_df_to_triton_input\n", + "\n", + "validation_data = valid.compute()\n", + "inputs = convert_df_to_triton_input(wf.input_schema, validation_data.iloc[:1])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa9fc0dd", + "metadata": {}, + "outputs": [], + "source": [ + "inputs[0].name()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ae7eb08", + "metadata": {}, + "outputs": [], + "source": [ + "inputs[0].shape()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac3596c3", + "metadata": {}, + "outputs": [], + "source": [ + "inputs[1].name()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18f8e77d", + "metadata": {}, + "outputs": [], + "source": [ + "inputs[1].shape()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "292b58da", + "metadata": {}, + "outputs": [], + "source": [ + "validation_data.iloc[:1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8e1fd90", + "metadata": {}, + "outputs": [], + "source": [ + "wf.input_schema" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a79c58f", + "metadata": {}, + "outputs": [], + "source": [ + "import tritonclient.grpc as grpcclient\n", + "\n", + "with grpcclient.InferenceServerClient(\"localhost:8001\") as client:\n", + " response = client.infer('1_predicttensorflowtriton', inputs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6dd51a6", + "metadata": {}, + "outputs": [], + "source": [ + "response.get_output('sess_pid_seq/categorical_output')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba6712bb", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "637eb3f0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd62f641", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "d1bc6530", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. 
Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n", + "WARNING:absl:Function `_wrapped_model` contains input name(s) sess_pid_seq with unsupported characters which will be renamed to sess_pid_seq_1 in the SavedModel.\n", + "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_predict_next_layer_call_fn, sequence_predict_next_layer_call_and_return_conditional_losses, sequence_predict_last_layer_call_fn while saving (showing 5 of 110). 
These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /workspace/models_for_benchmarking/0_predicttensorflowtriton/1/model.savedmodel/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /workspace/models_for_benchmarking/0_predicttensorflowtriton/1/model.savedmodel/assets\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:83: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " config[i] = tf.keras.utils.serialize_keras_object(layer)\n", + "/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:134: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " return generic_utils.serialize_keras_object(obj)\n", + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. 
Compile it manually.\n" + ] + } + ], + "source": [ + "from merlin.systems.dag.ensemble import Ensemble\n", + "\n", + "ensemble = Ensemble(serving_operators, train.schema)\n", + "ens_conf, node_confs = ensemble.export(\"/workspace/models_for_benchmarking\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "8d390999", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "name: \"0_predicttensorflowtriton\"\r\n", + "platform: \"tensorflow_savedmodel\"\r\n", + "input {\r\n", + " name: \"sess_pid_seq\"\r\n", + " data_type: TYPE_INT32\r\n", + " dims: -1\r\n", + " dims: 1\r\n", + "}\r\n", + "input {\r\n", + " name: \"sess_pid_seq_1\"\r\n", + " data_type: TYPE_INT32\r\n", + " dims: -1\r\n", + " dims: 1\r\n", + "}\r\n", + "output {\r\n", + " name: \"sess_pid_seq/categorical_output\"\r\n", + " data_type: TYPE_FP32\r\n", + " dims: -1\r\n", + " dims: 390001\r\n", + "}\r\n", + "parameters {\r\n", + " key: \"TF_GRAPH_TAG\"\r\n", + " value {\r\n", + " string_value: \"serve\"\r\n", + " }\r\n", + "}\r\n", + "parameters {\r\n", + " key: \"TF_SIGNATURE_DEF\"\r\n", + " value {\r\n", + " string_value: \"serving_default\"\r\n", + " }\r\n", + "}\r\n", + "backend: \"tensorflow\"\r\n" + ] + } + ], + "source": [ + "cat /workspace/models_for_benchmarking/0_predicttensorflowtriton/config.pbtxt" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "f7fe741c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting /workspace/models_for_benchmarking/0_predicttensorflowtriton/config.pbtxt\n" + ] + } + ], + "source": [ + "%%writefile /workspace/models_for_benchmarking/0_predicttensorflowtriton/config.pbtxt\n", + "\n", + "name: \"0_predicttensorflowtriton\"\n", + "platform: \"tensorflow_savedmodel\"\n", + "input {\n", + " name: \"sess_pid_seq\"\n", + " data_type: TYPE_INT32\n", + " dims: -1\n", + " dims: 1\n", + "}\n", + "input {\n", + " name: \"sess_pid_seq_1\"\n", + " data_type: TYPE_INT32\n", + " dims: -1\n", + " dims: 1\n", + "}\n", + "output {\n", + " name: \"sess_pid_seq/categorical_output\"\n", + " data_type: TYPE_FP32\n", + " dims: -1\n", + " dims: 390001\n", + "}\n", + "parameters {\n", + " key: \"TF_GRAPH_TAG\"\n", + " value {\n", + " string_value: \"serve\"\n", + " }\n", + "}\n", + "parameters {\n", + " key: \"TF_SIGNATURE_DEF\"\n", + " value {\n", + " string_value: \"serving_default\"\n", + " }\n", + "}\n", + "backend: \"tensorflow\"" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "9cfe8bca", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "name: \"executor_model\"\r\n", + "platform: \"merlin_executor\"\r\n", + "input {\r\n", + " name: \"sess_pid_seq__values\"\r\n", + " data_type: TYPE_INT64\r\n", + " dims: -1\r\n", + " dims: -1\r\n", + "}\r\n", + "input {\r\n", + " name: \"sess_pid_seq__lengths\"\r\n", + " data_type: TYPE_INT32\r\n", + " dims: -1\r\n", + " dims: -1\r\n", + "}\r\n", + "output {\r\n", + " name: \"sess_pid_seq/categorical_output\"\r\n", + " data_type: TYPE_FP32\r\n", + " dims: -1\r\n", + " dims: 390001\r\n", + "}\r\n", + "backend: \"python\"\r\n" + ] + } + ], + "source": [ + "cat /workspace/models_for_benchmarking/executor_model/config.pbtxt" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "a659255d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting 
/workspace/models_for_benchmarking/executor_model/config.pbtxt\n" + ] + } + ], + "source": [ + "%%writefile /workspace/models_for_benchmarking/executor_model/config.pbtxt\n", + "\n", + "name: \"executor_model\"\n", + "platform: \"merlin_executor\"\n", + "input {\n", + " name: \"sess_pid_seq__values\"\n", + " data_type: TYPE_INT64\n", + " dims: -1\n", + " dims: -1\n", + "}\n", + "input {\n", + " name: \"sess_pid_seq__nnzs\"\n", + " data_type: TYPE_INT64\n", + " dims: -1\n", + " dims: -1\n", + "}\n", + "output {\n", + " name: \"sess_pid_seq/categorical_output\"\n", + " data_type: TYPE_FP32\n", + " dims: -1\n", + " dims: 390001\n", + "}\n", + "backend: \"python\"" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "ddf2dc55", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.\r\n", + "#\r\n", + "# Redistribution and use in source and binary forms, with or without\r\n", + "# modification, are permitted provided that the following conditions\r\n", + "# are met:\r\n", + "# * Redistributions of source code must retain the above copyright\r\n", + "# notice, this list of conditions and the following disclaimer.\r\n", + "# * Redistributions in binary form must reproduce the above copyright\r\n", + "# notice, this list of conditions and the following disclaimer in the\r\n", + "# documentation and/or other materials provided with the distribution.\r\n", + "# * Neither the name of NVIDIA CORPORATION nor the names of its\r\n", + "# contributors may be used to endorse or promote products derived\r\n", + "# from this software without specific prior written permission.\r\n", + "#\r\n", + "# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY\r\n", + "# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r\n", + "# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\r\n", + "# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r\n", + "# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r\n", + "# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r\n", + "# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r\n", + "# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY\r\n", + "# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\r\n", + "# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\r\n", + "# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r\n", + "import pathlib\r\n", + "from pathlib import Path\r\n", + "\r\n", + "from merlin.dag import postorder_iter_nodes\r\n", + "from merlin.systems.dag import Ensemble\r\n", + "from merlin.systems.dag.runtimes.triton import TritonExecutorRuntime\r\n", + "from merlin.systems.triton.conversions import (\r\n", + " dict_array_to_triton_response,\r\n", + " triton_request_to_dict_array,\r\n", + ")\r\n", + "from merlin.systems.triton.utils import triton_error_handling, triton_multi_request\r\n", + "\r\n", + "\r\n", + "class TritonPythonModel:\r\n", + " \"\"\"Model for Triton Python Backend.\r\n", + "\r\n", + " Every Python model must have \"TritonPythonModel\" as the class name\r\n", + " \"\"\"\r\n", + "\r\n", + " def initialize(self, args):\r\n", + " \"\"\"Called only once when the model is being loaded. 
Allowing\r\n", + " the model to initialize any state associated with this model.\r\n", + "\r\n", + " Parameters\r\n", + " ----------\r\n", + " args : dict\r\n", + " Both keys and values are strings. The dictionary keys and values are:\r\n", + " * model_config: A JSON string containing the model configuration\r\n", + " * model_instance_kind: A string containing model instance kind\r\n", + " * model_instance_device_id: A string containing model instance device ID\r\n", + " * model_repository: Model repository path\r\n", + " * model_version: Model version\r\n", + " * model_name: Model name\r\n", + " \"\"\"\r\n", + " # Arg parsing\r\n", + " model_repo = args[\"model_repository\"]\r\n", + " repository_path = _parse_model_repository(model_repo)\r\n", + "\r\n", + " ensemble_path = (\r\n", + " Path(repository_path) / args[\"model_name\"] / str(args[\"model_version\"]) / \"ensemble\"\r\n", + " )\r\n", + "\r\n", + " self.ensemble = Ensemble.load(str(ensemble_path))\r\n", + "\r\n", + " for node in list(postorder_iter_nodes(self.ensemble.graph.output_node)):\r\n", + " if hasattr(node.op, \"load_artifacts\"):\r\n", + " node.op.load_artifacts(str(ensemble_path))\r\n", + "\r\n", + " @triton_multi_request\r\n", + " @triton_error_handling\r\n", + " def execute(self, request):\r\n", + " \"\"\"Receives a list of pb_utils.InferenceRequest as the only argument. This\r\n", + " function is called when an inference is requested for this model. Depending on the\r\n", + " batching configuration (e.g. Dynamic Batching) used, `requests` may contain\r\n", + " multiple requests. Every Python model, must create one pb_utils.InferenceResponse\r\n", + " for every pb_utils.InferenceRequest in `requests`. If there is an error, you can\r\n", + " set the error argument when creating a pb_utils.InferenceResponse.\r\n", + "\r\n", + " Parameters\r\n", + " ----------\r\n", + " requests : list\r\n", + " A list of pb_utils.InferenceRequest\r\n", + "\r\n", + " Returns\r\n", + " -------\r\n", + " list\r\n", + " A list of pb_utils.InferenceResponse. 
The length of this list must\r\n", + " be the same as `requests`\r\n", + " \"\"\"\r\n", + " inputs = triton_request_to_dict_array(request, self.ensemble.input_schema.column_names)\r\n", + " outputs = self.ensemble.transform(inputs, runtime=TritonExecutorRuntime())\r\n", + " return dict_array_to_triton_response(outputs)\r\n", + "\r\n", + "\r\n", + "def _parse_model_repository(model_repository: str) -> str:\r\n", + " \"\"\"\r\n", + " Extract the model repository path from the model_repository value\r\n", + " passed to the TritonPythonModel initialize method.\r\n", + " \"\"\"\r\n", + " # Handle bug in Tritonserver 22.06\r\n", + " # model_repository argument became path to model.py\r\n", + " # instead of path to model directory within the model repository\r\n", + " if model_repository.endswith(\".py\"):\r\n", + " return str(pathlib.Path(model_repository).parent.parent.parent)\r\n", + " else:\r\n", + " return str(pathlib.Path(model_repository).parent)\r\n" + ] + } + ], + "source": [ + "cat /workspace/models_for_benchmarking/executor_model/1/model.py" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "3d21ce62", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"versions\": {\"python\": \"3.8.10 (default, Nov 14 2022, 12:59:47) \\n[GCC 9.4.0]\"}, \"generated_timestamp\": 1679017581}" + ] + } + ], + "source": [ + "cat /workspace/models_for_benchmarking/executor_model/1/ensemble/metadata.json" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7998b835", + "metadata": {}, + "outputs": [], + "source": [ + "# %%writefile /workspace/models_for_benchmarking/t4r_pytorch_pt/config.pbtxt\n", + "\n", + "# name: \"t4r_pytorch_pt\"\n", + "# input {\n", + "# name: \"sess_pid_seq__values\"\n", + "# data_type: TYPE_INT64\n", + "# dims: -1\n", + "# dims: 1\n", + "# }\n", + "# input {\n", + "# name: \"sess_pid_seq__nnzs\"\n", + "# data_type: TYPE_INT64\n", + "# dims: -1\n", + "# dims: 1\n", + "# }\n", + "# output {\n", + "# name: \"output\"\n", + "# data_type: TYPE_FP32\n", + "# dims: -1\n", + "# dims: 20\n", + "# }\n", + "# backend: \"python\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 552cf323876a597a7167356b12baf471b809c4ae Mon Sep 17 00:00:00 2001 From: Radek Osmulski Date: Tue, 9 May 2023 12:07:46 +1000 Subject: [PATCH 12/15] update --- ...nd_save_model_for_benchmarking-Copy1.ipynb | 13764 +++++++++++++++- 1 file changed, 13438 insertions(+), 326 deletions(-) diff --git a/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb b/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb index 74b19fa9d3..c048898d04 100644 --- a/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb +++ b/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb @@ -2,27 +2,1528 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "d062ceda", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/Models\n", + " * [new branch] asvdb_metric_tracking -> origin/asvdb_metric_tracking\n", + " * 
[new branch] benchmark-session-based -> origin/benchmark-session-based\n", + " * [new branch] ci/horovod -> origin/ci/horovod\n", + " * [new branch] codespell_fix -> origin/codespell_fix\n", + " 16fb4149..fcaefc3e fea-sok-integration-wj -> origin/fea-sok-integration-wj\n", + " * [new branch] fea-sok-load-dump -> origin/fea-sok-load-dump\n", + " * [new branch] fix_datetime_issue_add_inference_on_TIS -> origin/fix_datetime_issue_add_inference_on_TIS\n", + " 95462360..7d68dc88 gh-pages -> origin/gh-pages\n", + " * [new branch] implement_review_comments -> origin/implement_review_comments\n", + " * [new branch] inference_benchmarking_transformers -> origin/inference_benchmarking_transformers\n", + " * [new branch] laiacano/concurrency -> origin/laiacano/concurrency\n", + " 835ad186..d8133b8f main -> origin/main\n", + " * [new branch] mtl_example -> origin/mtl_example\n", + " cb431a8a..b90e9a1b release-22.12 -> origin/release-22.12\n", + " * [new branch] release-23.02 -> origin/release-23.02\n", + " * [new branch] release-23.04 -> origin/release-23.04\n", + " * [new branch] stable -> origin/stable\n", + " * [new branch] tf/batch_predict_fix -> origin/tf/batch_predict_fix\n", + " * [new branch] tf/column_sampling_serialization_fix -> origin/tf/column_sampling_serialization_fix\n", + " * [new branch] tf/continuous_seq_feats_fix -> origin/tf/continuous_seq_feats_fix\n", + " * [new branch] tf/dataloader_changes -> origin/tf/dataloader_changes\n", + " * [new branch] tf/dlrm_dropout_fix -> origin/tf/dlrm_dropout_fix\n", + " * [new branch] tf/fix_broadcast_to_sequence -> origin/tf/fix_broadcast_to_sequence\n", + " * [new branch] tf/fix_logq_correction -> origin/tf/fix_logq_correction\n", + " * [new branch] tf/fix_training_smaller_accuracy -> origin/tf/fix_training_smaller_accuracy\n", + " * [new branch] tf/loglossmetric_callbacks -> origin/tf/loglossmetric_callbacks\n", + " * [new branch] tf/mtl_example_updates_v2 -> origin/tf/mtl_example_updates_v2\n", + " + 169f3df5...06eecddd tf/output-block -> origin/tf/output-block (forced update)\n", + " * [new branch] tf/pretrained_emb -> origin/tf/pretrained_emb\n", + " * [new branch] tf/process_list_to_prepare_features -> origin/tf/process_list_to_prepare_features\n", + " * [new branch] tf/quick_start_ranking -> origin/tf/quick_start_ranking\n", + " * [new branch] tf/transformer-api -> origin/tf/transformer-api\n", + " * [new branch] torch/dev -> origin/torch/dev\n", + " * [new branch] torch/masking -> origin/torch/masking\n", + " * [new branch] torch/prototype -> origin/torch/prototype\n", + " * [new branch] torch/remove-t4r-code -> origin/torch/remove-t4r-code\n", + " * [new branch] tox_github_actions_fix -> origin/tox_github_actions_fix\n", + " * [new branch] transformer-api -> origin/transformer-api\n", + " + 0a65d603...9f53e8ff update_07 -> origin/update_07 (forced update)\n", + " * [new tag] v23.02.00 -> v23.02.00\n", + " * [new tag] v23.04.00 -> v23.04.00\n", + " * [new tag] v23.05.dev0 -> v23.05.dev0\n", + "Previous HEAD position was cb431a8a Fix the serialization of `SequenceSummary` block (#927)\n", + "HEAD is now at a86201ee add masking support to SequencePredictRandom transform\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing /models\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: 
started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from merlin-models==23.2.0+7.ga86201ee) (0.0.4)\n", + "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-models==23.2.0+7.ga86201ee) (0.10.0)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.56.4)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (4.64.1)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (8.0.0)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (3.19.6)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.12.0)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.2.5)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.7.1)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.7.1)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (22.0)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.5.0)\n", + "Requirement already satisfied: pandas<1.4.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.3.5)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.39.1)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (45.2.0)\n", + "Requirement already satisfied: numpy<1.24,>=1.18 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.22.4)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (5.2.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.57.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.4.3)\n", + 
"Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.2.0)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.3.0)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.12.0)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.0)\n", + "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.2.0)\n", + "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (8.1.3)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.2.0)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (5.9.4)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.7.0)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.4.0)\n", + "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.26.13)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.0.4)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.0.0)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (3.1.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.8.2)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (3.11.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.0.4)\n", + 
"Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (4.1.0)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.0.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.1.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.14.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.0.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (4.0.0)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Building wheels for collected packages: merlin-models\n", + " Building wheel for merlin-models (PEP 517): started\n", + " Building wheel for merlin-models (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-models: filename=merlin_models-23.2.0+7.ga86201ee-py3-none-any.whl size=374647 sha256=e83a617585afdc41213cc3cf69dd7c136b778260ce9dc14c37e87c4a5675372a\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-uc9xl_m5/wheels/4d/e8/98/0493db55fff90dc9af123f55a9455b96f7f8166c912a02c8a6\n", + "Successfully built merlin-models\n", + "Installing collected packages: merlin-models\n", + " Attempting uninstall: merlin-models\n", + " Found existing installation: merlin-models 0.11.0\n", + " Uninstalling merlin-models-0.11.0:\n", + " Successfully uninstalled merlin-models-0.11.0\n", + "Successfully installed merlin-models-23.2.0+7.ga86201ee\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Previous HEAD position was a86201ee add masking support to SequencePredictRandom transform\n", + "Switched to branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your branch is behind 'origin/main' by 75 commits, and can be fast-forwarded.\n", + " (use \"git pull\" to update your local branch)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/Models\n", + " * branch main -> FETCH_HEAD\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Updating 835ad186..d8133b8f\n", + "Fast-forward\n", + " .github/workflows/blossom-ci.yml | 102 --\n", + " .github/workflows/check-base-branch.yaml | 9 +\n", + " .github/workflows/cpu-horovod.yml | 53 +\n", + " .github/workflows/cpu-nvtabular.yml | 10 +-\n", + " .github/workflows/cpu-systems.yml | 10 +-\n", + " .github/workflows/cpu-t4r.yml | 41 +\n", + " .github/workflows/datasets.yml | 8 +-\n", + " .github/workflows/docs-build.yaml | 2 +-\n", + " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", + " .github/workflows/gpu-ci.yml | 12 +-\n", + " .github/workflows/implicit.yml | 8 +-\n", + " .github/workflows/lightfm.yml | 14 +-\n", + " .github/workflows/multi-gpu-ci.yml | 34 +\n", + " .github/workflows/packages.yaml | 120 ++\n", + " 
.github/workflows/pre-commit.yml | 8 +\n", + " .github/workflows/pytorch.yml | 85 +-\n", + " .github/workflows/release-drafter.yaml | 2 +-\n", + " .github/workflows/set-stable-branch.yaml | 10 +\n", + " .github/workflows/tensorflow.yml | 49 +-\n", + " .github/workflows/xgboost.yml | 8 +-\n", + " .pre-commit-config.yaml | 10 +-\n", + " MANIFEST.in | 5 +-\n", + " README.md | 2 +-\n", + " ci/pr.gpu.Jenkinsfile | 2 +-\n", + " conda/recipes/meta.yaml | 17 +-\n", + " docs/README.md | 46 +-\n", + " docs/source/api.rst | 99 +-\n", + " examples/01-Getting-started.ipynb | 101 +-\n", + " ...2-Merlin-Models-and-NVTabular-integration.ipynb | 13 +-\n", + " examples/03-Exploring-different-models.ipynb | 25 +-\n", + " examples/04-Exporting-ranking-models.ipynb | 9 +-\n", + " examples/05-Retrieval-Model.ipynb | 30 +-\n", + " ...-your-own-architecture-with-Merlin-Models.ipynb | 546 +++----\n", + " ...nal-ML-models-using-the-Merlin-Models-API.ipynb | 701 +++++++-\n", + " examples/images/mtl_architectures.png | Bin 0 -> 72404 bytes\n", + " ...ing-of-large-embedding-tables-by-LazyAdam.ipynb | 12 +-\n", + " ...on-based-next-item-prediction-for-fashion.ipynb | 11 +-\n", + " .../entertainment-with-pretrained-embeddings.ipynb | 8 +-\n", + " .../incremental-training-with-layer-freezing.ipynb | 275 ++--\n", + " .../multi-gpu-data-parallel-training.ipynb | 7 +-\n", + " .../multi-gpu/install_sparse_operation_kit.sh | 16 +\n", + " .../usecases/ranking_with_multitask_learning.ipynb | 1718 ++++++++++++++++++++\n", + " ...etrieval-with-hyperparameter-optimization.ipynb | 5 +-\n", + " .../transformers-next-item-prediction.ipynb | 1085 ++++++++----\n", + " .../ecommerce/booking/transformed/schema.pbtxt | 15 +-\n", + " merlin/datasets/ecommerce/small/schema.json | 7 +-\n", + " .../entertainment/movielens/100k/schema.pbtxt | 1 +\n", + " .../entertainment/movielens/1m/schema.pbtxt | 3 +-\n", + " .../entertainment/movielens/25m/schema.pbtxt | 1 +\n", + " .../entertainment/music_streaming/schema.json | 10 +-\n", + " .../entertainment/tenrec_video}/__init__.py | 0\n", + " .../entertainment/tenrec_video/schema.pbtxt | 159 ++\n", + " merlin/datasets/synthetic.py | 104 +-\n", + " .../datasets/testing/sequence_testing/schema.json | 24 +-\n", + " merlin/models/implicit/__init__.py | 115 +-\n", + " merlin/models/io.py | 2 -\n", + " merlin/models/lightfm/__init__.py | 132 +-\n", + " merlin/models/tf/__init__.py | 12 +-\n", + " merlin/models/tf/blocks/dlrm.py | 21 +-\n", + " merlin/models/tf/blocks/experts.py | 33 +-\n", + " merlin/models/tf/blocks/optimizer.py | 74 +-\n", + " merlin/models/tf/blocks/retrieval/base.py | 1 -\n", + " merlin/models/tf/core/aggregation.py | 87 +-\n", + " merlin/models/tf/core/combinators.py | 6 +-\n", + " merlin/models/tf/core/encoder.py | 54 +-\n", + " merlin/models/tf/core/tabular.py | 3 +-\n", + " merlin/models/tf/distributed/backend.py | 20 +\n", + " merlin/models/tf/distributed/embedding.py | 232 +++\n", + " merlin/models/tf/experimental/sample_weight.py | 177 ++\n", + " merlin/models/tf/inputs/base.py | 26 +-\n", + " merlin/models/tf/inputs/continuous.py | 41 +-\n", + " merlin/models/tf/inputs/embedding.py | 138 +-\n", + " merlin/models/tf/loader.py | 36 +-\n", + " merlin/models/tf/metrics/__init__.py | 31 +-\n", + " merlin/models/tf/metrics/evaluation.py | 4 +-\n", + " merlin/models/tf/metrics/topk.py | 17 +-\n", + " merlin/models/tf/models/base.py | 887 +++++++---\n", + " merlin/models/tf/models/benchmark.py | 20 +-\n", + " merlin/models/tf/models/ranking.py | 93 +-\n", + " 
merlin/models/tf/models/retrieval.py | 5 +\n", + " merlin/models/tf/models/utils.py | 38 +\n", + " merlin/models/tf/outputs/base.py | 27 +-\n", + " merlin/models/tf/outputs/block.py | 300 ++++\n", + " merlin/models/tf/outputs/classification.py | 14 +-\n", + " merlin/models/tf/outputs/contrastive.py | 65 +-\n", + " merlin/models/tf/outputs/regression.py | 8 +-\n", + " merlin/models/tf/outputs/sampling/base.py | 34 +-\n", + " merlin/models/tf/outputs/sampling/popularity.py | 93 +-\n", + " merlin/models/tf/outputs/topk.py | 2 -\n", + " merlin/models/tf/prediction_tasks/base.py | 15 +\n", + " .../models/tf/prediction_tasks/classification.py | 11 +-\n", + " merlin/models/tf/prediction_tasks/regression.py | 3 +-\n", + " merlin/models/tf/transformers/block.py | 61 +-\n", + " merlin/models/tf/transformers/transforms.py | 52 +-\n", + " merlin/models/tf/transforms/bias.py | 18 +-\n", + " merlin/models/tf/transforms/features.py | 579 +++++--\n", + " merlin/models/tf/transforms/negative_sampling.py | 25 +-\n", + " merlin/models/tf/transforms/sequence.py | 523 ++++--\n", + " merlin/models/tf/transforms/tensor.py | 249 +--\n", + " merlin/models/tf/utils/batch_utils.py | 8 +-\n", + " merlin/models/tf/utils/testing_utils.py | 81 +-\n", + " merlin/models/tf/utils/tf_utils.py | 85 +-\n", + " merlin/models/torch/__init__.py | 97 --\n", + " merlin/models/torch/block/base.py | 321 ----\n", + " merlin/models/torch/block/mlp.py | 95 --\n", + " merlin/models/torch/features/base.py | 23 -\n", + " merlin/models/torch/features/continuous.py | 66 -\n", + " merlin/models/torch/features/embedding.py | 497 ------\n", + " merlin/models/torch/features/tabular.py | 217 ---\n", + " merlin/models/torch/losses.py | 75 -\n", + " merlin/models/torch/model/__init__.py | 15 -\n", + " merlin/models/torch/model/base.py | 660 --------\n", + " merlin/models/torch/model/prediction_task.py | 101 --\n", + " merlin/models/torch/tabular/__init__.py | 15 -\n", + " merlin/models/torch/tabular/aggregation.py | 149 --\n", + " merlin/models/torch/tabular/base.py | 640 --------\n", + " merlin/models/torch/tabular/transformations.py | 124 --\n", + " merlin/models/torch/typing.py | 30 -\n", + " merlin/models/torch/utils/__init__.py | 15 -\n", + " merlin/models/torch/utils/data_utils.py | 376 -----\n", + " merlin/models/torch/utils/examples_utils.py | 107 --\n", + " merlin/models/torch/utils/torch_utils.py | 210 ---\n", + " merlin/models/utils/dataset.py | 59 +-\n", + " merlin/models/utils/misc_utils.py | 7 +-\n", + " merlin/models/utils/nvt_utils.py | 6 +-\n", + " merlin/models/utils/schema_utils.py | 24 +-\n", + " merlin/models/xgb/__init__.py | 1 -\n", + " pytest.ini | 15 +\n", + " requirements/base.txt | 4 +-\n", + " requirements/docs.txt | 3 +-\n", + " requirements/horovod-cpu-environment.yml | 18 +\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " requirements/horovod.txt | 1 +\n", + " requirements/tensorflow.txt | 2 +-\n", + " requirements/test.txt | 2 +-\n", + " requirements/transformers.txt | 2 +-\n", + " tests/common/tf/retrieval/retrieval_utils.py | 4 +-\n", + " tests/integration/tf/test_ci_01_getting_started.py | 20 +-\n", + " .../tf/test_ci_03_exploring_different_models.py | 8 +-\n", + " .../tf/test_ci_06_advanced_own_architecture.py | 8 +-\n", + " tests/unit/datasets/test_ecommerce.py | 27 +-\n", + " tests/unit/datasets/test_synthetic.py | 15 +-\n", + " tests/unit/implicit/test_implicit.py | 60 +-\n", + " tests/unit/lightfm/test_lightfm.py | 68 +\n", + " .../blocks/retrieval/test_matrix_factorization.py | 
7 +-\n", + " tests/unit/tf/blocks/retrieval/test_two_tower.py | 9 +-\n", + " tests/unit/tf/blocks/test_cross.py | 2 -\n", + " tests/unit/tf/blocks/test_interactions.py | 6 +-\n", + " tests/unit/tf/blocks/test_mlp.py | 39 +\n", + " tests/unit/tf/blocks/test_optimizer.py | 64 +-\n", + " tests/unit/tf/core/test_base.py | 5 +-\n", + " tests/unit/tf/core/test_combinators.py | 1 +\n", + " tests/unit/tf/core/test_encoder.py | 6 +-\n", + " tests/unit/tf/core/test_prediction.py | 2 +-\n", + " tests/unit/tf/examples/test_01_getting_started.py | 8 +-\n", + " .../examples/test_03_exploring_different_models.py | 8 +-\n", + " ...test_usecase_accelerate_training_by_lazyadam.py | 1 +\n", + " ..._usecase_incremental_training_layer_freezing.py | 2 +-\n", + " ...test_usecase_ranking_with_multitask_learning.py | 46 +\n", + " ...st_usecase_transformers_next_item_prediction.py | 36 +-\n", + " .../unit/tf/experimental}/__init__.py | 0\n", + " tests/unit/tf/experimental/test_sample_weight.py | 112 ++\n", + " tests/unit/tf/horovod/__init__.py | 2 +-\n", + " tests/unit/tf/horovod/test_embedding.py | 46 +\n", + " tests/unit/tf/horovod/test_horovod.py | 10 +-\n", + " tests/unit/tf/inputs/test_base.py | 2 +-\n", + " tests/unit/tf/inputs/test_block.py | 202 +++\n", + " tests/unit/tf/inputs/test_continuous.py | 4 +-\n", + " tests/unit/tf/inputs/test_embedding.py | 41 +-\n", + " tests/unit/tf/inputs/test_tabular.py | 10 +-\n", + " tests/unit/tf/metrics/test_metrics_topk.py | 2 -\n", + " tests/unit/tf/models/test_base.py | 93 +-\n", + " tests/unit/tf/models/test_benchmark.py | 13 +-\n", + " tests/unit/tf/models/test_ranking.py | 103 +-\n", + " tests/unit/tf/models/test_retrieval.py | 35 +-\n", + " tests/unit/tf/outputs/test_base.py | 78 +-\n", + " tests/unit/tf/outputs/test_block.py | 936 +++++++++++\n", + " tests/unit/tf/outputs/test_classification.py | 69 +-\n", + " tests/unit/tf/outputs/test_contrastive.py | 28 +-\n", + " tests/unit/tf/outputs/test_sampling.py | 17 +-\n", + " tests/unit/tf/prediction_tasks/test_multi_task.py | 281 +++-\n", + " tests/unit/tf/test_loader.py | 28 +-\n", + " tests/unit/tf/transformers/test_block.py | 187 ++-\n", + " tests/unit/tf/transforms/test_features.py | 123 +-\n", + " tests/unit/tf/transforms/test_negative_sampling.py | 63 +-\n", + " tests/unit/tf/transforms/test_noise.py | 1 -\n", + " tests/unit/tf/transforms/test_sequence.py | 55 +-\n", + " tests/unit/tf/transforms/test_tensor.py | 20 +-\n", + " tests/unit/tf/utils/test_batch.py | 20 +-\n", + " tests/unit/torch/__init__.py | 18 -\n", + " tests/unit/torch/_conftest.py | 151 --\n", + " tests/unit/torch/block/__init__.py | 15 -\n", + " tests/unit/torch/block/test_base.py | 62 -\n", + " tests/unit/torch/block/test_mlp.py | 30 -\n", + " tests/unit/torch/features/__init__.py | 15 -\n", + " tests/unit/torch/features/test_continuous.py | 34 -\n", + " tests/unit/torch/features/test_embedding.py | 250 ---\n", + " tests/unit/torch/features/test_tabular.py | 84 -\n", + " tests/unit/torch/model/__init__.py | 15 -\n", + " tests/unit/torch/model/test_head.py | 92 --\n", + " tests/unit/torch/model/test_model.py | 122 --\n", + " tests/unit/torch/tabular/__init__.py | 15 -\n", + " tests/unit/torch/tabular/test_aggregation.py | 106 --\n", + " tests/unit/torch/tabular/test_tabular.py | 88 -\n", + " tests/unit/torch/tabular/test_transformations.py | 122 --\n", + " tests/unit/torch/test_dataloader_utils.py | 86 -\n", + " tests/unit/torch/test_losses.py | 53 -\n", + " tests/unit/torch/test_public_api.py | 27 -\n", + " tests/unit/torch/utils/__init__.py | 
15 -\n", + " tests/unit/xgb/test_xgboost.py | 2 +-\n", + " tox.ini | 78 +-\n", + " 210 files changed, 10688 insertions(+), 8019 deletions(-)\n", + " delete mode 100644 .github/workflows/blossom-ci.yml\n", + " create mode 100644 .github/workflows/check-base-branch.yaml\n", + " create mode 100644 .github/workflows/cpu-horovod.yml\n", + " create mode 100644 .github/workflows/cpu-t4r.yml\n", + " create mode 100644 .github/workflows/multi-gpu-ci.yml\n", + " create mode 100644 .github/workflows/packages.yaml\n", + " create mode 100644 .github/workflows/set-stable-branch.yaml\n", + " create mode 100644 examples/images/mtl_architectures.png\n", + " create mode 100644 examples/usecases/multi-gpu/install_sparse_operation_kit.sh\n", + " create mode 100644 examples/usecases/ranking_with_multitask_learning.ipynb\n", + " rename merlin/{models/torch/block => datasets/entertainment/tenrec_video}/__init__.py (100%)\n", + " create mode 100644 merlin/datasets/entertainment/tenrec_video/schema.pbtxt\n", + " create mode 100644 merlin/models/tf/distributed/embedding.py\n", + " create mode 100644 merlin/models/tf/experimental/sample_weight.py\n", + " create mode 100644 merlin/models/tf/outputs/block.py\n", + " delete mode 100644 merlin/models/torch/__init__.py\n", + " delete mode 100644 merlin/models/torch/block/base.py\n", + " delete mode 100644 merlin/models/torch/block/mlp.py\n", + " delete mode 100644 merlin/models/torch/features/base.py\n", + " delete mode 100644 merlin/models/torch/features/continuous.py\n", + " delete mode 100644 merlin/models/torch/features/embedding.py\n", + " delete mode 100644 merlin/models/torch/features/tabular.py\n", + " delete mode 100644 merlin/models/torch/losses.py\n", + " delete mode 100644 merlin/models/torch/model/__init__.py\n", + " delete mode 100644 merlin/models/torch/model/base.py\n", + " delete mode 100644 merlin/models/torch/model/prediction_task.py\n", + " delete mode 100644 merlin/models/torch/tabular/__init__.py\n", + " delete mode 100644 merlin/models/torch/tabular/aggregation.py\n", + " delete mode 100644 merlin/models/torch/tabular/base.py\n", + " delete mode 100644 merlin/models/torch/tabular/transformations.py\n", + " delete mode 100644 merlin/models/torch/typing.py\n", + " delete mode 100644 merlin/models/torch/utils/__init__.py\n", + " delete mode 100644 merlin/models/torch/utils/data_utils.py\n", + " delete mode 100644 merlin/models/torch/utils/examples_utils.py\n", + " delete mode 100644 merlin/models/torch/utils/torch_utils.py\n", + " create mode 100644 pytest.ini\n", + " create mode 100644 requirements/horovod-cpu-environment.yml\n", + " create mode 100644 tests/unit/tf/examples/test_usecase_ranking_with_multitask_learning.py\n", + " rename {merlin/models/torch/features => tests/unit/tf/experimental}/__init__.py (100%)\n", + " create mode 100644 tests/unit/tf/experimental/test_sample_weight.py\n", + " create mode 100644 tests/unit/tf/horovod/test_embedding.py\n", + " create mode 100644 tests/unit/tf/inputs/test_block.py\n", + " create mode 100644 tests/unit/tf/outputs/test_block.py\n", + " delete mode 100644 tests/unit/torch/__init__.py\n", + " delete mode 100644 tests/unit/torch/_conftest.py\n", + " delete mode 100644 tests/unit/torch/block/__init__.py\n", + " delete mode 100644 tests/unit/torch/block/test_base.py\n", + " delete mode 100644 tests/unit/torch/block/test_mlp.py\n", + " delete mode 100644 tests/unit/torch/features/__init__.py\n", + " delete mode 100644 tests/unit/torch/features/test_continuous.py\n", + " delete mode 100644 
tests/unit/torch/features/test_embedding.py\n", + " delete mode 100644 tests/unit/torch/features/test_tabular.py\n", + " delete mode 100644 tests/unit/torch/model/__init__.py\n", + " delete mode 100644 tests/unit/torch/model/test_head.py\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " delete mode 100644 tests/unit/torch/model/test_model.py\n", + " delete mode 100644 tests/unit/torch/tabular/__init__.py\n", + " delete mode 100644 tests/unit/torch/tabular/test_aggregation.py\n", + " delete mode 100644 tests/unit/torch/tabular/test_tabular.py\n", + " delete mode 100644 tests/unit/torch/tabular/test_transformations.py\n", + " delete mode 100644 tests/unit/torch/test_dataloader_utils.py\n", + " delete mode 100644 tests/unit/torch/test_losses.py\n", + " delete mode 100644 tests/unit/torch/test_public_api.py\n", + " delete mode 100644 tests/unit/torch/utils/__init__.py\n", + "Processing /models\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Collecting merlin-core>=23.4.0\n", + " Downloading merlin-core-23.4.0.tar.gz (133 kB)\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Collecting merlin-dataloader>=23.4.0\n", + " Downloading merlin-dataloader-23.4.0.tar.gz (46 kB)\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (8.0.0)\n", + "Collecting dask-cuda>=22.12.0\n", + " Downloading dask_cuda-23.4.0-py3-none-any.whl (125 kB)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (0.56.4)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.12.0)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (11.4.1)\n", + "Collecting fsspec>=2022.7.1\n", + " Downloading fsspec-2023.5.0-py3-none-any.whl (160 kB)\n", + "Collecting dask>=2022.11.1\n", + " Downloading dask-2023.4.1-py3-none-any.whl (1.2 MB)\n", + "Requirement already satisfied: numpy>=1.22.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.22.4)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (22.0)\n", + "Requirement already satisfied: protobuf>=3.0.0 in 
/usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (3.19.6)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.3.5)\n", + "Collecting distributed>=2022.11.1\n", + " Downloading distributed-2023.4.1-py3-none-any.whl (962 kB)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.2.5)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (4.64.1)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from dask-cuda>=22.12.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (2.2.0)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (45.2.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (5.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (0.39.1)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.57.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.3.0)\n", + "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (0.12.0)\n", + "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (8.1.3)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (2.2.0)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (6.0)\n", + "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.3.0)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (2022.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (2.8.2)\n", + "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (3.1.2)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from 
distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.7.0)\n", + "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (5.9.4)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.0.0)\n", + "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (2.4.0)\n", + "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.26.13)\n", + "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (6.1)\n", + "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.0.4)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (0.4.3)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.2.0)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->dask-cuda>=22.12.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.0.1)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (3.11.0)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.14.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (2.1.1)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (4.1.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (6.0.4)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (6.0.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (4.0.0)\n", + "Building wheels for collected packages: merlin-models, merlin-core, merlin-dataloader\n", + " Building wheel for merlin-models (PEP 517): started\n", + " Building wheel for merlin-models (PEP 517): 
finished with status 'done'\n", + " Created wheel for merlin-models: filename=merlin_models-23.5.dev0+12.gd8133b8f-py3-none-any.whl size=343289 sha256=1f20f65acef288535cc4e5bca6de216485c546156d707b17b3bb9b8ceedc3ec7\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-0prgr6hn/wheels/4d/e8/98/0493db55fff90dc9af123f55a9455b96f7f8166c912a02c8a6\n", + " Building wheel for merlin-core (PEP 517): started\n", + " Building wheel for merlin-core (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-core: filename=merlin_core-23.4.0-py3-none-any.whl size=159556 sha256=9a716886c9862c32bd19979d286f32eb664022c85bcee19ca2d762fa014c8e85\n", + " Stored in directory: /root/.cache/pip/wheels/42/ef/87/2c64bce8c3064a2c4e399933df4eda4838939355698ff8f7c7\n", + " Building wheel for merlin-dataloader (PEP 517): started\n", + " Building wheel for merlin-dataloader (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-dataloader: filename=merlin_dataloader-23.4.0-py3-none-any.whl size=34732 sha256=a7853a487205c4a6fdf99d03bda0cacba559264387e507e2f8d6cd87dc471c80\n", + " Stored in directory: /root/.cache/pip/wheels/90/b0/66/48e52cc29f544ffbd105154b8be0901b5bb80cc85842b778fc\n", + "Successfully built merlin-models merlin-core merlin-dataloader\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ERROR: dask-cudf 22.8.0a0+304.g6ca81bbc78.dirty requires cupy-cuda118<12,>=9.5.0, which is not installed.\n", + "ERROR: cudf 22.8.0a0+304.g6ca81bbc78.dirty requires cupy-cuda118<12,>=9.5.0, which is not installed.\n", + "ERROR: dask-cudf 22.8.0a0+304.g6ca81bbc78.dirty has requirement dask==2022.7.1, but you'll have dask 2023.4.1 which is incompatible.\n", + "ERROR: dask-cudf 22.8.0a0+304.g6ca81bbc78.dirty has requirement distributed==2022.7.1, but you'll have distributed 2023.4.1 which is incompatible.\n", + "ERROR: dask-cuda 23.4.0 has requirement dask==2023.3.2, but you'll have dask 2023.4.1 which is incompatible.\n", + "ERROR: dask-cuda 23.4.0 has requirement distributed==2023.3.2.1, but you'll have distributed 2023.4.1 which is incompatible.\n", + "ERROR: cudf 22.8.0a0+304.g6ca81bbc78.dirty has requirement cuda-python<11.7.1,>=11.5, but you'll have cuda-python 11.8.1 which is incompatible.\n", + "ERROR: cudf 22.8.0a0+304.g6ca81bbc78.dirty has requirement protobuf<3.21.0a0,>=3.20.1, but you'll have protobuf 3.19.6 which is incompatible.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Installing collected packages: fsspec, dask, distributed, dask-cuda, merlin-core, merlin-dataloader, merlin-models\n", + " Attempting uninstall: fsspec\n", + " Found existing installation: fsspec 2022.5.0\n", + " Uninstalling fsspec-2022.5.0:\n", + " Successfully uninstalled fsspec-2022.5.0\n", + " Attempting uninstall: dask\n", + " Found existing installation: dask 2022.7.1\n", + " Uninstalling dask-2022.7.1:\n", + " Successfully uninstalled dask-2022.7.1\n", + " Attempting uninstall: distributed\n", + " Found existing installation: distributed 2022.7.1\n", + " Uninstalling distributed-2022.7.1:\n", + " Successfully uninstalled distributed-2022.7.1\n", + " Attempting uninstall: dask-cuda\n", + " Found existing installation: dask-cuda 22.8.0a0+36.g9860cad\n", + " Uninstalling dask-cuda-22.8.0a0+36.g9860cad:\n", + " Successfully uninstalled dask-cuda-22.8.0a0+36.g9860cad\n", + " Attempting uninstall: merlin-core\n", + " Found existing installation: merlin-core 0.10.0\n", + " Uninstalling merlin-core-0.10.0:\n", + " Successfully uninstalled 
merlin-core-0.10.0\n", + " Attempting uninstall: merlin-dataloader\n", + " Found existing installation: merlin-dataloader 0.0.4\n", + " Uninstalling merlin-dataloader-0.0.4:\n", + " Successfully uninstalled merlin-dataloader-0.0.4\n", + " Attempting uninstall: merlin-models\n", + " Found existing installation: merlin-models 23.2.0+7.ga86201ee\n", + " Uninstalling merlin-models-23.2.0+7.ga86201ee:\n", + " Successfully uninstalled merlin-models-23.2.0+7.ga86201ee\n", + "Successfully installed dask-2023.4.1 dask-cuda-23.4.0 distributed-2023.4.1 fsspec-2023.5.0 merlin-core-23.4.0 merlin-dataloader-23.4.0 merlin-models-23.5.dev0+12.gd8133b8f\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Previous HEAD position was 2fc6889 add schema parameter to the `repartition` method (#192)\n", + "Switched to branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your branch is up to date with 'origin/main'.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/core\n", + " * branch main -> FETCH_HEAD\n", + " cd96ca5f..a0bcd30f main -> origin/main\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Updating cd96ca5f..a0bcd30f\n", + "Fast-forward\n", + " .github/actionlint.yaml | 5 +\n", + " .github/release-drafter.yml | 44 +--\n", + " .github/workflows/ISSUE_TEMPLATE/bug-report.md | 17 +-\n", + " .../ISSUE_TEMPLATE/documentation-request.md | 12 +-\n", + " .../workflows/ISSUE_TEMPLATE/feature-request.md | 5 +-\n", + " .../workflows/ISSUE_TEMPLATE/submit-question.md | 3 +-\n", + " .github/workflows/ISSUE_TEMPLATE/task.md | 5 +-\n", + " .github/workflows/check-base-branch.yaml | 9 +\n", + " .github/workflows/cpu-ci.yml | 138 +-------\n", + " .github/workflows/cpu-models.yml | 44 ---\n", + " .github/workflows/cpu-nvtabular.yml | 44 ---\n", + " .github/workflows/cpu-systems.yml | 44 ---\n", + " .github/workflows/docs-preview-pr.yaml | 4 +-\n", + " .github/workflows/docs-sched-rebuild.yaml | 51 +--\n", + " .github/workflows/gpu-ci.yml | 52 ++-\n", + " .github/workflows/lint.yaml | 11 +-\n", + " .github/workflows/merlin.yml | 35 ++\n", + " .github/workflows/packages.yaml | 154 ++++++++\n", + " .github/workflows/release-drafter.yaml | 4 +-\n", + " .github/workflows/set-stable-branch.yaml | 10 +\n", + " .github/workflows/tox.yml | 38 ++\n", + " .pre-commit-config.yaml | 55 +--\n", + " .prettierignore | 2 +\n", + " CLA.md | 9 +-\n", + " CONTRIBUTING.md | 28 +-\n", + " README.md | 68 ++--\n", + " ci/pr.gpu.Jenkinsfile | 2 +-\n", + " conda/recipe/meta.yaml | 4 +-\n", + " docs/README.md | 49 ++-\n", + " merlin/core/compat/__init__.py | 143 ++++++++\n", + " merlin/core/compat/tensorflow.py | 92 +++++\n", + " merlin/core/compat/torch.py | 22 ++\n", + " merlin/core/dispatch.py | 245 ++++++++-----\n", + " merlin/core/has_gpu.py | 46 +++\n", + " merlin/core/utils.py | 88 +----\n", + " merlin/dag/__init__.py | 1 +\n", + " merlin/dag/base_operator.py | 30 +-\n", + " merlin/dag/dictarray.py | 3 +-\n", + " merlin/dag/executors.py | 242 +++++++------\n", + " merlin/dag/graph.py | 20 ++\n", + " merlin/dag/node.py | 5 +-\n", + " merlin/dag/selector.py | 10 +-\n", + " merlin/dag/utils.py | 69 ++++\n", + " merlin/dispatch/lazy.py | 156 +++++++++\n", + " merlin/dtypes/__init__.py | 61 ++++\n", + " merlin/dtypes/aliases.py | 53 +++\n", + " merlin/dtypes/base.py | 179 ++++++++++\n", + " merlin/dtypes/mapping.py | 177 ++++++++++\n", + " .../compat.py => dtypes/mappings/__init__.py} | 
17 +-\n", + " merlin/dtypes/mappings/cudf.py | 61 ++++\n", + " merlin/dtypes/mappings/merlin.py | 51 +++\n", + " merlin/dtypes/mappings/numpy.py | 52 +++\n", + " merlin/dtypes/mappings/pandas.py | 38 ++\n", + " merlin/dtypes/mappings/python.py | 28 ++\n", + " merlin/dtypes/mappings/tf.py | 52 +++\n", + " merlin/dtypes/mappings/torch.py | 43 +++\n", + " merlin/dtypes/mappings/triton.py | 53 +++\n", + " merlin/dtypes/registry.py | 136 ++++++++\n", + " merlin/dtypes/shape.py | 200 +++++++++++\n", + " merlin/io/__init__.py | 2 +-\n", + " merlin/io/avro.py | 6 +-\n", + " merlin/io/csv.py | 9 +-\n", + " merlin/io/dask.py | 74 +++-\n", + " merlin/io/dataframe_engine.py | 6 +-\n", + " merlin/io/dataset.py | 112 ++++--\n", + " merlin/io/fsspec_utils.py | 16 +-\n", + " merlin/io/parquet.py | 25 +-\n", + " merlin/io/shuffle.py | 13 +-\n", + " merlin/io/worker.py | 104 +++---\n", + " merlin/io/writer.py | 7 +-\n", + " merlin/io/writer_factory.py | 10 +-\n", + " merlin/schema/io/tensorflow_metadata.py | 115 ++++--\n", + " merlin/schema/schema.py | 331 +++++++++++-------\n", + " merlin/schema/tags.py | 7 +-\n", + " merlin/table/__init__.py | 24 ++\n", + " merlin/table/conversions.py | 226 ++++++++++++\n", + " merlin/table/cupy_column.py | 108 ++++++\n", + " merlin/table/numpy_column.py | 122 +++++++\n", + " merlin/table/tensor_column.py | 261 ++++++++++++++\n", + " merlin/table/tensor_table.py | 294 ++++++++++++++++\n", + " merlin/table/tensorflow_column.py | 173 +++++++++\n", + " merlin/table/torch_column.py | 135 +++++++\n", + " requirements-gpu.txt | 2 +-\n", + " requirements.txt | 13 +-\n", + " tests/conftest.py | 35 +-\n", + " tests/unit/core/test_dispatch.py | 43 ++-\n", + " tests/unit/core/test_protocols.py | 10 +-\n", + " tests/unit/core/test_version.py | 2 +\n", + " tests/unit/dag/test_column_selector.py | 6 +\n", + " tests/unit/dag/test_dag_utils.py | 31 ++\n", + " tests/unit/dispatch/test_lazy_dispatch.py | 61 ++++\n", + " tests/unit/dtypes/test_cudf.py | 30 ++\n", + " tests/unit/dtypes/test_module.py | 61 ++++\n", + " tests/unit/dtypes/test_shape.py | 222 ++++++++++++\n", + " tests/unit/io/test_avro.py | 8 +-\n", + " tests/unit/io/test_dataset.py | 51 +++\n", + " tests/unit/io/test_io.py | 95 ++++-\n", + " tests/unit/io/test_worker.py | 142 ++++++++\n", + " tests/unit/schema/test_column_schemas.py | 142 +++++---\n", + " tests/unit/schema/test_schema.py | 60 +++-\n", + " tests/unit/schema/test_schema_io.py | 54 ++-\n", + " tests/unit/table/test_convert_column.py | 164 +++++++++\n", + " tests/unit/table/test_tensor_column.py | 262 ++++++++++++++\n", + " tests/unit/table/test_tensor_table.py | 387 +++++++++++++++++++++\n", + " tests/unit/utils/test_utils.py | 16 +-\n", + " tox.ini | 49 ++-\n", + " 106 files changed, 6299 insertions(+), 1146 deletions(-)\n", + " create mode 100644 .github/actionlint.yaml\n", + " create mode 100644 .github/workflows/check-base-branch.yaml\n", + " delete mode 100644 .github/workflows/cpu-models.yml\n", + " delete mode 100644 .github/workflows/cpu-nvtabular.yml\n", + " delete mode 100644 .github/workflows/cpu-systems.yml\n", + " create mode 100644 .github/workflows/merlin.yml\n", + " create mode 100644 .github/workflows/packages.yaml\n", + " create mode 100644 .github/workflows/set-stable-branch.yaml\n", + " create mode 100644 .github/workflows/tox.yml\n", + " create mode 100644 .prettierignore\n", + " create mode 100644 merlin/core/compat/__init__.py\n", + " create mode 100644 merlin/core/compat/tensorflow.py\n", + " create mode 100644 
merlin/core/compat/torch.py\n", + " create mode 100644 merlin/core/has_gpu.py\n", + " create mode 100644 merlin/dag/utils.py\n", + " create mode 100644 merlin/dispatch/lazy.py\n", + " create mode 100644 merlin/dtypes/__init__.py\n", + " create mode 100644 merlin/dtypes/aliases.py\n", + " create mode 100644 merlin/dtypes/base.py\n", + " create mode 100644 merlin/dtypes/mapping.py\n", + " rename merlin/{core/compat.py => dtypes/mappings/__init__.py} (60%)\n", + " create mode 100644 merlin/dtypes/mappings/cudf.py\n", + " create mode 100644 merlin/dtypes/mappings/merlin.py\n", + " create mode 100644 merlin/dtypes/mappings/numpy.py\n", + " create mode 100644 merlin/dtypes/mappings/pandas.py\n", + " create mode 100644 merlin/dtypes/mappings/python.py\n", + " create mode 100644 merlin/dtypes/mappings/tf.py\n", + " create mode 100644 merlin/dtypes/mappings/torch.py\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " create mode 100644 merlin/dtypes/mappings/triton.py\n", + " create mode 100644 merlin/dtypes/registry.py\n", + " create mode 100644 merlin/dtypes/shape.py\n", + " create mode 100644 merlin/table/__init__.py\n", + " create mode 100644 merlin/table/conversions.py\n", + " create mode 100644 merlin/table/cupy_column.py\n", + " create mode 100644 merlin/table/numpy_column.py\n", + " create mode 100644 merlin/table/tensor_column.py\n", + " create mode 100644 merlin/table/tensor_table.py\n", + " create mode 100644 merlin/table/tensorflow_column.py\n", + " create mode 100644 merlin/table/torch_column.py\n", + " create mode 100644 tests/unit/dag/test_dag_utils.py\n", + " create mode 100644 tests/unit/dispatch/test_lazy_dispatch.py\n", + " create mode 100644 tests/unit/dtypes/test_cudf.py\n", + " create mode 100644 tests/unit/dtypes/test_module.py\n", + " create mode 100644 tests/unit/dtypes/test_shape.py\n", + " create mode 100644 tests/unit/io/test_dataset.py\n", + " create mode 100644 tests/unit/io/test_worker.py\n", + " create mode 100644 tests/unit/table/test_convert_column.py\n", + " create mode 100644 tests/unit/table/test_tensor_column.py\n", + " create mode 100644 tests/unit/table/test_tensor_table.py\n", + "Processing /core\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (1.2.5)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (1.3.5)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (0.56.4)\n", + "Requirement already satisfied: dask-cuda>=22.12.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (23.4.0)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (11.4.1)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (22.0)\n", + "Requirement already satisfied: distributed>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) 
(2023.4.1)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (3.19.6)\n", + "Requirement already satisfied: dask>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (2023.4.1)\n", + "Requirement already satisfied: numpy>=1.22.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (1.22.4)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (4.64.1)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (8.0.0)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (1.12.0)\n", + "Requirement already satisfied: fsspec>=2022.7.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (2023.5.0)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (1.2.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (0.4.3)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+125.ga0bcd30f) (2022.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+125.ga0bcd30f) (2.8.2)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+125.ga0bcd30f) (5.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+125.ga0bcd30f) (0.39.1)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core==0.9.0+125.ga0bcd30f) (45.2.0)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from dask-cuda>=22.12.0->merlin-core==0.9.0+125.ga0bcd30f) (2.2.0)\n", + "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (3.1.2)\n", + "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (5.9.4)\n", + "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (1.26.13)\n", + "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (6.1)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (6.0)\n", + "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (2.4.0)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (1.7.0)\n", + "Requirement already 
satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (1.0.0)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (2.2.0)\n", + "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (1.0.4)\n", + "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (8.1.3)\n", + "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (0.12.0)\n", + "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+125.ga0bcd30f) (1.57.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+125.ga0bcd30f) (1.3.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (6.0.4)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (4.1.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+125.ga0bcd30f) (1.14.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core==0.9.0+125.ga0bcd30f) (3.11.0)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->dask-cuda>=22.12.0->merlin-core==0.9.0+125.ga0bcd30f) (1.0.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (2.1.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (4.0.0)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (6.0.1)\n", + "Building wheels for collected packages: merlin-core\n", + " Building wheel for merlin-core (PEP 517): started\n", + " Building wheel for merlin-core (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-core: filename=merlin_core-0.9.0+125.ga0bcd30f-py3-none-any.whl size=161449 sha256=57d8552cb7abbed6b1d1b2860391c64e7dfea045c442fc0f94c0fc940aed7e3d\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-0yemn26u/wheels/8f/da/8c/c779661788874afaa32fd10abeac6016635956e3bad9940584\n", + "Successfully built merlin-core\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ERROR: merlin-models 23.5.dev0+12.gd8133b8f has requirement merlin-core>=23.4.0, but 
you'll have merlin-core 0.9.0+125.ga0bcd30f which is incompatible.\n", + "ERROR: merlin-dataloader 23.4.0 has requirement merlin-core>=23.4.0, but you'll have merlin-core 0.9.0+125.ga0bcd30f which is incompatible.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Installing collected packages: merlin-core\n", + " Attempting uninstall: merlin-core\n", + " Found existing installation: merlin-core 23.4.0\n", + " Uninstalling merlin-core-23.4.0:\n", + " Successfully uninstalled merlin-core-23.4.0\n", + "Successfully installed merlin-core-0.9.0+125.ga0bcd30f\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Previous HEAD position was 020b24b7 Fix output error occurring due to check if it is a dict or not (#1742)\n", + "Switched to branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your branch is up to date with 'origin/main'.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/NVTabular\n", + " * branch main -> FETCH_HEAD\n", + " c5bc4098..67136eba main -> origin/main\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Updating c5bc4098..67136eba\n", + "Fast-forward\n", + " .github/ISSUE_TEMPLATE/bug_report.md | 11 +-\n", + " .github/ISSUE_TEMPLATE/documentation-request.md | 3 +-\n", + " .github/ISSUE_TEMPLATE/feature_request.md | 3 +-\n", + " .github/ISSUE_TEMPLATE/operator_request.md | 14 +-\n", + " .github/ISSUE_TEMPLATE/research_question.md | 3 +-\n", + " .github/ISSUE_TEMPLATE/submit-question.md | 3 +-\n", + " .github/ISSUE_TEMPLATE/task.md | 4 +-\n", + " .github/release-drafter.yml | 44 ++--\n", + " .github/workflows/blossom-ci.yml | 230 ++++++++++-----------\n", + " .github/workflows/check-base-branch.yaml | 9 +\n", + " .github/workflows/conda-env-create.yml | 30 +--\n", + " .github/workflows/cpu-ci.yml | 138 -------------\n", + " .github/workflows/cpu-packages.yml | 179 ++++++++++++++++\n", + " .github/workflows/cpu-tests.yml | 75 +++++++\n", + " .github/workflows/docs-preview-pr.yaml | 4 +-\n", + " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", + " .github/workflows/gpu-ci.yml | 30 ---\n", + " .github/workflows/gpu-tests.yml | 34 +++\n", + " .github/workflows/lint.yaml | 4 +\n", + " .github/workflows/release-drafter.yaml | 2 +-\n", + " .github/workflows/set-stable-branch.yaml | 10 +\n", + " .gitlab-ci.yml | 23 +--\n", + " .pre-commit-config.yaml | 47 +++--\n", + " .prettierignore | 2 +\n", + " CHANGELOG.md | 187 ++++++++---------\n", + " CONTRIBUTING.md | 30 +--\n", + " README.md | 48 ++---\n", + " bench/datasets/tools/nvt_etl.py | 4 +-\n", + " bench/datasets/tools/train_tensorflow.py | 1 -\n", + " bench/examples/MultiGPUBench.md | 67 +++---\n", + " bench/examples/dask-nvtabular-criteo-benchmark.py | 4 +-\n", + " ci/pr.gpu.Jenkinsfile | 2 +-\n", + " conda/environments/nvtabular_aws_sagemaker.yml | 2 +-\n", + " conda/recipes/meta.yaml | 2 +-\n", + " cpp/nvtabular/inference/categorify.cc | 10 +\n", + " docs/README.md | 29 ++-\n", + " docs/source/core_features.md | 48 ++---\n", + " docs/source/resources/architecture.md | 17 +-\n", + " docs/source/resources/cloud_integration.md | 24 ++-\n", + " docs/source/resources/links.md | 40 ++--\n", + " docs/source/toc.yaml | 12 +-\n", + " examples/01-Getting-started.ipynb | 5 +-\n", + " examples/02-Advanced-NVTabular-workflow.ipynb | 5 +-\n", + " .../03-Running-on-multiple-GPUs-or-on-CPU.ipynb | 24 ++-\n", + " examples/README.md | 1 +\n", + " 
.../tensorflow/tfrecords_to_parquet.py | 9 +-\n", + " nvtabular/inference/__init__.py | 4 +-\n", + " nvtabular/inference/triton/data_conversions.py | 24 +--\n", + " nvtabular/inference/triton/ensemble.py | 86 ++------\n", + " nvtabular/inference/triton/model/model_pt.py | 1 -\n", + " nvtabular/inference/workflow/hugectr.py | 2 +-\n", + " nvtabular/loader/backend.py | 31 +--\n", + " nvtabular/loader/tensorflow.py | 1 +\n", + " nvtabular/ops/categorify.py | 4 +-\n", + " nvtabular/ops/column_similarity.py | 42 ++--\n", + " nvtabular/ops/groupby.py | 35 ++--\n", + " nvtabular/ops/join_external.py | 7 +-\n", + " nvtabular/ops/join_groupby.py | 18 +-\n", + " nvtabular/ops/list_slice.py | 22 +-\n", + " nvtabular/ops/moments.py | 2 -\n", + " nvtabular/ops/reduce_dtype_size.py | 9 +-\n", + " nvtabular/ops/target_encoding.py | 2 +-\n", + " nvtabular/ops/value_counts.py | 14 +-\n", + " nvtabular/tools/data_gen.py | 31 ++-\n", + " nvtabular/utils.py | 2 +-\n", + " nvtabular/workflow/workflow.py | 169 +++++++++++++--\n", + " requirements-test.txt | 2 -\n", + " requirements/base.txt | 4 +-\n", + " requirements/test.txt | 15 +-\n", + " setup.py | 5 +\n", + " tests/conftest.py | 33 ++-\n", + " .../test_02-Advanced-NVTabular-workflow.py | 17 +-\n", + " .../test_03-Running-on-multiple-GPUs-or-on-CPU.py | 11 +-\n", + " tests/unit/loader/test_tf_dataloader.py | 206 +++---------------\n", + " tests/unit/loader/test_torch_dataloader.py | 79 ++-----\n", + " tests/unit/ops/test_categorify.py | 36 +++-\n", + " tests/unit/ops/test_column_similarity.py | 3 +-\n", + " tests/unit/ops/test_drop_low_cardinality.py | 7 +-\n", + " tests/unit/ops/test_groupyby.py | 9 +-\n", + " tests/unit/ops/test_join.py | 11 +-\n", + " tests/unit/ops/test_lambda.py | 28 ++-\n", + " tests/unit/ops/test_ops.py | 12 +-\n", + " tests/unit/ops/test_ops_schema.py | 25 ++-\n", + " tests/unit/ops/test_reduce_dtype_size.py | 7 +-\n", + " tests/unit/ops/test_target_encode.py | 11 +-\n", + " tests/unit/ops/test_value_count.py | 2 +\n", + " tests/unit/test_dask_nvt.py | 5 +-\n", + " tests/unit/test_s3.py | 8 +-\n", + " tests/unit/test_tf4rec.py | 11 +-\n", + " tests/unit/test_triton_inference.py | 3 +-\n", + " tests/unit/workflow/test_cpu_workflow.py | 6 +-\n", + " tests/unit/workflow/test_workflow.py | 92 ++++++++-\n", + " tox.ini | 10 +-\n", + " 93 files changed, 1448 insertions(+), 1196 deletions(-)\n", + " create mode 100644 .github/workflows/check-base-branch.yaml\n", + " delete mode 100644 .github/workflows/cpu-ci.yml\n", + " create mode 100644 .github/workflows/cpu-packages.yml\n", + " create mode 100644 .github/workflows/cpu-tests.yml\n", + " delete mode 100644 .github/workflows/gpu-ci.yml\n", + " create mode 100644 .github/workflows/gpu-tests.yml\n", + " create mode 100644 .github/workflows/set-stable-branch.yaml\n", + " create mode 100644 .prettierignore\n", + " delete mode 100644 requirements-test.txt\n", + "Processing /nvtabular\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: merlin-dataloader>=23.4.0 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+66.g67136eba) (23.4.0)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+66.g67136eba) 
(1.9.3)\n", + "Processing /root/.cache/pip/wheels/42/ef/87/2c64bce8c3064a2c4e399933df4eda4838939355698ff8f7c7/merlin_core-23.4.0-py3-none-any.whl\n", + "Requirement already satisfied: numpy<1.26.0,>=1.18.5 in /usr/local/lib/python3.8/dist-packages (from scipy->nvtabular==1.6.0+66.g67136eba) (1.22.4)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (0.56.4)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.2.5)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.12.0)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (8.0.0)\n", + "Requirement already satisfied: dask>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2023.4.1)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (4.64.1)\n", + "Requirement already satisfied: distributed>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2023.4.1)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (3.19.6)\n", + "Requirement already satisfied: dask-cuda>=22.12.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (23.4.0)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (11.4.1)\n", + "Requirement already satisfied: fsspec>=2022.7.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2023.5.0)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.3.5)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (22.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (0.39.1)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (45.2.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (5.2.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (0.4.3)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.2.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from 
tensorflow-metadata>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.57.0)\n", + "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.3.0)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (6.0)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.2.0)\n", + "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (0.12.0)\n", + "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (8.1.3)\n", + "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (5.9.4)\n", + "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.0.4)\n", + "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (6.1)\n", + "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.26.13)\n", + "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (3.1.2)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.0.0)\n", + "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.4.0)\n", + "Requirement already satisfied: zict>=2.2.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.2.0)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.7.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2022.7)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (3.11.0)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from 
grpclib->betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (4.1.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (6.0.4)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.1.1)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=2.2.0->distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.0.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.14.0)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (4.0.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (6.0.1)\n", + "Building wheels for collected packages: nvtabular\n", + " Building wheel for nvtabular (PEP 517): started\n", + " Building wheel for nvtabular (PEP 517): finished with status 'done'\n", + " Created wheel for nvtabular: filename=nvtabular-1.6.0+66.g67136eba-cp38-cp38-linux_x86_64.whl size=259850 sha256=957958ecd0f9149dbe203eb5e2a3d1b5ec128421aee4e31572f4ca8574131719\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-btpmur92/wheels/df/bf/c2/9cc2a62fe6da42038c26a9c0c4e25f9767093528b102fa30a2\n", + "Successfully built nvtabular\n", + "Installing collected packages: merlin-core, nvtabular\n", + " Attempting uninstall: merlin-core\n", + " Found existing installation: merlin-core 0.9.0+125.ga0bcd30f\n", + " Uninstalling merlin-core-0.9.0+125.ga0bcd30f:\n", + " Successfully uninstalled merlin-core-0.9.0+125.ga0bcd30f\n", + " Attempting uninstall: nvtabular\n", + " Found existing installation: nvtabular 1.8.0\n", + " Uninstalling nvtabular-1.8.0:\n", + " Successfully uninstalled nvtabular-1.8.0\n", + "Successfully installed merlin-core-23.4.0 nvtabular-1.6.0+66.g67136eba\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Previous HEAD position was feaf748 adding async tf strategy for gpu memory (#264)\n", + "Switched to branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your branch is up to date with 'origin/main'.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/systems\n", + " * branch main -> FETCH_HEAD\n", + " 20bb231..2b1b90b main -> origin/main\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Updating 20bb231..2b1b90b\n", + "Fast-forward\n", + " .github/ISSUE_TEMPLATE/bug-report.md | 17 +-\n", + " .github/ISSUE_TEMPLATE/documentation-request.md | 12 +-\n", + " .github/ISSUE_TEMPLATE/feature-request.md | 5 +-\n", + " .github/ISSUE_TEMPLATE/submit-question.md | 3 +-\n", + " .github/ISSUE_TEMPLATE/task.md | 5 +-\n", + " .github/release-drafter.yml | 44 +-\n", + " .github/workflows/check-base-branch.yaml | 9 +\n", + " .github/workflows/cpu-ci.yml | 128 ++--\n", + " .github/workflows/docs-preview-pr.yaml | 6 +-\n", + " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", + " 
.github/workflows/gpu-ci.yml | 40 +-\n", + " .github/workflows/lint.yaml | 18 +-\n", + " .github/workflows/packages.yaml | 118 ++++\n", + " .github/workflows/postmerge-cpu.yml | 60 ++\n", + " .github/workflows/postmerge-gpu.yml | 27 +\n", + " .github/workflows/release-drafter.yml | 4 +-\n", + " .github/workflows/set-stable-branch.yaml | 10 +\n", + " .pre-commit-config.yaml | 71 +-\n", + " .prettierignore | 2 +\n", + " CLA.md | 9 +-\n", + " CONTRIBUTING.md | 2 +-\n", + " README.md | 2 +-\n", + " ci/pr.gpu.Jenkinsfile | 2 +-\n", + " conda/recipes/meta.yaml | 18 +-\n", + " docs/README.md | 53 +-\n", + " ...ing-An-Implicit-Model-With-Merlin-Systems.ipynb | 5 +-\n", + " ...ving-An-XGboost-Model-With-Merlin-Systems.ipynb | 5 +-\n", + " ...erving-Ranking-Models-With-Merlin-Systems.ipynb | 5 +-\n", + " merlin/systems/dag/__init__.py | 2 -\n", + " merlin/systems/dag/dictarray.py | 345 ----------\n", + " merlin/systems/dag/ensemble.py | 2 +-\n", + " merlin/systems/dag/node.py | 29 +-\n", + " merlin/systems/dag/op_runner.py | 68 --\n", + " merlin/systems/dag/ops/__init__.py | 22 +-\n", + " merlin/systems/dag/ops/faiss.py | 116 +---\n", + " merlin/systems/dag/ops/feast.py | 110 +---\n", + " merlin/systems/dag/ops/fil.py | 74 +--\n", + " merlin/systems/dag/ops/implicit.py | 84 +--\n", + " merlin/systems/dag/ops/operator.py | 216 +-----\n", + " merlin/systems/dag/ops/pytorch.py | 23 +-\n", + " merlin/systems/dag/ops/session_filter.py | 72 +-\n", + " merlin/systems/dag/ops/softmax_sampling.py | 61 +-\n", + " merlin/systems/dag/ops/tensorflow.py | 143 ++--\n", + " merlin/systems/dag/ops/unroll_features.py | 36 +-\n", + " merlin/systems/dag/ops/workflow.py | 29 +-\n", + " merlin/systems/dag/runtimes/triton/ops/fil.py | 51 +-\n", + " merlin/systems/dag/runtimes/triton/ops/operator.py | 84 ++-\n", + " merlin/systems/dag/runtimes/triton/ops/pytorch.py | 27 +-\n", + " .../systems/dag/runtimes/triton/ops/tensorflow.py | 41 +-\n", + " merlin/systems/dag/runtimes/triton/ops/workflow.py | 132 +++-\n", + " merlin/systems/dag/runtimes/triton/runtime.py | 36 +-\n", + " merlin/systems/triton/__init__.py | 118 ++--\n", + " merlin/systems/triton/conversions.py | 198 ++++--\n", + " merlin/systems/triton/export.py | 731 +--------------------\n", + " merlin/systems/triton/models/executor_model.py | 46 +-\n", + " merlin/systems/triton/models/oprunner_model.py | 129 ----\n", + " merlin/systems/triton/models/pytorch_model.py | 139 ++--\n", + " merlin/systems/triton/models/workflow_model.py | 56 +-\n", + " merlin/systems/triton/utils.py | 58 +-\n", + " merlin/systems/workflow/base.py | 30 +-\n", + " merlin/systems/workflow/hugectr.py | 87 ---\n", + " merlin/systems/workflow/pytorch.py | 46 --\n", + " merlin/systems/workflow/tensorflow.py | 68 --\n", + " pytest.ini | 7 +-\n", + " requirements/test.txt | 2 +-\n", + " tests/conftest.py | 36 +-\n", + " ...erving_an_implicit_model_with_merlin_systems.py | 12 +-\n", + " ...serving_an_xgboost_model_with_merlin_systems.py | 4 +-\n", + " tests/integration/tf/test_transformer_model.py | 103 +++\n", + " .../systems/dag/test_column.py => test_passing.py} | 15 +-\n", + " tests/unit/systems/dag/ops/test_ops.py | 101 ++-\n", + " .../dag/runtimes/local/ops/fil/test_lightgbm.py | 15 +-\n", + " .../dag/runtimes/local/ops/fil/test_sklearn.py | 15 +-\n", + " .../dag/runtimes/local/ops/fil/test_xgboost.py | 18 +-\n", + " .../runtimes/local/ops/nvtabular/test_ensemble.py | 10 +-\n", + " .../runtimes/local/ops/tensorflow/test_ensemble.py | 35 +-\n", + " .../dag/runtimes/local/ops/torch/test_op.py | 
6 +-\n", + " .../triton/ops/fil/test_lightgbm_triton.py | 11 +-\n", + " .../runtimes/triton/ops/fil/test_sklearn_triton.py | 4 +-\n", + " .../runtimes/triton/ops/fil/test_xgboost_triton.py | 7 +-\n", + " .../dag/runtimes/triton/ops/torch/test_op.py | 4 +-\n", + " .../runtimes/triton/ops/workflow/test_ensemble.py | 305 ++++++++-\n", + " .../systems/dag/runtimes/triton/test_triton.py | 21 +-\n", + " tests/unit/systems/dag/test_dict_array.py | 76 ---\n", + " tests/unit/systems/dag/test_ensemble.py | 4 +-\n", + " tests/unit/systems/dag/test_executors.py | 12 +-\n", + " tests/unit/systems/dag/test_op_runner.py | 210 ------\n", + " tests/unit/systems/ops/embedding_op.py | 56 ++\n", + " tests/unit/systems/ops/faiss/test_executor.py | 25 +-\n", + " tests/unit/systems/ops/feast/test_op.py | 76 +--\n", + " tests/unit/systems/ops/fil/test_ensemble.py | 21 +-\n", + " tests/unit/systems/ops/fil/test_forest.py | 47 +-\n", + " tests/unit/systems/ops/fil/test_op.py | 106 ++-\n", + " tests/unit/systems/ops/implicit/test_executor.py | 4 +-\n", + " tests/unit/systems/ops/implicit/test_op.py | 51 +-\n", + " tests/unit/systems/ops/padding_op.py | 62 ++\n", + " tests/unit/systems/ops/tf/test_ensemble.py | 15 +-\n", + " tests/unit/systems/ops/tf/test_op.py | 6 +-\n", + " tests/unit/systems/ops/torch/test_ensemble.py | 97 +++\n", + " tests/unit/systems/utils/ops.py | 13 +-\n", + " tests/unit/systems/utils/tf.py | 65 +-\n", + " tests/unit/test_export.py | 77 ---\n", + " tox.ini | 42 +-\n", + " 103 files changed, 2427 insertions(+), 3565 deletions(-)\n", + " create mode 100644 .github/workflows/check-base-branch.yaml\n", + " create mode 100644 .github/workflows/packages.yaml\n", + " create mode 100644 .github/workflows/postmerge-cpu.yml\n", + " create mode 100644 .github/workflows/postmerge-gpu.yml\n", + " create mode 100644 .github/workflows/set-stable-branch.yaml\n", + " create mode 100644 .prettierignore\n", + " delete mode 100644 merlin/systems/dag/dictarray.py\n", + " delete mode 100644 merlin/systems/dag/op_runner.py\n", + " delete mode 100644 merlin/systems/triton/models/oprunner_model.py\n", + " delete mode 100644 merlin/systems/workflow/hugectr.py\n", + " delete mode 100644 merlin/systems/workflow/pytorch.py\n", + " delete mode 100644 merlin/systems/workflow/tensorflow.py\n", + " create mode 100644 tests/integration/tf/test_transformer_model.py\n", + " rename tests/{unit/systems/dag/test_column.py => test_passing.py} (66%)\n", + " delete mode 100644 tests/unit/systems/dag/test_dict_array.py\n", + " delete mode 100644 tests/unit/systems/dag/test_op_runner.py\n", + " create mode 100644 tests/unit/systems/ops/embedding_op.py\n", + " create mode 100644 tests/unit/systems/ops/padding_op.py\n", + " create mode 100644 tests/unit/systems/ops/torch/test_ensemble.py\n", + " delete mode 100644 tests/unit/test_export.py\n", + "Processing /systems\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: treelite==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (2.4.0)\n", + "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (23.4.0)\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "Requirement already satisfied: requests<3,>=2.10 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (2.28.1)\n", + "Requirement already satisfied: treelite-runtime==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (2.4.0)\n", + "Requirement already satisfied: nvtabular>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (1.6.0+66.g67136eba)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.8/dist-packages (from treelite==2.4.0->merlin-systems==0.7.0+98.g2b1b90b) (1.22.4)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from treelite==2.4.0->merlin-systems==0.7.0+98.g2b1b90b) (1.9.3)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (3.19.6)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.3.5)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.2.5)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (4.64.1)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (8.0.0)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (11.4.1)\n", + "Requirement already satisfied: fsspec>=2022.7.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2023.5.0)\n", + "Requirement already satisfied: distributed>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2023.4.1)\n", + "Requirement already satisfied: dask-cuda>=22.12.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (23.4.0)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (22.0)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (0.56.4)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.12.0)\n", + "Requirement already satisfied: dask>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2023.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+98.g2b1b90b) (2.8)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+98.g2b1b90b) (1.26.13)\n", + "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+98.g2b1b90b) (2.1.1)\n", + "Requirement already satisfied: certifi>=2017.4.17 in 
/usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+98.g2b1b90b) (2019.11.28)\n", + "Requirement already satisfied: merlin-dataloader>=23.4.0 in /usr/local/lib/python3.8/dist-packages (from nvtabular>=1.0.0->merlin-systems==0.7.0+98.g2b1b90b) (23.4.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2022.7)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.2.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (0.4.3)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.7.0)\n", + "Requirement already satisfied: zict>=2.2.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.2.0)\n", + "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (5.9.4)\n", + "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.0.4)\n", + "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (6.1)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.2.0)\n", + "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (0.12.0)\n", + "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (3.1.2)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (6.0)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.0.0)\n", + "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (8.1.3)\n", + "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.4.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (5.2.0)\n", + "Requirement already satisfied: setuptools in 
/usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (45.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (0.39.1)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.57.0)\n", + "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.3.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.14.0)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (4.1.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (6.0.4)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=2.2.0->distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.0.1)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.1.1)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (3.11.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (6.0.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (4.0.0)\n", + "Building wheels for collected packages: merlin-systems\n", + " Building wheel for merlin-systems (PEP 517): started\n", + " Building wheel for merlin-systems (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-systems: filename=merlin_systems-0.7.0+98.g2b1b90b-py3-none-any.whl size=83152 sha256=282b1d3abe91766660d30dcbfa6d196c7f13d8d7d1b554eefd02455b7cdc1924\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-ojtyyyod/wheels/1f/e9/71/1b0c6295aa7f4b37cb70292d96d87d9f38204674e6531bdda6\n", + "Successfully built merlin-systems\n", + "Installing collected packages: merlin-systems\n", + " Attempting uninstall: merlin-systems\n", + " Found existing installation: merlin-systems 0.9.0\n", + " Uninstalling merlin-systems-0.9.0:\n", + " Successfully uninstalled merlin-systems-0.9.0\n", + "Successfully installed merlin-systems-0.7.0+98.g2b1b90b\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Previous HEAD position 
was fd5d3fc Use tf.function for list column operations (#89)\n", + "Switched to branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your branch is up to date with 'origin/main'.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/dataloader\n", + " * branch main -> FETCH_HEAD\n", + " 5b3fe46..d9e97b4 main -> origin/main\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Updating 5b3fe46..d9e97b4\n", + "Fast-forward\n", + " .github/workflows/check-base-branch.yaml | 9 +\n", + " .github/workflows/cpu-ci.yml | 83 +----\n", + " .github/workflows/cpu-packages.yml | 125 +++++++\n", + " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", + " .github/workflows/gpu-ci.yml | 2 +-\n", + " .github/workflows/jax.yaml | 2 +-\n", + " .github/workflows/models.yml | 43 +++\n", + " .github/workflows/nvtabular.yml | 43 +++\n", + " .github/workflows/release-drafter.yaml | 2 +-\n", + " .github/workflows/set-stable-branch.yaml | 10 +\n", + " .github/workflows/systems.yml | 43 +++\n", + " .github/workflows/tensorflow.yml | 2 +-\n", + " .github/workflows/torch.yaml | 2 +-\n", + " .github/workflows/transformers4rec.yml | 43 +++\n", + " .pre-commit-config.yaml | 14 +-\n", + " ci/pr.gpu.Jenkinsfile | 44 +++\n", + " docs/README.md | 28 +-\n", + " examples/01a-Getting-started-Tensorflow.ipynb | 5 +-\n", + " examples/01b-Getting-started-Pytorch.ipynb | 5 +-\n", + " .../02-Multi-GPU-Tensorflow-with-Horovod.ipynb | 371 ++++++++++++++++++\n", + " merlin/dataloader/jax.py | 52 +--\n", + " merlin/dataloader/loader_base.py | 413 +++++++++------------\n", + " merlin/dataloader/ops/embeddings.py | 110 ++++++\n", + " merlin/dataloader/ops/embeddings/__init__.py | 15 -\n", + " merlin/dataloader/ops/embeddings/embedding_op.py | 237 ------------\n", + " .../dataloader/ops/embeddings/tf_embedding_op.py | 101 -----\n", + " .../ops/embeddings/torch_embedding_op.py | 106 ------\n", + " merlin/dataloader/ops/padding.py | 88 +++++\n", + " merlin/dataloader/tensorflow.py | 337 +++++------------\n", + " merlin/dataloader/torch.py | 225 +++++------\n", + " merlin/dataloader/utils/tf/tf_trainer.py | 13 +-\n", + " requirements/base.txt | 2 +-\n", + " tests/conftest.py | 11 +-\n", + " .../test_multi_GPU_with_horovod_and_tensorflow.py | 28 ++\n", + " tests/unit/dataloader/test_array_dataloader.py | 57 +++\n", + " tests/unit/dataloader/test_array_to_tensorflow.py | 54 +++\n", + " tests/unit/dataloader/test_array_to_torch.py | 69 ++++\n", + " .../{test_tf_embeddings.py => test_embeddings.py} | 188 +++++-----\n", + " tests/unit/dataloader/test_jax_dataloader.py | 29 +-\n", + " tests/unit/dataloader/test_padding.py | 46 +++\n", + " tests/unit/dataloader/test_tf_dataloader.py | 358 +++++++++---------\n", + " tests/unit/dataloader/test_torch_dataloader.py | 245 ++++++++----\n", + " tests/unit/dataloader/test_torch_embeddings.py | 242 ------------\n", + " tox.ini | 55 +++\n", + " 44 files changed, 2154 insertions(+), 1810 deletions(-)\n", + " create mode 100644 .github/workflows/check-base-branch.yaml\n", + " create mode 100644 .github/workflows/cpu-packages.yml\n", + " create mode 100644 .github/workflows/models.yml\n", + " create mode 100644 .github/workflows/nvtabular.yml\n", + " create mode 100644 .github/workflows/set-stable-branch.yaml\n", + " create mode 100644 .github/workflows/systems.yml\n", + " create mode 100644 .github/workflows/transformers4rec.yml\n", + " create mode 100644 ci/pr.gpu.Jenkinsfile\n", + " 
create mode 100644 examples/02-Multi-GPU-Tensorflow-with-Horovod.ipynb\n", + " create mode 100644 merlin/dataloader/ops/embeddings.py\n", + " delete mode 100644 merlin/dataloader/ops/embeddings/__init__.py\n", + " delete mode 100644 merlin/dataloader/ops/embeddings/embedding_op.py\n", + " delete mode 100644 merlin/dataloader/ops/embeddings/tf_embedding_op.py\n", + " delete mode 100644 merlin/dataloader/ops/embeddings/torch_embedding_op.py\n", + " create mode 100644 merlin/dataloader/ops/padding.py\n", + " create mode 100644 tests/examples/test_multi_GPU_with_horovod_and_tensorflow.py\n", + " create mode 100644 tests/unit/dataloader/test_array_dataloader.py\n", + " create mode 100644 tests/unit/dataloader/test_array_to_tensorflow.py\n", + " create mode 100644 tests/unit/dataloader/test_array_to_torch.py\n", + " rename tests/unit/dataloader/{test_tf_embeddings.py => test_embeddings.py} (52%)\n", + " create mode 100644 tests/unit/dataloader/test_padding.py\n", + " delete mode 100644 tests/unit/dataloader/test_torch_embeddings.py\n", + "Processing /dataloader\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: merlin-core>=23.04.00 in /usr/local/lib/python3.8/dist-packages (from merlin-dataloader==0.0.2+72.gd9e97b4) (23.4.0)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (0.56.4)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.3.5)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (8.0.0)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.2.5)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (4.64.1)\n", + "Requirement already satisfied: distributed>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2023.4.1)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (11.4.1)\n", + "Requirement already satisfied: dask-cuda>=22.12.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (23.4.0)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.12.0)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (22.0)\n", + "Requirement already satisfied: numpy>=1.22.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.22.4)\n", + "Requirement already satisfied: protobuf>=3.0.0 in 
/usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (3.19.6)\n", + "Requirement already satisfied: fsspec>=2022.7.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2023.5.0)\n", + "Requirement already satisfied: dask>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2023.4.1)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (45.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (0.39.1)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (5.2.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2022.7)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.2.0)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (0.4.3)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.2.0)\n", + "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (5.9.4)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.0.0)\n", + "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.0.4)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (6.0)\n", + "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (6.1)\n", + "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (3.1.2)\n", + "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.4.0)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from 
distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.7.0)\n", + "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (0.12.0)\n", + "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.26.13)\n", + "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (8.1.3)\n", + "Requirement already satisfied: zict>=2.2.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.2.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.57.0)\n", + "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.3.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (3.11.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.14.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (6.0.4)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (4.1.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.1.1)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=2.2.0->distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.0.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (4.0.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (6.0.1)\n", + "Building wheels for collected packages: merlin-dataloader\n", + " Building wheel for merlin-dataloader (PEP 517): started\n", + " Building wheel for merlin-dataloader (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-dataloader: filename=merlin_dataloader-0.0.2+72.gd9e97b4-py3-none-any.whl size=34881 sha256=c39b7e146f814713447917029d09f8cf4978202ed3852dce51544461cd074e3b\n", + " Stored in 
directory: /tmp/pip-ephem-wheel-cache-t_njcpzr/wheels/8c/19/5b/15dc04f5a977f6a7f73ed66c91996a687b1d9e3154a4765536\n", + "Successfully built merlin-dataloader\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ERROR: nvtabular 1.6.0+66.g67136eba has requirement merlin-dataloader>=23.4.0, but you'll have merlin-dataloader 0.0.2+72.gd9e97b4 which is incompatible.\n", + "ERROR: merlin-models 23.5.dev0+12.gd8133b8f has requirement merlin-dataloader>=23.4.0, but you'll have merlin-dataloader 0.0.2+72.gd9e97b4 which is incompatible.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Installing collected packages: merlin-dataloader\n", + " Attempting uninstall: merlin-dataloader\n", + " Found existing installation: merlin-dataloader 23.4.0\n", + " Uninstalling merlin-dataloader-23.4.0:\n", + " Successfully uninstalled merlin-dataloader-23.4.0\n", + "Successfully installed merlin-dataloader-0.0.2+72.gd9e97b4\n" + ] + } + ], "source": [ "# %%bash\n", "\n", - "# # cd /models && git fetch origin && git checkout origin/tf/transformer-api && pip install .\n", + "# cd /models && git fetch origin && git checkout origin/tf/transformer-api && pip install .\n", "# cd /models && git checkout main && git pull origin main && pip install .\n", "# cd /core && git checkout main && git pull origin main && pip install .\n", "# cd /nvtabular && git checkout main && git pull origin main && pip install .\n", "# cd /systems && git checkout main && git pull origin main && pip install .\n", - "# cd /dataloader && git checkout main && git pull origin main && pip install .\n", - "\n", - "# ---\n", - "# pip install matplotlib" + "# cd /dataloader && git checkout main && git pull origin main && pip install ." ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "e9929dc8", "metadata": {}, "outputs": [ @@ -30,18 +1531,22 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: gdown in /usr/local/lib/python3.8/dist-packages (4.7.1)\n", - "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from gdown) (4.64.1)\n", + "Collecting gdown\n", + " Downloading gdown-4.7.1-py3-none-any.whl (15 kB)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from gdown) (3.9.0)\n", - "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.8/dist-packages (from gdown) (4.11.1)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from gdown) (4.64.1)\n", "Requirement already satisfied: six in /usr/lib/python3/dist-packages (from gdown) (1.14.0)\n", + "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.8/dist-packages (from gdown) (4.11.1)\n", "Requirement already satisfied: requests[socks] in /usr/local/lib/python3.8/dist-packages (from gdown) (2.28.1)\n", "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.8/dist-packages (from beautifulsoup4->gdown) (2.3.2.post1)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2019.11.28)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.26.13)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2019.11.28)\n", "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from 
requests[socks]->gdown) (2.1.1)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2.8)\n", - "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6; extra == \"socks\" in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.7.1)\n" + "Collecting PySocks!=1.5.7,>=1.5.6; extra == \"socks\"\n", + " Downloading PySocks-1.7.1-py3-none-any.whl (16 kB)\n", + "Installing collected packages: gdown, PySocks\n", + "Successfully installed PySocks-1.7.1 gdown-4.7.1\n" ] }, { @@ -50,26 +1555,42 @@ "text": [ "Downloading...\n", "From (uriginal): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", - "From (redirected): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV&confirm=t&uuid=0dd96474-79af-47bb-9148-b96d64204e14\n", + "From (redirected): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV&confirm=t&uuid=b5bb23eb-a2dd-4adc-b7b7-be5687c89aca\n", "To: /workspace/T4Rec_repro/rees46_ecom_dataset_small_for_ci.zip\n", - "100%|██████████| 43.4M/43.4M [00:12<00:00, 3.62MB/s]\n" + "100%|██████████| 43.4M/43.4M [00:07<00:00, 6.20MB/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 InRelease\n", - "Hit:2 http://archive.ubuntu.com/ubuntu focal InRelease\n", - "Hit:3 http://security.ubuntu.com/ubuntu focal-security InRelease\n", - "Hit:4 http://archive.ubuntu.com/ubuntu focal-updates InRelease\n", - "Hit:5 http://archive.ubuntu.com/ubuntu focal-backports InRelease\n", + "Get:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 InRelease [1581 B]\n", + "Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 Packages [1009 kB]\n", + "Get:3 http://archive.ubuntu.com/ubuntu focal InRelease [265 kB]\n", + "Get:4 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]\n", + "Get:5 http://security.ubuntu.com/ubuntu focal-security/main amd64 Packages [2674 kB]\n", + "Get:6 http://archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]\n", + "Get:7 http://archive.ubuntu.com/ubuntu focal-backports InRelease [108 kB]\n", + "Get:8 http://archive.ubuntu.com/ubuntu focal/main amd64 Packages [1275 kB]\n", + "Get:9 http://security.ubuntu.com/ubuntu focal-security/restricted amd64 Packages [2203 kB]\n", + "Get:10 http://archive.ubuntu.com/ubuntu focal/restricted amd64 Packages [33.4 kB]\n", + "Get:11 http://archive.ubuntu.com/ubuntu focal/universe amd64 Packages [11.3 MB]\n", + "Get:12 http://security.ubuntu.com/ubuntu focal-security/multiverse amd64 Packages [28.5 kB]\n", + "Get:13 http://security.ubuntu.com/ubuntu focal-security/universe amd64 Packages [1045 kB]\n", + "Get:14 http://archive.ubuntu.com/ubuntu focal/multiverse amd64 Packages [177 kB]\n", + "Get:15 http://archive.ubuntu.com/ubuntu focal-updates/restricted amd64 Packages [2341 kB]\n", + "Get:16 http://archive.ubuntu.com/ubuntu focal-updates/universe amd64 Packages [1341 kB]\n", + "Get:17 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 Packages [3157 kB]\n", + "Get:18 http://archive.ubuntu.com/ubuntu focal-updates/multiverse amd64 Packages [31.2 kB]\n", + "Get:19 http://archive.ubuntu.com/ubuntu focal-backports/main amd64 Packages [55.2 kB]\n", + "Get:20 http://archive.ubuntu.com/ubuntu focal-backports/universe amd64 Packages [28.6 kB]\n", + "Fetched 27.3 MB in 9s (2922 kB/s)\n", "Reading package lists...\n", "Reading package lists...\n", "Building dependency 
tree...\n", "Reading state information...\n", "unzip is already the newest version (6.0-25ubuntu1.1).\n", - "0 upgraded, 0 newly installed, 0 to remove and 98 not upgraded.\n", + "0 upgraded, 0 newly installed, 0 to remove and 99 not upgraded.\n", "Archive: rees46_ecom_dataset_small_for_ci.zip\n", " creating: ecom_dataset/0001/\n", " inflating: ecom_dataset/0001/valid.parquet \n", @@ -84,17 +1605,17 @@ } ], "source": [ - "%%bash\n", + "# %%bash\n", "\n", - "rm -rf ecom_dataset\n", - "mkdir -p ecom_dataset\n", + "# rm -rf ecom_dataset\n", + "# mkdir -p ecom_dataset\n", "\n", - "pip install gdown\n", - "# gdown https://drive.google.com/uc?id=1BvCHc4eXComuNK93bKhRM6cbg9y5p350 # <-- full dataset\n", - "gdown https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", - "apt-get update -y\n", - "apt-get install unzip -y\n", - "unzip -d ecom_dataset \"rees46_ecom_dataset_small_for_ci.zip\"" + "# pip install gdown\n", + "# # gdown https://drive.google.com/uc?id=1BvCHc4eXComuNK93bKhRM6cbg9y5p350 # <-- full dataset\n", + "# gdown https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", + "# apt-get update -y\n", + "# apt-get install unzip -y\n", + "# unzip -d ecom_dataset \"rees46_ecom_dataset_small_for_ci.zip\"" ] }, { @@ -353,18 +1874,47 @@ }, { "cell_type": "code", - "execution_count": 7, - "id": "ceb3ae93", + "execution_count": 4, + "id": "0660887b", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "# os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", + "import gc\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ec38f1a6", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "2023-04-13 11:21:28.090236: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + "2023-05-09 01:50:24.115697: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n" ] - }, + } + ], + "source": [ + "import tensorflow as tf\n", + "\n", + "from merlin.schema.tags import Tags\n", + "from merlin.io.dataset import Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ceb3ae93", + "metadata": {}, + "outputs": [ { "name": "stdout", "output_type": "stream", @@ -376,11 +1926,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", - " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", - "2023-04-13 11:21:30.471061: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value 
(-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:21:30.471514: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:21:30.471678: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n" + "2023-05-09 01:50:26.436605: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:50:26.437013: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:50:26.437158: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n" ] }, { @@ -397,38 +1945,28 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-04-13 11:21:30.757567: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-05-09 01:50:26.674203: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-04-13 11:21:30.758435: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:21:30.758639: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:21:30.758792: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:21:31.508591: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:21:31.508802: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:21:31.508961: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:21:31.509071: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:42] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. 
Original config value was 0.\n", - "2023-04-13 11:21:31.509079: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", - "2023-04-13 11:21:31.509140: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", + "2023-05-09 01:50:26.675123: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:50:26.675302: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:50:26.675428: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:50:27.455564: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:50:27.455749: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:50:27.455877: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:50:27.455980: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:42] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", + "2023-05-09 01:50:27.456001: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ - "import os\n", - "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", - "import gc\n", - "import numpy as np\n", - "\n", - "import tensorflow as tf\n", - "\n", - "from merlin.schema.tags import Tags\n", - "from merlin.io.dataset import Dataset\n", "import merlin.models.tf as mm" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "11647dd3", "metadata": {}, "outputs": [], @@ -439,7 +1977,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "4ab4e0fb", "metadata": {}, "outputs": [], @@ -450,7 +1988,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "8d9903e6", "metadata": {}, "outputs": [], @@ -469,7 +2007,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": "410ea223", "metadata": {}, "outputs": [], @@ -480,7 +2018,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "id": "4328f03a", "metadata": {}, "outputs": [], @@ -492,231 +2030,11804 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "4571b92b", + "execution_count": 12, + "id": "d5a9dd50", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "ops = ['sess_pid_seq'] >> Categorify(dtype=np.int32) #>> Rename(name=seq_name)\n", + "\n", + "wf = Workflow(ops)\n", + "\n", + "train = wf.fit_transform(train)\n", + "valid = wf.transform(valid)" + ] }, { "cell_type": "code", "execution_count": 13, - "id": "d5a9dd50", + "id": "3116726e", "metadata": {}, "outputs": [], "source": [ - "ops = ['sess_pid_seq'] >> Categorify(dtype=np.int32) #>> Rename(name=seq_name)\n", + "# cat rees46_schema_modified.pbtxt" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "69e8f95c", + "metadata": {}, + "outputs": [], + "source": [ + "# %%writefile rees46_schema_modified_2.pbtxt\n", "\n", - "wf = Workflow(ops)\n", + "# feature {\n", + "# name: \"seq\"\n", + "# value_count {\n", + "# min: 2\n", + "# }\n", + "# type: INT\n", + "# int_domain {\n", + "# name: \"seq\"\n", + "# min: 1\n", + "# max: 390000\n", + "# is_categorical: true\n", + "# }\n", + "# annotation {\n", + "# tag: \"item_id\"\n", + "# tag: \"list\"\n", + "# tag: \"categorical\"\n", + "# tag: \"item\"\n", + "# }\n", + "# }" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "a6ade14a", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n", "\n", - "train = wf.fit_transform(train)\n", - "valid = wf.transform(valid)" + "def get_model():\n", + " mlp_block = mm.MLPBlock(\n", + " [d_model],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )\n", + "\n", + " schema = TensorflowMetadata.from_proto_text_file(\n", + " './',\n", + " file_name='rees46_schema_modified.pbtxt'\n", + " ).to_merlin_schema()\n", + "\n", + " train.schema = schema\n", + " \n", + " schema_model = schema.select_by_tag(Tags.ITEM_ID)\n", + " input_block = mm.InputBlockV2(\n", + " schema_model,\n", + " categorical=mm.Embeddings(\n", + " schema_model.select_by_tag(Tags.CATEGORICAL),\n", + " dim=item_embedding_dim,\n", + " sequence_combiner=None,\n", + " )\n", + " )\n", + "\n", + " train.schema = train.schema.select_by_name(seq_name)\n", + "\n", + " xlnet_block = mm.XLNetBlock(d_model=d_model, n_head=n_head, n_layer=n_layer)\n", + "\n", + " dense_block = mm.SequentialBlock(\n", + " 
input_block,\n", + " mlp_block,\n", + " xlnet_block\n", + " )\n", + "\n", + " mlp_block2 = mm.MLPBlock(\n", + " [item_embedding_dim],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )\n", + "\n", + " prediction_task = mm.CategoricalOutput(\n", + " to_call=input_block[\"categorical\"][target],\n", + " )\n", + "\n", + " model_transformer = mm.Model(dense_block, mlp_block2, prediction_task)\n", + "\n", + " optimizer = tf.keras.optimizers.Adam(\n", + " learning_rate=learning_rate,\n", + " )\n", + "\n", + " model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", + " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[20])\n", + " )\n", + " return model_transformer, xlnet_block" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "076f42cc", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "523fe2ac", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n", + "2023-05-09 01:50:35.053579: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:436: UserWarning: Converting sparse IndexedSlices to a dense Tensor with 174720448 elements. 
This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 105s 143ms/step - loss: 7.2880 - recall_at_20: 0.1451 - mrr_at_20: 0.0813 - ndcg_at_20: 0.0954 - map_at_20: 0.0813 - precision_at_20: 0.0073 - regularization_loss: 0.0000e+00 - loss_batch: 7.2857\n", + "84/84 [==============================] - 4s 26ms/step - loss: 8.5378 - recall_at_20: 0.2315 - mrr_at_20: 0.0811 - ndcg_at_20: 0.1142 - map_at_20: 0.0811 - precision_at_20: 0.0116 - regularization_loss: 0.0000e+00 - loss_batch: 8.5385\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.537825584411621,\n", + " 'recall_at_20': 0.2337784469127655,\n", + " 'mrr_at_20': 0.07926096022129059,\n", + " 'ndcg_at_20': 0.11324834823608398,\n", + " 'map_at_20': 0.07926096022129059,\n", + " 'precision_at_20': 0.011688923463225365,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 8.566910743713379}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "febab09e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n", + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n", + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n", + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, 
model_context_layer_call_and_return_conditional_losses, sequence_predict_next_layer_call_fn, sequence_predict_next_layer_call_and_return_conditional_losses, sequence_predict_last_layer_call_fn while saving (showing 5 of 114). These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: t4rec_model/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: t4rec_model/assets\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:101: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " config[i] = tf.keras.utils.serialize_keras_object(layer)\n", + "/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:134: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " return generic_utils.serialize_keras_object(obj)\n", + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "model_transformer.save('t4rec_model')" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "8e0ea1b1", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). 
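# --- Illustrative sketch (hypothetical, not part of the original notebook/patch) ---
# The fit/evaluate calls above pair the XLNet block with mm.SequencePredictNext for
# training and mm.SequencePredictLast for evaluation. Assuming those transforms act as
# next-item and last-item target extraction, a toy session shows the inputs/targets
# each one would produce (item ids below are made up for illustration):
session = [12, 7, 42, 99]                      # hypothetical item ids in one session
# SequencePredictNext-style: predict the following item at every position
next_inputs, next_targets = session[:-1], session[1:]
# SequencePredictLast-style: hold out only the final item as the target
last_inputs, last_target = session[:-1], session[-1]
print(next_inputs, next_targets)   # [12, 7, 42] [7, 42, 99]
print(last_inputs, last_target)    # [12, 7, 42] 99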
Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "Model(\n", + " (_should_compute_train_metrics_for_batch): \n", + " (blocks): _TupleWrapper((SequentialBlock(\n", + " (layers): List(\n", + " (0): ParallelBlock(\n", + " (_aggregation): ConcatFeatures(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (parallel_layers): Dict(\n", + " (categorical): ParallelBlock(\n", + " (parallel_layers): Dict(\n", + " (sess_pid_seq): EmbeddingTable(\n", + " (features): Dict(\n", + " (sess_pid_seq): ColumnSchema(name='sess_pid_seq', tags={, , , , }, properties={'domain': {'min': 1, 'max': 390000, 'name': 'sess_pid_seq'}, 'value_count': {'min': 2, 'max': None}}, dtype=DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=2, max=None)))), is_list=True, is_ragged=True)\n", + " )\n", + " (table): Embedding(\n", + " (embeddings): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): SequentialBlock(\n", + " (layers): List(\n", + " (0): _Dense(\n", + " (dense): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (2): XLNetBlock(\n", + " (transformer): TFXLNetMainLayer(\n", + " (word_embedding): TFSharedEmbeddings(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer): List(\n", + " (0): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): 
Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (2): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_kwargs): Dict(\n", + " (name): 'transformer'\n", + " (trainable): True\n", + " (dtype): 'float32'\n", + " )\n", + " (mask_emb): \n", + " 
(_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (transformer_pre): PrepareTransformerInputs(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (transformer_post): LastHiddenState(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_masking_post): SequentialBlock(\n", + " (layers): List(\n", + " (0): TransformerOutputToRagged(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): TransformerInferenceHiddenState(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_masking_pre): SequentialBlock(\n", + " (layers): List(\n", + " (0): SequenceCausalLastInference(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): ExtractMaskFromTargets(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " ), SequentialBlock(\n", + " (layers): List(\n", + " (0): _Dense(\n", + " (dense): Dense(\n", + " 448, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " ), CategoricalOutput(\n", + " (to_call): EmbeddingTablePrediction(\n", + " (table): EmbeddingTable(\n", + " (features): Dict(\n", + " (sess_pid_seq): ColumnSchema(name='sess_pid_seq', tags={, , , , }, properties={'domain': {'min': 1, 'max': 390000, 'name': 'sess_pid_seq'}, 'value_count': {'min': 2, 'max': None}}, dtype=DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=2, max=None)))), is_list=True, is_ragged=True)\n", + " )\n", + " (table): Embedding(\n", + " (embeddings): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (output_layer_bias): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )))\n", + " (context): ModelContext(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_prepare_features): PrepareFeatures(\n", + " (prepare_lists): PrepareListFeatures()\n", + " )\n", + " (output_names): List(\n", + " (0): 'sess_pid_seq/categorical_output'\n", + " )\n", + " (optimizer): Adam()\n", + " (loss): Dict(\n", + " (sess_pid_seq/categorical_output): CategoricalCrossEntropy()\n", + " )\n", + " (train_pre): SequencePredictNext(\n", + " (_pre): SequentialBlock(\n", + " (layers): List(\n", + " (0): PrepareFeatures(\n", + " (prepare_lists): PrepareListFeatures()\n", + " )\n", + " (1): PrepareFeatures(\n", + " (prepare_lists): PrepareListFeatures()\n", + " )\n", + " )\n", + " (prepare_lists): PrepareListFeatures()\n", + " )\n", + " (transformer): XLNetBlock(\n", + " (transformer): TFXLNetMainLayer(\n", + " (word_embedding): TFSharedEmbeddings(\n", + " (_feature_shapes): Dict()\n", + " 
(_feature_dtypes): Dict()\n", + " )\n", + " (layer): List(\n", + " (0): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (2): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " 
(_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_kwargs): Dict(\n", + " (name): 'transformer'\n", + " (trainable): True\n", + " (dtype): 'float32'\n", + " )\n", + " (mask_emb): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (transformer_pre): PrepareTransformerInputs(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (transformer_post): LastHiddenState(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_masking_post): SequentialBlock(\n", + " (layers): List(\n", + " (0): TransformerOutputToRagged(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): TransformerInferenceHiddenState(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_masking_pre): SequentialBlock(\n", + " (layers): List(\n", + " (0): SequenceCausalLastInference(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): ExtractMaskFromTargets(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (test_pre): SequencePredictLast(\n", + " (_pre): SequentialBlock(\n", + " (layers): List(\n", + " (0): PrepareFeatures(\n", + " (prepare_lists): PrepareListFeatures()\n", + " )\n", + " (1): PrepareFeatures(\n", + " (prepare_lists): PrepareListFeatures()\n", + " )\n", + " )\n", + " (prepare_lists): PrepareListFeatures()\n", + " )\n", + " (transformer): XLNetBlock(\n", + " (transformer): TFXLNetMainLayer(\n", + " (word_embedding): TFSharedEmbeddings(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer): List(\n", + " (0): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " 
(_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (2): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " 
)\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_kwargs): Dict(\n", + " (name): 'transformer'\n", + " (trainable): True\n", + " (dtype): 'float32'\n", + " )\n", + " (mask_emb): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (transformer_pre): PrepareTransformerInputs(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (transformer_post): LastHiddenState(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_masking_post): SequentialBlock(\n", + " (layers): List(\n", + " (0): TransformerOutputToRagged(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): TransformerInferenceHiddenState(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_masking_pre): SequentialBlock(\n", + " (layers): List(\n", + " (0): SequenceCausalLastInference(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): ExtractMaskFromTargets(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (signatures): _SignatureMap({'serving_default': })\n", + ")" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer.load('t4rec_model')" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "2f5a7984", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.loader.tensorflow import Loader" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "dc4df316", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/dataloader/tensorflow.py:65: UserWarning: Due to a CUDA memory alignment issue in some Tensorflow operations such as Embedding ops, we recommend that 'batch_size' be at least 16 and also a power of two. 
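# --- Illustrative sketch (hypothetical, not part of the original notebook/patch) ---
# The Loader batches used below expose each list column as two flat tensors,
# '<name>__values' and '<name>__offsets' (see the 'sess_pid_seq__offsets' check in the
# while-loop that follows). Assuming the offsets are row splits, the ragged sessions
# can be rebuilt directly with TensorFlow; the values/offsets here are made up:
import tensorflow as tf

values = tf.constant([101, 102, 103, 201, 202], dtype=tf.int64)   # hypothetical item ids
offsets = tf.constant([0, 3, 5], dtype=tf.int64)                  # row splits per session
sessions = tf.RaggedTensor.from_row_splits(values, offsets)
print(sessions)              # <tf.RaggedTensor [[101, 102, 103], [201, 202]]>
print(offsets[1].numpy())    # 3 -> length of the first session (the while-loop checks for 20)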
Please change 'batch_size' to a number that is a power of two that is greater than or equal to 16.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "loader = Loader(valid, batch_size=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "f3bfca3f", + "metadata": {}, + "outputs": [], + "source": [ + "it = iter(loader)" ] }, { "cell_type": "code", - "execution_count": 14, - "id": "3116726e", + "execution_count": 23, + "id": "7e1b9bbc", "metadata": {}, "outputs": [], "source": [ - "# cat rees46_schema_modified.pbtxt" + "while True:\n", + " b = next(it)\n", + " if b[0]['sess_pid_seq__offsets'].numpy()[1] == 20:\n", + " break" ] }, { "cell_type": "code", - "execution_count": 15, - "id": "69e8f95c", + "execution_count": 24, + "id": "7ee5f149", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "({'sess_pid_seq__values': ,\n", + " 'sess_pid_seq__offsets': },\n", + " None)" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# %%writefile rees46_schema_modified_2.pbtxt\n", - "\n", - "# feature {\n", - "# name: \"seq\"\n", - "# value_count {\n", - "# min: 2\n", - "# }\n", - "# type: INT\n", - "# int_domain {\n", - "# name: \"seq\"\n", - "# min: 1\n", - "# max: 390000\n", - "# is_categorical: true\n", - "# }\n", - "# annotation {\n", - "# tag: \"item_id\"\n", - "# tag: \"list\"\n", - "# tag: \"categorical\"\n", - "# tag: \"item\"\n", - "# }\n", - "# }" + "b" ] }, { "cell_type": "code", - "execution_count": 16, - "id": "a6ade14a", + "execution_count": 25, + "id": "81d2b071", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "50.1 ms ± 78.3 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + ] + } + ], "source": [ - "from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n", - "\n", - "def get_model():\n", - " mlp_block = mm.MLPBlock(\n", - " [d_model],\n", - " activation='relu',\n", - " no_activation_last_layer=True,\n", - " )\n", + "%%timeit\n", "\n", - " schema = TensorflowMetadata.from_proto_text_file(\n", - " './',\n", - " file_name='rees46_schema_modified.pbtxt'\n", - " ).to_merlin_schema()\n", - "\n", - " train.schema = schema\n", - " \n", - " schema_model = schema.select_by_tag(Tags.ITEM_ID)\n", - " input_block = mm.InputBlockV2(\n", - " schema_model,\n", - " categorical=mm.Embeddings(\n", - " schema_model.select_by_tag(Tags.CATEGORICAL),\n", - " dim=item_embedding_dim,\n", - " sequence_combiner=None,\n", - " )\n", - " )\n", - "\n", - " train.schema = train.schema.select_by_name(seq_name)\n", - "\n", - " xlnet_block = mm.XLNetBlock(d_model=d_model, n_head=n_head, n_layer=n_layer)\n", - "\n", - " dense_block = mm.SequentialBlock(\n", - " input_block,\n", - " mlp_block,\n", - " xlnet_block\n", - " )\n", - "\n", - " mlp_block2 = mm.MLPBlock(\n", - " [item_embedding_dim],\n", - " activation='relu',\n", - " no_activation_last_layer=True,\n", - " )\n", - "\n", - " prediction_task = mm.CategoricalOutput(\n", - " to_call=input_block[\"categorical\"][target],\n", - " )\n", - "\n", - " model_transformer = mm.Model(dense_block, mlp_block2, prediction_task)\n", - "\n", - " optimizer = tf.keras.optimizers.Adam(\n", - " learning_rate=learning_rate,\n", - " )\n", - "\n", - " model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", - " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[20])\n", - " )\n", - " return model_transformer, xlnet_block" + 
"model_transformer.predict_step(b)" ] }, { "cell_type": "code", - "execution_count": 17, - "id": "523fe2ac", + "execution_count": 26, + "id": "7b24e7fa", "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", - " warnings.warn(\n", - "2023-04-13 11:21:38.342588: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" + "297 ms ± 753 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] - }, + } + ], + "source": [ + "%%timeit\n", + "\n", + "with tf.device('/cpu:0'):\n", + " model_transformer.predict_step(b)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "5bd66ba8", + "metadata": {}, + "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:436: UserWarning: Converting sparse IndexedSlices to a dense Tensor with 174720448 elements. 
This may consume a large amount of memory.\n", - " warnings.warn(\n" + "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 106s 144ms/step - loss: 7.3129 - recall_at_20: 0.1424 - mrr_at_20: 0.0802 - ndcg_at_20: 0.0939 - map_at_20: 0.0802 - precision_at_20: 0.0071 - regularization_loss: 0.0000e+00 - loss_batch: 7.3149\n", - "84/84 [==============================] - 4s 27ms/step - loss: 8.5848 - recall_at_20: 0.2229 - mrr_at_20: 0.0736 - ndcg_at_20: 0.1066 - map_at_20: 0.0736 - precision_at_20: 0.0111 - regularization_loss: 0.0000e+00 - loss_batch: 8.5971\n" + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" ] }, { - "data": { - "text/plain": [ - "{'loss': 8.584781646728516,\n", - " 'recall_at_20': 0.2308632731437683,\n", - " 'mrr_at_20': 0.07471762597560883,\n", - " 'ndcg_at_20': 0.10908268392086029,\n", - " 'map_at_20': 0.07471762597560883,\n", - " 'precision_at_20': 0.011543160304427147,\n", - " 'regularization_loss': 0.0,\n", - " 'loss_batch': 9.130510330200195}" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model_transformer, xlnet_block = get_model()\n", - "model_transformer.fit(\n", - " train,\n", - " batch_size=batch_size,\n", - " epochs=n_epoch,\n", - " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", - ")\n", - "\n", - "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", - "model_transformer.evaluate(\n", - " valid,\n", - " batch_size=batch_size,\n", - " pre=predict_last,\n", - " return_dict=True\n", - ")\n", - "\n", - "# model_transformer.save('t4rec_model')" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "5bd66ba8", - "metadata": {}, - "outputs": [ - { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", " (_feature_shapes): Dict(\n", " (sess_pid_seq): TensorShape([128, None, 1])\n", " )\n", " (_feature_dtypes): Dict(\n", " (sess_pid_seq): tf.int32\n", " )\n", - "), because it is not built.\n", + "), because it is not built.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", " (_feature_shapes): Dict(\n", " (sess_pid_seq): TensorShape([128, None, 1])\n", @@ -724,7 +13835,13 @@ " (_feature_dtypes): 
Dict(\n", " (sess_pid_seq): tf.int32\n", " )\n", - "), because it is not built.\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", " (_feature_shapes): Dict(\n", " (sess_pid_seq): TensorShape([128, None, 1])\n", @@ -732,7 +13849,13 @@ " (_feature_dtypes): Dict(\n", " (sess_pid_seq): tf.int32\n", " )\n", - "), because it is not built.\n", + "), because it is not built.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", " (_feature_shapes): Dict(\n", " (sess_pid_seq): TensorShape([128, None, 1])\n", @@ -747,6 +13870,14 @@ "name": "stderr", "output_type": "stream", "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n", "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_predict_next_layer_call_fn, sequence_predict_next_layer_call_and_return_conditional_losses, sequence_predict_last_layer_call_fn while saving (showing 5 of 114). These functions will not be directly callable after loading.\n" ] }, @@ -754,15 +13885,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "INFO:tensorflow:Assets written to: /tmp/tmpkph1f3_r/model.savedmodel/assets\n" + "INFO:tensorflow:Assets written to: /tmp/tmpvsz5e5b2/model.savedmodel/assets\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "INFO:tensorflow:Assets written to: /tmp/tmpkph1f3_r/model.savedmodel/assets\n", - "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:100: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + "INFO:tensorflow:Assets written to: /tmp/tmpvsz5e5b2/model.savedmodel/assets\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:101: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", " config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n", "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. 
When loading, the custom mask layer must be passed to the custom_objects argument.\n", " config[i] = tf.keras.utils.serialize_keras_object(layer)\n", @@ -816,7 +13947,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 33, "id": "3ef1e5fc", "metadata": {}, "outputs": [], @@ -830,7 +13961,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 28, "id": "e2a7b6ee", "metadata": {}, "outputs": [], @@ -843,7 +13974,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 29, "id": "55ad012c", "metadata": {}, "outputs": [ @@ -884,7 +14015,7 @@ " \n", " 0\n", " sess_pid_seq\n", - " (Tags.CATEGORICAL, Tags.ITEM, Tags.ID, Tags.IT...\n", + " (Tags.LIST, Tags.ITEM_ID, Tags.CATEGORICAL, Ta...\n", " DType(name='int64', element_type=<ElementType....\n", " True\n", " True\n", @@ -899,10 +14030,10 @@ "" ], "text/plain": [ - "[{'name': 'sess_pid_seq', 'tags': {, , , , }, 'properties': {'domain': {'min': 1, 'max': 390000, 'name': 'sess_pid_seq'}, 'value_count': {'min': 2, 'max': None}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=2, max=None)))), 'is_list': True, 'is_ragged': True}]" + "[{'name': 'sess_pid_seq', 'tags': {, , , , }, 'properties': {'domain': {'min': 1, 'max': 390000, 'name': 'sess_pid_seq'}, 'value_count': {'min': 2, 'max': None}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=2, max=None)))), 'is_list': True, 'is_ragged': True}]" ] }, - "execution_count": 21, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -913,7 +14044,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 30, "id": "1a39b4f8", "metadata": {}, "outputs": [ @@ -1042,7 +14173,7 @@ "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel/assets\n", - "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:100: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:101: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", " config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n", "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. 
When loading, the custom mask layer must be passed to the custom_objects argument.\n", " config[i] = tf.keras.utils.serialize_keras_object(layer)\n", @@ -1078,35 +14209,17 @@ }, { "cell_type": "code", - "execution_count": 23, - "id": "1720a5af", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ls: cannot access '/workspace/models_for_benchmarking/1': No such file or directory\r\n" - ] - } - ], - "source": [ - "ls /workspace/models_for_benchmarking/1" - ] - }, - { - "cell_type": "code", - "execution_count": 24, + "execution_count": 31, "id": "d7cdc6cc", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 24, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" }, @@ -1114,89 +14227,88 @@ "name": "stderr", "output_type": "stream", "text": [ - "I0413 11:24:28.716029 1527 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x7f7f2a000000' with size 268435456\n", - "I0413 11:24:28.716361 1527 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 67108864\n", - "I0413 11:24:28.718446 1527 model_lifecycle.cc:459] loading: 0_transformworkflowtriton:1\n", - "I0413 11:24:28.718465 1527 model_lifecycle.cc:459] loading: 1_predicttensorflowtriton:1\n", - "I0413 11:24:28.718478 1527 model_lifecycle.cc:459] loading: executor_model:1\n", - "I0413 11:24:28.924940 1527 tensorflow.cc:2536] TRITONBACKEND_Initialize: tensorflow\n", - "I0413 11:24:28.924955 1527 tensorflow.cc:2546] Triton TRITONBACKEND API version: 1.10\n", - "I0413 11:24:28.924960 1527 tensorflow.cc:2552] 'tensorflow' TRITONBACKEND API version: 1.10\n", - "I0413 11:24:28.924962 1527 tensorflow.cc:2576] backend configuration:\n", + "I0509 01:55:17.511153 1205 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x7ff516000000' with size 268435456\n", + "I0509 01:55:17.511472 1205 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 67108864\n", + "I0509 01:55:17.513574 1205 model_lifecycle.cc:459] loading: executor_model:1\n", + "I0509 01:55:17.513595 1205 model_lifecycle.cc:459] loading: 0_transformworkflowtriton:1\n", + "I0509 01:55:17.513608 1205 model_lifecycle.cc:459] loading: 1_predicttensorflowtriton:1\n", + "I0509 01:55:17.693342 1205 tensorflow.cc:2536] TRITONBACKEND_Initialize: tensorflow\n", + "I0509 01:55:17.693362 1205 tensorflow.cc:2546] Triton TRITONBACKEND API version: 1.10\n", + "I0509 01:55:17.693365 1205 tensorflow.cc:2552] 'tensorflow' TRITONBACKEND API version: 1.10\n", + "I0509 01:55:17.693368 1205 tensorflow.cc:2576] backend configuration:\n", "{\"cmdline\":{\"auto-complete-config\":\"true\",\"min-compute-capability\":\"6.000000\",\"backend-directory\":\"/opt/tritonserver/backends\",\"default-max-batch-size\":\"4\"}}\n", - "2023-04-13 11:24:30.207841: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-05-09 01:55:18.992767: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-04-13 11:24:32.085748: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] 
successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:32.086174: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:32.086365: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:20.814292: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:20.814710: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:20.814876: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", - "I0413 11:24:33.803267 1527 python_be.cc:1856] TRITONBACKEND_ModelInstanceInitialize: executor_model_0 (GPU device 0)\n", - "2023-04-13 11:24:35.316462: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "I0509 01:55:22.571307 1205 tensorflow.cc:2642] TRITONBACKEND_ModelInitialize: 1_predicttensorflowtriton (version 1)\n", + "2023-05-09 01:55:22.571962: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-05-09 01:55:22.592315: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }\n", + "2023-05-09 01:55:22.592352: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-05-09 01:55:22.592474: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-04-13 11:24:37.126873: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:37.127251: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:37.127427: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there 
must be at least one NUMA node, so returning NUMA node zero\n", - "I0413 11:24:37.157059 1527 tensorflow.cc:2642] TRITONBACKEND_ModelInitialize: 1_predicttensorflowtriton (version 1)\n", - "I0413 11:24:37.157179 1527 model_lifecycle.cc:694] successfully loaded 'executor_model' version 1\n", - "2023-04-13 11:24:37.157805: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", - "2023-04-13 11:24:37.178699: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }\n", - "2023-04-13 11:24:37.178742: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", - "2023-04-13 11:24:37.178876: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-05-09 01:55:22.593417: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:22.609446: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:22.609627: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:22.855175: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:22.855338: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:22.855479: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:22.855607: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 29840 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", + "2023-05-09 01:55:22.913337: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled\n", + "2023-05-09 01:55:22.922530: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.\n", + "2023-05-09 01:55:23.337695: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-05-09 01:55:23.403830: I tensorflow/cc/saved_model/loader.cc:325] SavedModel load for tags { serve }; Status: success: OK. 
Took 831878 microseconds.\n", + "2023-05-09 01:55:24.746386: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-04-13 11:24:37.179781: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:37.196068: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:37.196289: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:37.196570: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:37.196747: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:37.196909: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:37.197031: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", - "2023-04-13 11:24:37.203975: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 38618 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", - "2023-04-13 11:24:37.262568: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled\n", - "2023-04-13 11:24:37.271889: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.\n", - "2023-04-13 11:24:37.678751: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", - "2023-04-13 11:24:37.745105: I tensorflow/cc/saved_model/loader.cc:325] SavedModel load for tags { serve }; Status: success: OK. 
Took 587310 microseconds.\n", - "2023-04-13 11:24:39.105154: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-04-13 11:24:40.997532: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:40.997994: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:40.998186: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:26.581369: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:26.581724: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:26.581886: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", - "I0413 11:24:42.684588 1527 tensorflow.cc:2691] TRITONBACKEND_ModelInstanceInitialize: 1_predicttensorflowtriton_0 (GPU device 0)\n", - "2023-04-13 11:24:42.684902: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", - "2023-04-13 11:24:42.702205: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }\n", - "2023-04-13 11:24:42.702239: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", - "2023-04-13 11:24:42.702447: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:42.702659: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:42.702822: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:42.703025: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, 
so returning NUMA node zero\n", - "2023-04-13 11:24:42.703189: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:42.703311: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 38618 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", - "2023-04-13 11:24:42.742722: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.\n" + "I0509 01:55:28.344195 1205 python_be.cc:1856] TRITONBACKEND_ModelInstanceInitialize: executor_model_0 (GPU device 0)\n", + "2023-05-09 01:55:29.628356: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-05-09 01:55:31.434543: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:31.434993: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:31.435198: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "I0509 01:55:31.465538 1205 tensorflow.cc:2691] TRITONBACKEND_ModelInstanceInitialize: 1_predicttensorflowtriton_0 (GPU device 0)\n", + "I0509 01:55:31.465701 1205 model_lifecycle.cc:694] successfully loaded 'executor_model' version 1\n", + "2023-05-09 01:55:31.465951: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-05-09 01:55:31.490532: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }\n", + "2023-05-09 01:55:31.490575: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-05-09 01:55:31.490777: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:31.491003: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:31.491186: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:31.491411: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:31.491588: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] 
successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:31.491744: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 29840 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", + "2023-05-09 01:55:31.549442: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2023-04-13 11:24:43.330311: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", - "2023-04-13 11:24:43.395816: I tensorflow/cc/saved_model/loader.cc:325] SavedModel load for tags { serve }; Status: success: OK. Took 710922 microseconds.\n", - "I0413 11:24:43.395921 1527 python_be.cc:1856] TRITONBACKEND_ModelInstanceInitialize: 0_transformworkflowtriton_0 (GPU device 0)\n", - "I0413 11:24:43.396107 1527 model_lifecycle.cc:694] successfully loaded '1_predicttensorflowtriton' version 1\n", - "2023-04-13 11:24:44.668497: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-05-09 01:55:32.146750: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-05-09 01:55:32.213463: I tensorflow/cc/saved_model/loader.cc:325] SavedModel load for tags { serve }; Status: success: OK. 
Took 747520 microseconds.\n", + "I0509 01:55:32.213572 1205 python_be.cc:1856] TRITONBACKEND_ModelInstanceInitialize: 0_transformworkflowtriton_0 (GPU device 0)\n", + "I0509 01:55:32.213757 1205 model_lifecycle.cc:694] successfully loaded '1_predicttensorflowtriton' version 1\n", + "2023-05-09 01:55:33.476455: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-04-13 11:24:46.525315: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:46.525768: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:46.525978: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "I0413 11:24:46.583396 1527 model_lifecycle.cc:694] successfully loaded '0_transformworkflowtriton' version 1\n", - "I0413 11:24:46.583508 1527 server.cc:563] \n", + "2023-05-09 01:55:35.263779: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:35.264127: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:35.264284: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "I0509 01:55:35.317101 1205 model_lifecycle.cc:694] successfully loaded '0_transformworkflowtriton' version 1\n", + "I0509 01:55:35.317235 1205 server.cc:563] \n", "+------------------+------+\n", "| Repository Agent | Path |\n", "+------------------+------+\n", "+------------------+------+\n", "\n", - "I0413 11:24:46.583587 1527 server.cc:590] \n", + "I0509 01:55:35.317307 1205 server.cc:590] \n", "+------------+-----------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", "| Backend | Path | Config |\n", "+------------+-----------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", @@ -1204,7 +14316,7 @@ "| tensorflow | /opt/tritonserver/backends/tensorflow2/libtriton_tensorflow2.so | {\"cmdline\":{\"auto-complete-config\":\"true\",\"min-compute-capability\":\"6.000000\",\"backend-directory\":\"/opt/tritonserver/backends\",\"default-max-batch-size\":\"4\"}} |\n", 
"+------------+-----------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", "\n", - "I0413 11:24:46.583634 1527 server.cc:633] \n", + "I0509 01:55:35.317350 1205 server.cc:633] \n", "+---------------------------+---------+--------+\n", "| Model | Version | Status |\n", "+---------------------------+---------+--------+\n", @@ -1213,9 +14325,9 @@ "| executor_model | 1 | READY |\n", "+---------------------------+---------+--------+\n", "\n", - "I0413 11:24:46.610538 1527 metrics.cc:864] Collecting metrics for GPU 0: Quadro RTX 8000\n", - "I0413 11:24:46.610778 1527 metrics.cc:757] Collecting CPU metrics\n", - "I0413 11:24:46.610913 1527 tritonserver.cc:2264] \n", + "I0509 01:55:35.343214 1205 metrics.cc:864] Collecting metrics for GPU 0: Quadro RTX 8000\n", + "I0509 01:55:35.343395 1205 metrics.cc:757] Collecting CPU metrics\n", + "I0509 01:55:35.343534 1205 tritonserver.cc:2264] \n", "+----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", "| Option | Value |\n", "+----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", @@ -1234,16 +14346,16 @@ "| exit_timeout | 30 |\n", "+----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", "\n", - "I0413 11:24:46.611676 1527 grpc_server.cc:4819] Started GRPCInferenceService at 0.0.0.0:8001\n", - "I0413 11:24:46.611833 1527 http_server.cc:3477] Started HTTPService at 0.0.0.0:8000\n", - "I0413 11:24:46.652586 1527 http_server.cc:184] Started Metrics Service at 0.0.0.0:8002\n" + "I0509 01:55:35.344357 1205 grpc_server.cc:4819] Started GRPCInferenceService at 0.0.0.0:8001\n", + "I0509 01:55:35.344507 1205 http_server.cc:3477] Started HTTPService at 0.0.0.0:8000\n", + "I0509 01:55:35.385232 1205 http_server.cc:184] Started Metrics Service at 0.0.0.0:8002\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2023-04-13 11:25:37.504455: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" + "2023-05-09 01:56:23.448369: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" ] } ], From f14d7333aa650a4f13689ff7d95cf832974808cd Mon Sep 17 00:00:00 2001 From: Radek Osmulski Date: Tue, 9 May 2023 13:02:02 +1000 Subject: [PATCH 13/15] update --- ...nd_save_model_for_benchmarking-Copy1.ipynb | 644 +++++++----------- 1 file changed, 256 insertions(+), 388 deletions(-) diff --git a/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb b/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb index c048898d04..a9332b9c96 100644 --- a/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb +++ b/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "d062ceda", "metadata": {}, "outputs": [ @@ -10,134 +10,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "From 
https://github.com/NVIDIA-Merlin/Models\n", - " * [new branch] asvdb_metric_tracking -> origin/asvdb_metric_tracking\n", - " * [new branch] benchmark-session-based -> origin/benchmark-session-based\n", - " * [new branch] ci/horovod -> origin/ci/horovod\n", - " * [new branch] codespell_fix -> origin/codespell_fix\n", - " 16fb4149..fcaefc3e fea-sok-integration-wj -> origin/fea-sok-integration-wj\n", - " * [new branch] fea-sok-load-dump -> origin/fea-sok-load-dump\n", - " * [new branch] fix_datetime_issue_add_inference_on_TIS -> origin/fix_datetime_issue_add_inference_on_TIS\n", - " 95462360..7d68dc88 gh-pages -> origin/gh-pages\n", - " * [new branch] implement_review_comments -> origin/implement_review_comments\n", - " * [new branch] inference_benchmarking_transformers -> origin/inference_benchmarking_transformers\n", - " * [new branch] laiacano/concurrency -> origin/laiacano/concurrency\n", - " 835ad186..d8133b8f main -> origin/main\n", - " * [new branch] mtl_example -> origin/mtl_example\n", - " cb431a8a..b90e9a1b release-22.12 -> origin/release-22.12\n", - " * [new branch] release-23.02 -> origin/release-23.02\n", - " * [new branch] release-23.04 -> origin/release-23.04\n", - " * [new branch] stable -> origin/stable\n", - " * [new branch] tf/batch_predict_fix -> origin/tf/batch_predict_fix\n", - " * [new branch] tf/column_sampling_serialization_fix -> origin/tf/column_sampling_serialization_fix\n", - " * [new branch] tf/continuous_seq_feats_fix -> origin/tf/continuous_seq_feats_fix\n", - " * [new branch] tf/dataloader_changes -> origin/tf/dataloader_changes\n", - " * [new branch] tf/dlrm_dropout_fix -> origin/tf/dlrm_dropout_fix\n", - " * [new branch] tf/fix_broadcast_to_sequence -> origin/tf/fix_broadcast_to_sequence\n", - " * [new branch] tf/fix_logq_correction -> origin/tf/fix_logq_correction\n", - " * [new branch] tf/fix_training_smaller_accuracy -> origin/tf/fix_training_smaller_accuracy\n", - " * [new branch] tf/loglossmetric_callbacks -> origin/tf/loglossmetric_callbacks\n", - " * [new branch] tf/mtl_example_updates_v2 -> origin/tf/mtl_example_updates_v2\n", - " + 169f3df5...06eecddd tf/output-block -> origin/tf/output-block (forced update)\n", - " * [new branch] tf/pretrained_emb -> origin/tf/pretrained_emb\n", - " * [new branch] tf/process_list_to_prepare_features -> origin/tf/process_list_to_prepare_features\n", - " * [new branch] tf/quick_start_ranking -> origin/tf/quick_start_ranking\n", - " * [new branch] tf/transformer-api -> origin/tf/transformer-api\n", - " * [new branch] torch/dev -> origin/torch/dev\n", - " * [new branch] torch/masking -> origin/torch/masking\n", - " * [new branch] torch/prototype -> origin/torch/prototype\n", - " * [new branch] torch/remove-t4r-code -> origin/torch/remove-t4r-code\n", - " * [new branch] tox_github_actions_fix -> origin/tox_github_actions_fix\n", - " * [new branch] transformer-api -> origin/transformer-api\n", - " + 0a65d603...9f53e8ff update_07 -> origin/update_07 (forced update)\n", - " * [new tag] v23.02.00 -> v23.02.00\n", - " * [new tag] v23.04.00 -> v23.04.00\n", - " * [new tag] v23.05.dev0 -> v23.05.dev0\n", "Previous HEAD position was cb431a8a Fix the serialization of `SequenceSummary` block (#927)\n", - "HEAD is now at a86201ee add masking support to SequencePredictRandom transform\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing /models\n", - " Installing build dependencies: started\n", - " Installing build dependencies: finished with status 'done'\n", - " Getting requirements to build 
wheel: started\n", - " Getting requirements to build wheel: finished with status 'done'\n", - " Preparing wheel metadata: started\n", - " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from merlin-models==23.2.0+7.ga86201ee) (0.0.4)\n", - "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-models==23.2.0+7.ga86201ee) (0.10.0)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.56.4)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (4.64.1)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (8.0.0)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (3.19.6)\n", - "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.12.0)\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.2.5)\n", - "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.7.1)\n", - "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.7.1)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (22.0)\n", - "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.5.0)\n", - "Requirement already satisfied: pandas<1.4.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.3.5)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.39.1)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (45.2.0)\n", - "Requirement already satisfied: numpy<1.24,>=1.18 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.22.4)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (5.2.0)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.3.0)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.57.0)\n", - "Requirement already satisfied: grpclib in 
/usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.4.3)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.2.0)\n", - "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.3.0)\n", - "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.12.0)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.0)\n", - "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.2.0)\n", - "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (8.1.3)\n", - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.2.0)\n", - "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (5.9.4)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.7.0)\n", - "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.4.0)\n", - "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.26.13)\n", - "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.0.4)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.0.0)\n", - "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.1)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (3.1.2)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.7)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.8.2)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (3.11.0)\n", - "Requirement already satisfied: multidict in 
/usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.0.4)\n", - "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (4.1.0)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.0.1)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.1.1)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.14.0)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.0.1)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (4.0.0)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Building wheels for collected packages: merlin-models\n", - " Building wheel for merlin-models (PEP 517): started\n", - " Building wheel for merlin-models (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-models: filename=merlin_models-23.2.0+7.ga86201ee-py3-none-any.whl size=374647 sha256=e83a617585afdc41213cc3cf69dd7c136b778260ce9dc14c37e87c4a5675372a\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-uc9xl_m5/wheels/4d/e8/98/0493db55fff90dc9af123f55a9455b96f7f8166c912a02c8a6\n", - "Successfully built merlin-models\n", - "Installing collected packages: merlin-models\n", - " Attempting uninstall: merlin-models\n", - " Found existing installation: merlin-models 0.11.0\n", - " Uninstalling merlin-models-0.11.0:\n", - " Successfully uninstalled merlin-models-0.11.0\n", - "Successfully installed merlin-models-23.2.0+7.ga86201ee\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Previous HEAD position was a86201ee add masking support to SequencePredictRandom transform\n", "Switched to branch 'main'\n" ] }, @@ -145,8 +18,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Your branch is behind 'origin/main' by 75 commits, and can be fast-forwarded.\n", - " (use \"git pull\" to update your local branch)\n" + "Your branch is up to date with 'origin/main'.\n" ] }, { @@ -154,7 +26,8 @@ "output_type": "stream", "text": [ "From https://github.com/NVIDIA-Merlin/Models\n", - " * branch main -> FETCH_HEAD\n" + " * branch main -> FETCH_HEAD\n", + " 835ad186..d8133b8f main -> origin/main\n" ] }, { @@ -472,73 +345,73 @@ " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (8.0.0)\n", - "Collecting dask-cuda>=22.12.0\n", - " Downloading dask_cuda-23.4.0-py3-none-any.whl (125 kB)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from 
merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (0.56.4)\n", - "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.12.0)\n", - "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (11.4.1)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (22.0)\n", "Collecting fsspec>=2022.7.1\n", " Downloading fsspec-2023.5.0-py3-none-any.whl (160 kB)\n", + "Requirement already satisfied: numpy>=1.22.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.22.4)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (0.56.4)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (3.19.6)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (4.64.1)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (8.0.0)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.12.0)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.2.5)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (11.4.1)\n", "Collecting dask>=2022.11.1\n", " Downloading dask-2023.4.1-py3-none-any.whl (1.2 MB)\n", - "Requirement already satisfied: numpy>=1.22.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.22.4)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (22.0)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (3.19.6)\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.3.5)\n", "Collecting distributed>=2022.11.1\n", " Downloading distributed-2023.4.1-py3-none-any.whl (962 kB)\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.2.5)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (4.64.1)\n", - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from dask-cuda>=22.12.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (2.2.0)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from 
numba>=0.54->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (45.2.0)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (5.2.0)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (0.39.1)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.57.0)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.3.0)\n", - "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (0.12.0)\n", - "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (8.1.3)\n", - "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (2.2.0)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (6.0)\n", - "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.3.0)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (2022.7)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (2.8.2)\n", - "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (3.1.2)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.7.0)\n", - "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (5.9.4)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.0.0)\n", - "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (2.4.0)\n", - "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.26.13)\n", - "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from 
distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (6.1)\n", - "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.0.4)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (0.4.3)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.2.0)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->dask-cuda>=22.12.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.0.1)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (3.11.0)\n" + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.3.5)\n", + "Collecting dask-cuda>=22.12.0\n", + " Downloading dask_cuda-23.4.0-py3-none-any.whl (125 kB)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (0.39.1)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (45.2.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (5.2.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.57.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.3.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (0.4.3)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.2.0)\n", + "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (0.12.0)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (6.0)\n", + "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.3.0)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (2.2.0)\n", + "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from 
dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (8.1.3)\n", + "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.26.13)\n", + "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (5.9.4)\n", + "Requirement already satisfied: zict>=2.2.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (2.2.0)\n", + "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (2.4.0)\n", + "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (6.1)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.7.0)\n", + "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.0.4)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.0.0)\n", + "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (3.1.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (2022.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (2.8.2)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (3.11.0)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (4.1.0)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.14.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (2.1.1)\n", - "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (4.1.0)\n", - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (6.0.4)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in 
/usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (6.0.1)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (4.0.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (6.0.4)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=2.2.0->distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.0.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (2.1.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.14.0)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (4.0.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (6.0.1)\n", "Building wheels for collected packages: merlin-models, merlin-core, merlin-dataloader\n", " Building wheel for merlin-models (PEP 517): started\n", " Building wheel for merlin-models (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-models: filename=merlin_models-23.5.dev0+12.gd8133b8f-py3-none-any.whl size=343289 sha256=1f20f65acef288535cc4e5bca6de216485c546156d707b17b3bb9b8ceedc3ec7\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-0prgr6hn/wheels/4d/e8/98/0493db55fff90dc9af123f55a9455b96f7f8166c912a02c8a6\n", + " Created wheel for merlin-models: filename=merlin_models-0.9.0+157.gd8133b8f-py3-none-any.whl size=343257 sha256=2c9ef3392cbe77d1daad7c766b221d7bec14cc3c18c7b000c9312e00a7d1a16f\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-5qnt9sgn/wheels/4d/e8/98/0493db55fff90dc9af123f55a9455b96f7f8166c912a02c8a6\n", " Building wheel for merlin-core (PEP 517): started\n", " Building wheel for merlin-core (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-core: filename=merlin_core-23.4.0-py3-none-any.whl size=159556 sha256=9a716886c9862c32bd19979d286f32eb664022c85bcee19ca2d762fa014c8e85\n", + " Created wheel for merlin-core: filename=merlin_core-23.4.0-py3-none-any.whl size=159556 sha256=f8418cb4ec8a321feabf92606e7da3f7e6f913de2757c44ea02db38e7ea51494\n", " Stored in directory: /root/.cache/pip/wheels/42/ef/87/2c64bce8c3064a2c4e399933df4eda4838939355698ff8f7c7\n", " Building wheel for merlin-dataloader (PEP 517): started\n", " Building wheel for merlin-dataloader (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-dataloader: filename=merlin_dataloader-23.4.0-py3-none-any.whl size=34732 sha256=a7853a487205c4a6fdf99d03bda0cacba559264387e507e2f8d6cd87dc471c80\n", + " Created wheel for merlin-dataloader: filename=merlin_dataloader-23.4.0-py3-none-any.whl size=34732 sha256=e09b59834d26dbdb9418925dc395adf47d9ea26c53daea3d18cdb79d5211d04b\n", " Stored in directory: 
/root/.cache/pip/wheels/90/b0/66/48e52cc29f544ffbd105154b8be0901b5bb80cc85842b778fc\n", "Successfully built merlin-models merlin-core merlin-dataloader\n" ] @@ -587,10 +460,10 @@ " Uninstalling merlin-dataloader-0.0.4:\n", " Successfully uninstalled merlin-dataloader-0.0.4\n", " Attempting uninstall: merlin-models\n", - " Found existing installation: merlin-models 23.2.0+7.ga86201ee\n", - " Uninstalling merlin-models-23.2.0+7.ga86201ee:\n", - " Successfully uninstalled merlin-models-23.2.0+7.ga86201ee\n", - "Successfully installed dask-2023.4.1 dask-cuda-23.4.0 distributed-2023.4.1 fsspec-2023.5.0 merlin-core-23.4.0 merlin-dataloader-23.4.0 merlin-models-23.5.dev0+12.gd8133b8f\n" + " Found existing installation: merlin-models 0.11.0\n", + " Uninstalling merlin-models-0.11.0:\n", + " Successfully uninstalled merlin-models-0.11.0\n", + "Successfully installed dask-2023.4.1 dask-cuda-23.4.0 distributed-2023.4.1 fsspec-2023.5.0 merlin-core-23.4.0 merlin-dataloader-23.4.0 merlin-models-0.9.0+157.gd8133b8f\n" ] }, { @@ -792,49 +665,49 @@ " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (1.2.5)\n", "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (1.3.5)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (4.64.1)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (1.12.0)\n", "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (0.56.4)\n", - "Requirement already satisfied: dask-cuda>=22.12.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (23.4.0)\n", - "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (11.4.1)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (22.0)\n", - "Requirement already satisfied: distributed>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (2023.4.1)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (3.19.6)\n", + "Requirement already satisfied: dask-cuda>=22.12.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (23.4.0)\n", "Requirement already satisfied: dask>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (2023.4.1)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (3.19.6)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (11.4.1)\n", + "Requirement already satisfied: fsspec>=2022.7.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (2023.5.0)\n", "Requirement already satisfied: numpy>=1.22.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) 
(1.22.4)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (4.64.1)\n", "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (8.0.0)\n", - "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (1.12.0)\n", - "Requirement already satisfied: fsspec>=2022.7.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (2023.5.0)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (1.2.0)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (0.4.3)\n", + "Requirement already satisfied: distributed>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (2023.4.1)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (1.2.5)\n", "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+125.ga0bcd30f) (2022.7)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+125.ga0bcd30f) (2.8.2)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+125.ga0bcd30f) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+125.ga0bcd30f) (1.57.0)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core==0.9.0+125.ga0bcd30f) (45.2.0)\n", "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+125.ga0bcd30f) (5.2.0)\n", "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+125.ga0bcd30f) (0.39.1)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core==0.9.0+125.ga0bcd30f) (45.2.0)\n", "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from dask-cuda>=22.12.0->merlin-core==0.9.0+125.ga0bcd30f) (2.2.0)\n", - "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (3.1.2)\n", - "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (5.9.4)\n", - "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (1.26.13)\n", - "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (6.1)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (6.0)\n", - 
"Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (2.4.0)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (6.0)\n", + "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (1.3.0)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (2.2.0)\n", + "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (8.1.3)\n", + "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (0.12.0)\n", "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (1.7.0)\n", + "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (6.1)\n", "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (1.0.0)\n", - "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (2.2.0)\n", + "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (5.9.4)\n", + "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (2.4.0)\n", + "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (1.26.13)\n", + "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (3.1.2)\n", "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (1.0.4)\n", - "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (8.1.3)\n", - "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (0.12.0)\n", - "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (1.3.0)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+125.ga0bcd30f) (1.57.0)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+125.ga0bcd30f) (1.3.0)\n", - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (6.0.4)\n", - "Requirement already satisfied: h2<5,>=3.1.0 in 
/usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (4.1.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (0.4.3)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (1.2.0)\n", "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+125.ga0bcd30f) (1.14.0)\n", "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core==0.9.0+125.ga0bcd30f) (3.11.0)\n", "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->dask-cuda>=22.12.0->merlin-core==0.9.0+125.ga0bcd30f) (1.0.1)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (2.1.1)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (6.0.4)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (4.1.0)\n", "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (4.0.0)\n" ] }, @@ -846,8 +719,8 @@ "Building wheels for collected packages: merlin-core\n", " Building wheel for merlin-core (PEP 517): started\n", " Building wheel for merlin-core (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-core: filename=merlin_core-0.9.0+125.ga0bcd30f-py3-none-any.whl size=161449 sha256=57d8552cb7abbed6b1d1b2860391c64e7dfea045c442fc0f94c0fc940aed7e3d\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-0yemn26u/wheels/8f/da/8c/c779661788874afaa32fd10abeac6016635956e3bad9940584\n", + " Created wheel for merlin-core: filename=merlin_core-0.9.0+125.ga0bcd30f-py3-none-any.whl size=161449 sha256=0c37c110ad7a9dc1a4721b776063e9d1571a000763322b27df7ea731ae78164e\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-yeu5mq1c/wheels/8f/da/8c/c779661788874afaa32fd10abeac6016635956e3bad9940584\n", "Successfully built merlin-core\n" ] }, @@ -855,7 +728,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "ERROR: merlin-models 23.5.dev0+12.gd8133b8f has requirement merlin-core>=23.4.0, but you'll have merlin-core 0.9.0+125.ga0bcd30f which is incompatible.\n", + "ERROR: merlin-models 0.9.0+157.gd8133b8f has requirement merlin-core>=23.4.0, but you'll have merlin-core 0.9.0+125.ga0bcd30f which is incompatible.\n", "ERROR: merlin-dataloader 23.4.0 has requirement merlin-core>=23.4.0, but you'll have merlin-core 0.9.0+125.ga0bcd30f which is incompatible.\n" ] }, @@ -1011,17 +884,16 @@ " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: merlin-dataloader>=23.4.0 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+66.g67136eba) (23.4.0)\n", "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+66.g67136eba) 
(1.9.3)\n", "Processing /root/.cache/pip/wheels/42/ef/87/2c64bce8c3064a2c4e399933df4eda4838939355698ff8f7c7/merlin_core-23.4.0-py3-none-any.whl\n", + "Requirement already satisfied: merlin-dataloader>=23.4.0 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+66.g67136eba) (23.4.0)\n", "Requirement already satisfied: numpy<1.26.0,>=1.18.5 in /usr/local/lib/python3.8/dist-packages (from scipy->nvtabular==1.6.0+66.g67136eba) (1.22.4)\n", "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (0.56.4)\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.2.5)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (11.4.1)\n", "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.12.0)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (8.0.0)\n", - "Requirement already satisfied: dask>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2023.4.1)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (4.64.1)\n", - "Requirement already satisfied: distributed>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2023.4.1)\n" + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.3.5)\n", + "Requirement already satisfied: distributed>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2023.4.1)\n", + "Requirement already satisfied: dask>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2023.4.1)\n" ] }, { @@ -1029,47 +901,48 @@ "output_type": "stream", "text": [ "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (3.19.6)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (4.64.1)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (22.0)\n", "Requirement already satisfied: dask-cuda>=22.12.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (23.4.0)\n", - "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (11.4.1)\n", "Requirement already satisfied: fsspec>=2022.7.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2023.5.0)\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.3.5)\n", - "Requirement already satisfied: packaging in 
/usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (22.0)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (0.39.1)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (8.0.0)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.2.5)\n", "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (45.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (0.39.1)\n", "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (5.2.0)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (0.4.3)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.2.0)\n", "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.3.0)\n", "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.57.0)\n", - "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.3.0)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (6.0)\n", - "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.2.0)\n", - "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (0.12.0)\n", - "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (8.1.3)\n", - "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (5.9.4)\n", - "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.0.4)\n", - "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (6.1)\n", - "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.26.13)\n", - 
"Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (3.1.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2022.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.8.2)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (6.0)\n", "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.0.0)\n", "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.4.0)\n", - "Requirement already satisfied: zict>=2.2.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.2.0)\n", + "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (8.1.3)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.2.0)\n", + "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.0.4)\n", "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.7.0)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.8.2)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2022.7)\n", + "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (0.12.0)\n", + "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.26.13)\n", + "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (5.9.4)\n", + "Requirement already satisfied: zict>=2.2.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.2.0)\n", + "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (6.1)\n", + "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (3.1.2)\n", + "Requirement already 
satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.3.0)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.2.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (0.4.3)\n", "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (3.11.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.14.0)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=2.2.0->distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.0.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.1.1)\n", "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (4.1.0)\n", "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (6.0.4)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.1.1)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=2.2.0->distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.0.1)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.14.0)\n", "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (4.0.0)\n", "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (6.0.1)\n", "Building wheels for collected packages: nvtabular\n", " Building wheel for nvtabular (PEP 517): started\n", " Building wheel for nvtabular (PEP 517): finished with status 'done'\n", - " Created wheel for nvtabular: filename=nvtabular-1.6.0+66.g67136eba-cp38-cp38-linux_x86_64.whl size=259850 sha256=957958ecd0f9149dbe203eb5e2a3d1b5ec128421aee4e31572f4ca8574131719\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-btpmur92/wheels/df/bf/c2/9cc2a62fe6da42038c26a9c0c4e25f9767093528b102fa30a2\n", + " Created wheel for nvtabular: filename=nvtabular-1.6.0+66.g67136eba-cp38-cp38-linux_x86_64.whl size=259850 sha256=b7b2ec970d1e905ffca54a11728068e88a5ef40dfcd582124e0d0d1c8ca7d590\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-kfeyyfk1/wheels/df/bf/c2/9cc2a62fe6da42038c26a9c0c4e25f9767093528b102fa30a2\n", "Successfully built nvtabular\n", "Installing collected 
packages: merlin-core, nvtabular\n", " Attempting uninstall: merlin-core\n", @@ -1244,78 +1117,78 @@ " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: treelite==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (2.4.0)\n", - "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (23.4.0)\n" + "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (23.4.0)\n", + "Requirement already satisfied: treelite==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (2.4.0)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: requests<3,>=2.10 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (2.28.1)\n", - "Requirement already satisfied: treelite-runtime==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (2.4.0)\n", "Requirement already satisfied: nvtabular>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (1.6.0+66.g67136eba)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.8/dist-packages (from treelite==2.4.0->merlin-systems==0.7.0+98.g2b1b90b) (1.22.4)\n", - "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from treelite==2.4.0->merlin-systems==0.7.0+98.g2b1b90b) (1.9.3)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (3.19.6)\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.3.5)\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.2.5)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (4.64.1)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (8.0.0)\n", + "Requirement already satisfied: treelite-runtime==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (2.4.0)\n", + "Requirement already satisfied: requests<3,>=2.10 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (2.28.1)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (22.0)\n", "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (11.4.1)\n", - "Requirement already satisfied: fsspec>=2022.7.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2023.5.0)\n", + "Requirement already satisfied: dask>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2023.4.1)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in 
/usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (8.0.0)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (4.64.1)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (0.56.4)\n", + "Requirement already satisfied: numpy>=1.22.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.22.4)\n", "Requirement already satisfied: distributed>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2023.4.1)\n", "Requirement already satisfied: dask-cuda>=22.12.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (23.4.0)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (22.0)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (0.56.4)\n", "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.12.0)\n", - "Requirement already satisfied: dask>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2023.4.1)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.2.5)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.3.5)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (3.19.6)\n", + "Requirement already satisfied: fsspec>=2022.7.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2023.5.0)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from treelite==2.4.0->merlin-systems==0.7.0+98.g2b1b90b) (1.9.3)\n", + "Requirement already satisfied: merlin-dataloader>=23.4.0 in /usr/local/lib/python3.8/dist-packages (from nvtabular>=1.0.0->merlin-systems==0.7.0+98.g2b1b90b) (23.4.0)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+98.g2b1b90b) (2.8)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+98.g2b1b90b) (1.26.13)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+98.g2b1b90b) (2.1.1)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+98.g2b1b90b) (2019.11.28)\n", - "Requirement already satisfied: merlin-dataloader>=23.4.0 in /usr/local/lib/python3.8/dist-packages (from nvtabular>=1.0.0->merlin-systems==0.7.0+98.g2b1b90b) (23.4.0)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from 
pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.8.2)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2022.7)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.2.0)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (0.4.3)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.7.0)\n", - "Requirement already satisfied: zict>=2.2.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.2.0)\n", - "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (5.9.4)\n", + "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+98.g2b1b90b) (2.1.1)\n", + "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.3.0)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (6.0)\n", + "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (8.1.3)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.2.0)\n", + "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (0.12.0)\n", + "Requirement already satisfied: importlib-metadata>=4.13.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (5.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (0.39.1)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (45.2.0)\n", "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.0.4)\n", + "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.4.0)\n", + "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (5.9.4)\n", + "Requirement already satisfied: zict>=2.2.0 in /usr/local/lib/python3.8/dist-packages (from 
distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.2.0)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.7.0)\n", "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (6.1)\n", - "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.2.0)\n", - "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (0.12.0)\n", "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (3.1.2)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (6.0)\n", "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.0.0)\n", - "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (8.1.3)\n", - "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.4.0)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (5.2.0)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (45.2.0)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (0.39.1)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.3.0)\n", "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.57.0)\n", - "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.3.0)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.14.0)\n", - "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (4.1.0)\n", - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (6.0.4)\n", - "Requirement already satisfied: heapdict in 
/usr/local/lib/python3.8/dist-packages (from zict>=2.2.0->distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.0.1)\n" + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.3.0)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.2.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (0.4.3)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2022.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.8.2)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata>=4.13.0->dask>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (3.11.0)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=2.2.0->distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.0.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.1.1)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (6.0.4)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.1.1)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (3.11.0)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (6.0.1)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (4.1.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.14.0)\n", "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (4.0.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (6.0.1)\n", "Building wheels for collected packages: merlin-systems\n", " Building wheel for merlin-systems (PEP 517): started\n", " Building wheel for merlin-systems (PEP 517): finished 
with status 'done'\n", - " Created wheel for merlin-systems: filename=merlin_systems-0.7.0+98.g2b1b90b-py3-none-any.whl size=83152 sha256=282b1d3abe91766660d30dcbfa6d196c7f13d8d7d1b554eefd02455b7cdc1924\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-ojtyyyod/wheels/1f/e9/71/1b0c6295aa7f4b37cb70292d96d87d9f38204674e6531bdda6\n", + " Created wheel for merlin-systems: filename=merlin_systems-0.7.0+98.g2b1b90b-py3-none-any.whl size=83152 sha256=929338ae18fc3ba7e4b48667542c61c8468ba170761cc9e43b7060d9fb636b0a\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-gwpk5ek7/wheels/1f/e9/71/1b0c6295aa7f4b37cb70292d96d87d9f38204674e6531bdda6\n", "Successfully built merlin-systems\n", "Installing collected packages: merlin-systems\n", " Attempting uninstall: merlin-systems\n", @@ -1430,62 +1303,62 @@ " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", "Requirement already satisfied: merlin-core>=23.04.00 in /usr/local/lib/python3.8/dist-packages (from merlin-dataloader==0.0.2+72.gd9e97b4) (23.4.0)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (0.56.4)\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.3.5)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (8.0.0)\n", "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.2.5)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (4.64.1)\n", - "Requirement already satisfied: distributed>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2023.4.1)\n", - "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (11.4.1)\n", - "Requirement already satisfied: dask-cuda>=22.12.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (23.4.0)\n", - "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.12.0)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (22.0)\n", - "Requirement already satisfied: numpy>=1.22.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.22.4)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (8.0.0)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.3.5)\n", "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (3.19.6)\n", + "Requirement already satisfied: numpy>=1.22.0 in 
/usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.22.4)\n", "Requirement already satisfied: fsspec>=2022.7.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2023.5.0)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.12.0)\n", + "Requirement already satisfied: dask-cuda>=22.12.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (23.4.0)\n", + "Requirement already satisfied: distributed>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2023.4.1)\n", "Requirement already satisfied: dask>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2023.4.1)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (45.2.0)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (0.39.1)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (5.2.0)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (0.56.4)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (11.4.1)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (4.64.1)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.2.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (0.4.3)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.8.2)\n", "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2022.7)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.2.0)\n" + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.57.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.3.0)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already 
satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (0.4.3)\n", - "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.2.0)\n", - "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (5.9.4)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.0.0)\n", - "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.0.4)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (6.0)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from dask-cuda>=22.12.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.2.0)\n", "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (6.1)\n", - "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (3.1.2)\n", - "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.4.0)\n", "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.7.0)\n", - "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (0.12.0)\n", - "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.26.13)\n", + "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.0.4)\n", + "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (3.1.2)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.2.0)\n", "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (8.1.3)\n", - "Requirement already satisfied: zict>=2.2.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.2.0)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from 
tensorflow-metadata>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.3.0)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.57.0)\n", + "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (5.9.4)\n", + "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.26.13)\n", + "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (0.12.0)\n", + "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.4.0)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (6.0)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.0.0)\n", + "Requirement already satisfied: importlib-metadata>=4.13.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (5.2.0)\n", "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.3.0)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (3.11.0)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.14.0)\n", - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (6.0.4)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (45.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (0.39.1)\n", "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (4.1.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (6.0.4)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.14.0)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from 
zict>=0.1.3->dask-cuda>=22.12.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.0.1)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.1.1)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=2.2.0->distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.0.1)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (4.0.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata>=4.13.0->dask>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (3.11.0)\n", "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (6.0.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (4.0.0)\n", "Building wheels for collected packages: merlin-dataloader\n", " Building wheel for merlin-dataloader (PEP 517): started\n", " Building wheel for merlin-dataloader (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-dataloader: filename=merlin_dataloader-0.0.2+72.gd9e97b4-py3-none-any.whl size=34881 sha256=c39b7e146f814713447917029d09f8cf4978202ed3852dce51544461cd074e3b\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-t_njcpzr/wheels/8c/19/5b/15dc04f5a977f6a7f73ed66c91996a687b1d9e3154a4765536\n", + " Created wheel for merlin-dataloader: filename=merlin_dataloader-0.0.2+72.gd9e97b4-py3-none-any.whl size=34881 sha256=3b59ffde476328ed024b3610d55773d48ee2a39a5c9dcc7bc4429f86ecdb3307\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-3z4lu_lg/wheels/8c/19/5b/15dc04f5a977f6a7f73ed66c91996a687b1d9e3154a4765536\n", "Successfully built merlin-dataloader\n" ] }, @@ -1494,7 +1367,7 @@ "output_type": "stream", "text": [ "ERROR: nvtabular 1.6.0+66.g67136eba has requirement merlin-dataloader>=23.4.0, but you'll have merlin-dataloader 0.0.2+72.gd9e97b4 which is incompatible.\n", - "ERROR: merlin-models 23.5.dev0+12.gd8133b8f has requirement merlin-dataloader>=23.4.0, but you'll have merlin-dataloader 0.0.2+72.gd9e97b4 which is incompatible.\n" + "ERROR: merlin-models 0.9.0+157.gd8133b8f has requirement merlin-dataloader>=23.4.0, but you'll have merlin-dataloader 0.0.2+72.gd9e97b4 which is incompatible.\n" ] }, { @@ -1511,19 +1384,18 @@ } ], "source": [ - "# %%bash\n", + "%%bash\n", "\n", - "# cd /models && git fetch origin && git checkout origin/tf/transformer-api && pip install .\n", - "# cd /models && git checkout main && git pull origin main && pip install .\n", - "# cd /core && git checkout main && git pull origin main && pip install .\n", - "# cd /nvtabular && git checkout main && git pull origin main && pip install .\n", - "# cd /systems && git checkout main && git pull origin main && pip install .\n", - "# cd /dataloader && git checkout main && git pull origin main && pip install ." 
+ "cd /models && git checkout main && git pull origin main && pip install .\n", + "cd /core && git checkout main && git pull origin main && pip install .\n", + "cd /nvtabular && git checkout main && git pull origin main && pip install .\n", + "cd /systems && git checkout main && git pull origin main && pip install .\n", + "cd /dataloader && git checkout main && git pull origin main && pip install ." ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "e9929dc8", "metadata": {}, "outputs": [ @@ -1533,15 +1405,15 @@ "text": [ "Collecting gdown\n", " Downloading gdown-4.7.1-py3-none-any.whl (15 kB)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from gdown) (3.9.0)\n", - "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from gdown) (4.64.1)\n", "Requirement already satisfied: six in /usr/lib/python3/dist-packages (from gdown) (1.14.0)\n", "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.8/dist-packages (from gdown) (4.11.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from gdown) (3.9.0)\n", "Requirement already satisfied: requests[socks] in /usr/local/lib/python3.8/dist-packages (from gdown) (2.28.1)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from gdown) (4.64.1)\n", "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.8/dist-packages (from beautifulsoup4->gdown) (2.3.2.post1)\n", + "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (2.1.1)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.26.13)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2019.11.28)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (2.1.1)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2.8)\n", "Collecting PySocks!=1.5.7,>=1.5.6; extra == \"socks\"\n", " Downloading PySocks-1.7.1-py3-none-any.whl (16 kB)\n", @@ -1555,9 +1427,9 @@ "text": [ "Downloading...\n", "From (uriginal): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", - "From (redirected): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV&confirm=t&uuid=b5bb23eb-a2dd-4adc-b7b7-be5687c89aca\n", + "From (redirected): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV&confirm=t&uuid=c6c52af8-65d0-4308-84a7-f680f5add55c\n", "To: /workspace/T4Rec_repro/rees46_ecom_dataset_small_for_ci.zip\n", - "100%|██████████| 43.4M/43.4M [00:07<00:00, 6.20MB/s]\n" + "100%|██████████| 43.4M/43.4M [00:07<00:00, 6.16MB/s]\n" ] }, { @@ -1566,25 +1438,25 @@ "text": [ "Get:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 InRelease [1581 B]\n", "Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 Packages [1009 kB]\n", - "Get:3 http://archive.ubuntu.com/ubuntu focal InRelease [265 kB]\n", - "Get:4 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]\n", - "Get:5 http://security.ubuntu.com/ubuntu focal-security/main amd64 Packages [2674 kB]\n", + "Get:3 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]\n", + "Get:4 
http://archive.ubuntu.com/ubuntu focal InRelease [265 kB]\n", + "Get:5 http://security.ubuntu.com/ubuntu focal-security/universe amd64 Packages [1045 kB]\n", "Get:6 http://archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]\n", "Get:7 http://archive.ubuntu.com/ubuntu focal-backports InRelease [108 kB]\n", - "Get:8 http://archive.ubuntu.com/ubuntu focal/main amd64 Packages [1275 kB]\n", + "Get:8 http://security.ubuntu.com/ubuntu focal-security/multiverse amd64 Packages [28.5 kB]\n", "Get:9 http://security.ubuntu.com/ubuntu focal-security/restricted amd64 Packages [2203 kB]\n", - "Get:10 http://archive.ubuntu.com/ubuntu focal/restricted amd64 Packages [33.4 kB]\n", - "Get:11 http://archive.ubuntu.com/ubuntu focal/universe amd64 Packages [11.3 MB]\n", - "Get:12 http://security.ubuntu.com/ubuntu focal-security/multiverse amd64 Packages [28.5 kB]\n", - "Get:13 http://security.ubuntu.com/ubuntu focal-security/universe amd64 Packages [1045 kB]\n", - "Get:14 http://archive.ubuntu.com/ubuntu focal/multiverse amd64 Packages [177 kB]\n", - "Get:15 http://archive.ubuntu.com/ubuntu focal-updates/restricted amd64 Packages [2341 kB]\n", + "Get:10 http://archive.ubuntu.com/ubuntu focal/multiverse amd64 Packages [177 kB]\n", + "Get:11 http://archive.ubuntu.com/ubuntu focal/main amd64 Packages [1275 kB]\n", + "Get:12 http://security.ubuntu.com/ubuntu focal-security/main amd64 Packages [2674 kB]\n", + "Get:13 http://archive.ubuntu.com/ubuntu focal/restricted amd64 Packages [33.4 kB]\n", + "Get:14 http://archive.ubuntu.com/ubuntu focal/universe amd64 Packages [11.3 MB]\n", + "Get:15 http://archive.ubuntu.com/ubuntu focal-updates/multiverse amd64 Packages [31.2 kB]\n", "Get:16 http://archive.ubuntu.com/ubuntu focal-updates/universe amd64 Packages [1341 kB]\n", "Get:17 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 Packages [3157 kB]\n", - "Get:18 http://archive.ubuntu.com/ubuntu focal-updates/multiverse amd64 Packages [31.2 kB]\n", + "Get:18 http://archive.ubuntu.com/ubuntu focal-updates/restricted amd64 Packages [2341 kB]\n", "Get:19 http://archive.ubuntu.com/ubuntu focal-backports/main amd64 Packages [55.2 kB]\n", "Get:20 http://archive.ubuntu.com/ubuntu focal-backports/universe amd64 Packages [28.6 kB]\n", - "Fetched 27.3 MB in 9s (2922 kB/s)\n", + "Fetched 27.3 MB in 9s (2917 kB/s)\n", "Reading package lists...\n", "Reading package lists...\n", "Building dependency tree...\n", @@ -1605,17 +1477,17 @@ } ], "source": [ - "# %%bash\n", + "%%bash\n", "\n", - "# rm -rf ecom_dataset\n", - "# mkdir -p ecom_dataset\n", + "rm -rf ecom_dataset\n", + "mkdir -p ecom_dataset\n", "\n", - "# pip install gdown\n", - "# # gdown https://drive.google.com/uc?id=1BvCHc4eXComuNK93bKhRM6cbg9y5p350 # <-- full dataset\n", - "# gdown https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", - "# apt-get update -y\n", - "# apt-get install unzip -y\n", - "# unzip -d ecom_dataset \"rees46_ecom_dataset_small_for_ci.zip\"" + "pip install gdown\n", + "# gdown https://drive.google.com/uc?id=1BvCHc4eXComuNK93bKhRM6cbg9y5p350 # <-- full dataset\n", + "gdown https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", + "apt-get update -y\n", + "apt-get install unzip -y\n", + "unzip -d ecom_dataset \"rees46_ecom_dataset_small_for_ci.zip\"" ] }, { @@ -1874,8 +1746,8 @@ }, { "cell_type": "code", - "execution_count": 4, - "id": "0660887b", + "execution_count": 7, + "id": "fd80de2a", "metadata": {}, "outputs": [], "source": [ @@ -1887,15 +1759,15 @@ }, { "cell_type": "code", - "execution_count": 5, - "id": 
"ec38f1a6", + "execution_count": 8, + "id": "d5a1e610", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "2023-05-09 01:50:24.115697: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-05-09 02:55:54.458160: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n" @@ -1911,7 +1783,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "id": "ceb3ae93", "metadata": {}, "outputs": [ @@ -1926,9 +1798,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-05-09 01:50:26.436605: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:50:26.437013: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:50:26.437158: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n" + "2023-05-09 02:55:56.823309: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 02:55:56.823677: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 02:55:56.823805: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n" ] }, { @@ -1937,26 +1809,22 @@ "text": [ "[INFO]: sparse_operation_kit is imported\n", "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. 
The old module will be deleted in version 2.11.\n", - "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.1.4-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n", - "[SOK INFO] Initialize finished, communication tool: horovod\n" + "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.1.4-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n" ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-05-09 01:50:26.674203: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-05-09 01:50:26.675123: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:50:26.675302: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:50:26.675428: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:50:27.455564: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:50:27.455749: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:50:27.455877: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:50:27.455980: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:42] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", - "2023-05-09 01:50:27.456001: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", - "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" + "ename": "TypeError", + "evalue": "init() got an unexpected keyword argument 'use_legacy_optimizer'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[9], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mmm\u001b[39;00m\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/models/tf/__init__.py:34\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mblocks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcross\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CrossBlock\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mblocks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdlrm\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DLRMBlock\n\u001b[0;32m---> 34\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mblocks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mexperts\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CGCBlock, ExpertsGate, MMOEBlock, PLEBlock\n\u001b[1;32m 35\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mblocks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minteraction\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 36\u001b[0m DotProductInteraction,\n\u001b[1;32m 37\u001b[0m FMBlock,\n\u001b[1;32m 38\u001b[0m FMPairwiseInteraction,\n\u001b[1;32m 39\u001b[0m )\n\u001b[1;32m 40\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mblocks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmlp\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DenseResidualBlock, MLPBlock\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/models/tf/blocks/experts.py:28\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m 
\u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbase\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Block\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcombinators\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 23\u001b[0m ParallelBlock,\n\u001b[1;32m 24\u001b[0m SequentialBlock,\n\u001b[1;32m 25\u001b[0m TabularBlock,\n\u001b[1;32m 26\u001b[0m WithShortcut,\n\u001b[1;32m 27\u001b[0m )\n\u001b[0;32m---> 28\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbase\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_task_names_from_outputs\n\u001b[1;32m 29\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mprediction_tasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbase\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ParallelPredictionBlock, PredictionTask\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TabularData\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/models/tf/models/base.py:51\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mprediction\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Prediction, PredictionContext, TensorLike\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtabular\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TabularBlock\n\u001b[0;32m---> 51\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m 
\u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdistributed\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbackend\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m hvd, hvd_installed\n\u001b[1;32m 52\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minputs\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbase\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m InputBlock\n\u001b[1;32m 53\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mloader\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Loader\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/models/tf/distributed/backend.py:33\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m sok_installed:\n\u001b[0;32m---> 33\u001b[0m \u001b[43msok\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minit\u001b[49m\u001b[43m(\u001b[49m\u001b[43muse_legacy_optimizer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mTypeError\u001b[0m: init() got an unexpected keyword argument 'use_legacy_optimizer'" ] } ], @@ -1966,7 +1834,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "11647dd3", "metadata": {}, "outputs": [], @@ -2151,7 +2019,7 @@ { "cell_type": "code", "execution_count": 16, - "id": "076f42cc", + "id": "7baec64f", "metadata": {}, "outputs": [ { @@ -2246,7 +2114,7 @@ { "cell_type": "code", "execution_count": 18, - "id": "febab09e", + "id": "569113e1", "metadata": {}, "outputs": [ { @@ -2326,7 +2194,7 @@ { "cell_type": "code", "execution_count": 19, - "id": "8e0ea1b1", + "id": "2b09261c", "metadata": {}, "outputs": [ { @@ -13647,7 +13515,7 @@ { "cell_type": "code", "execution_count": 20, - "id": "2f5a7984", + "id": "4c62973a", "metadata": {}, "outputs": [], "source": [ @@ -13657,7 +13525,7 @@ { "cell_type": "code", "execution_count": 21, - "id": "dc4df316", + "id": "e5db703a", "metadata": {}, "outputs": [ { @@ -13676,7 +13544,7 @@ { "cell_type": "code", "execution_count": 22, - "id": "f3bfca3f", + "id": "e11f107c", "metadata": {}, "outputs": [], "source": [ @@ -13686,7 +13554,7 @@ { "cell_type": "code", "execution_count": 23, - "id": "7e1b9bbc", + "id": "c216e7fb", "metadata": {}, "outputs": [], "source": [ @@ -13699,7 +13567,7 @@ { "cell_type": "code", "execution_count": 24, - "id": "7ee5f149", + "id": "ea436b46", "metadata": {}, "outputs": [ { @@ -13724,7 +13592,7 @@ { "cell_type": "code", "execution_count": 25, - "id": "81d2b071", + "id": "dcd414a9", "metadata": {}, "outputs": [ { @@ -13744,7 +13612,7 @@ { "cell_type": "code", "execution_count": 26, - "id": "7b24e7fa", + "id": "b6244062", "metadata": {}, "outputs": [ { From 2ed210f0489d108ccd842f63728435ba882e8391 Mon Sep 17 00:00:00 2001 From: Radek Osmulski Date: Tue, 9 May 2023 16:27:38 +1000 Subject: [PATCH 14/15] update --- 
...nd_save_model_for_benchmarking-Copy1.ipynb | 15250 +--------------- 1 file changed, 863 insertions(+), 14387 deletions(-) diff --git a/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb b/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb index a9332b9c96..dc41a41849 100644 --- a/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb +++ b/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb @@ -3,760 +3,133 @@ { "cell_type": "code", "execution_count": 1, - "id": "d062ceda", + "id": "026bd245", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Previous HEAD position was cb431a8a Fix the serialization of `SequenceSummary` block (#927)\n", - "Switched to branch 'main'\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Your branch is up to date with 'origin/main'.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "From https://github.com/NVIDIA-Merlin/Models\n", - " * branch main -> FETCH_HEAD\n", - " 835ad186..d8133b8f main -> origin/main\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Updating 835ad186..d8133b8f\n", - "Fast-forward\n", - " .github/workflows/blossom-ci.yml | 102 --\n", - " .github/workflows/check-base-branch.yaml | 9 +\n", - " .github/workflows/cpu-horovod.yml | 53 +\n", - " .github/workflows/cpu-nvtabular.yml | 10 +-\n", - " .github/workflows/cpu-systems.yml | 10 +-\n", - " .github/workflows/cpu-t4r.yml | 41 +\n", - " .github/workflows/datasets.yml | 8 +-\n", - " .github/workflows/docs-build.yaml | 2 +-\n", - " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", - " .github/workflows/gpu-ci.yml | 12 +-\n", - " .github/workflows/implicit.yml | 8 +-\n", - " .github/workflows/lightfm.yml | 14 +-\n", - " .github/workflows/multi-gpu-ci.yml | 34 +\n", - " .github/workflows/packages.yaml | 120 ++\n", - " .github/workflows/pre-commit.yml | 8 +\n", - " .github/workflows/pytorch.yml | 85 +-\n", - " .github/workflows/release-drafter.yaml | 2 +-\n", - " .github/workflows/set-stable-branch.yaml | 10 +\n", - " .github/workflows/tensorflow.yml | 49 +-\n", - " .github/workflows/xgboost.yml | 8 +-\n", - " .pre-commit-config.yaml | 10 +-\n", - " MANIFEST.in | 5 +-\n", - " README.md | 2 +-\n", - " ci/pr.gpu.Jenkinsfile | 2 +-\n", - " conda/recipes/meta.yaml | 17 +-\n", - " docs/README.md | 46 +-\n", - " docs/source/api.rst | 99 +-\n", - " examples/01-Getting-started.ipynb | 101 +-\n", - " ...2-Merlin-Models-and-NVTabular-integration.ipynb | 13 +-\n", - " examples/03-Exploring-different-models.ipynb | 25 +-\n", - " examples/04-Exporting-ranking-models.ipynb | 9 +-\n", - " examples/05-Retrieval-Model.ipynb | 30 +-\n", - " ...-your-own-architecture-with-Merlin-Models.ipynb | 546 +++----\n", - " ...nal-ML-models-using-the-Merlin-Models-API.ipynb | 701 +++++++-\n", - " examples/images/mtl_architectures.png | Bin 0 -> 72404 bytes\n", - " ...ing-of-large-embedding-tables-by-LazyAdam.ipynb | 12 +-\n", - " ...on-based-next-item-prediction-for-fashion.ipynb | 11 +-\n", - " .../entertainment-with-pretrained-embeddings.ipynb | 8 +-\n", - " .../incremental-training-with-layer-freezing.ipynb | 275 ++--\n", - " .../multi-gpu-data-parallel-training.ipynb | 7 +-\n", - " .../multi-gpu/install_sparse_operation_kit.sh | 16 +\n", - " .../usecases/ranking_with_multitask_learning.ipynb | 1718 ++++++++++++++++++++\n", - " ...etrieval-with-hyperparameter-optimization.ipynb | 5 +-\n", - " .../transformers-next-item-prediction.ipynb | 1085 ++++++++----\n", - 
" .../ecommerce/booking/transformed/schema.pbtxt | 15 +-\n", - " merlin/datasets/ecommerce/small/schema.json | 7 +-\n", - " .../entertainment/movielens/100k/schema.pbtxt | 1 +\n", - " .../entertainment/movielens/1m/schema.pbtxt | 3 +-\n", - " .../entertainment/movielens/25m/schema.pbtxt | 1 +\n", - " .../entertainment/music_streaming/schema.json | 10 +-\n", - " .../entertainment/tenrec_video}/__init__.py | 0\n", - " .../entertainment/tenrec_video/schema.pbtxt | 159 ++\n", - " merlin/datasets/synthetic.py | 104 +-\n", - " .../datasets/testing/sequence_testing/schema.json | 24 +-\n", - " merlin/models/implicit/__init__.py | 115 +-\n", - " merlin/models/io.py | 2 -\n", - " merlin/models/lightfm/__init__.py | 132 +-\n", - " merlin/models/tf/__init__.py | 12 +-\n", - " merlin/models/tf/blocks/dlrm.py | 21 +-\n", - " merlin/models/tf/blocks/experts.py | 33 +-\n", - " merlin/models/tf/blocks/optimizer.py | 74 +-\n", - " merlin/models/tf/blocks/retrieval/base.py | 1 -\n", - " merlin/models/tf/core/aggregation.py | 87 +-\n", - " merlin/models/tf/core/combinators.py | 6 +-\n", - " merlin/models/tf/core/encoder.py | 54 +-\n", - " merlin/models/tf/core/tabular.py | 3 +-\n", - " merlin/models/tf/distributed/backend.py | 20 +\n", - " merlin/models/tf/distributed/embedding.py | 232 +++\n", - " merlin/models/tf/experimental/sample_weight.py | 177 ++\n", - " merlin/models/tf/inputs/base.py | 26 +-\n", - " merlin/models/tf/inputs/continuous.py | 41 +-\n", - " merlin/models/tf/inputs/embedding.py | 138 +-\n", - " merlin/models/tf/loader.py | 36 +-\n", - " merlin/models/tf/metrics/__init__.py | 31 +-\n", - " merlin/models/tf/metrics/evaluation.py | 4 +-\n", - " merlin/models/tf/metrics/topk.py | 17 +-\n", - " merlin/models/tf/models/base.py | 887 +++++++---\n", - " merlin/models/tf/models/benchmark.py | 20 +-\n", - " merlin/models/tf/models/ranking.py | 93 +-\n", - " merlin/models/tf/models/retrieval.py | 5 +\n", - " merlin/models/tf/models/utils.py | 38 +\n", - " merlin/models/tf/outputs/base.py | 27 +-\n", - " merlin/models/tf/outputs/block.py | 300 ++++\n", - " merlin/models/tf/outputs/classification.py | 14 +-\n", - " merlin/models/tf/outputs/contrastive.py | 65 +-\n", - " merlin/models/tf/outputs/regression.py | 8 +-\n", - " merlin/models/tf/outputs/sampling/base.py | 34 +-\n", - " merlin/models/tf/outputs/sampling/popularity.py | 93 +-\n", - " merlin/models/tf/outputs/topk.py | 2 -\n", - " merlin/models/tf/prediction_tasks/base.py | 15 +\n", - " .../models/tf/prediction_tasks/classification.py | 11 +-\n", - " merlin/models/tf/prediction_tasks/regression.py | 3 +-\n", - " merlin/models/tf/transformers/block.py | 61 +-\n", - " merlin/models/tf/transformers/transforms.py | 52 +-\n", - " merlin/models/tf/transforms/bias.py | 18 +-\n", - " merlin/models/tf/transforms/features.py | 579 +++++--\n", - " merlin/models/tf/transforms/negative_sampling.py | 25 +-\n", - " merlin/models/tf/transforms/sequence.py | 523 ++++--\n", - " merlin/models/tf/transforms/tensor.py | 249 +--\n", - " merlin/models/tf/utils/batch_utils.py | 8 +-\n", - " merlin/models/tf/utils/testing_utils.py | 81 +-\n", - " merlin/models/tf/utils/tf_utils.py | 85 +-\n", - " merlin/models/torch/__init__.py | 97 --\n", - " merlin/models/torch/block/base.py | 321 ----\n", - " merlin/models/torch/block/mlp.py | 95 --\n", - " merlin/models/torch/features/base.py | 23 -\n", - " merlin/models/torch/features/continuous.py | 66 -\n", - " merlin/models/torch/features/embedding.py | 497 ------\n", - " merlin/models/torch/features/tabular.py | 217 ---\n", - 
" merlin/models/torch/losses.py | 75 -\n", - " merlin/models/torch/model/__init__.py | 15 -\n", - " merlin/models/torch/model/base.py | 660 --------\n", - " merlin/models/torch/model/prediction_task.py | 101 --\n", - " merlin/models/torch/tabular/__init__.py | 15 -\n", - " merlin/models/torch/tabular/aggregation.py | 149 --\n", - " merlin/models/torch/tabular/base.py | 640 --------\n", - " merlin/models/torch/tabular/transformations.py | 124 --\n", - " merlin/models/torch/typing.py | 30 -\n", - " merlin/models/torch/utils/__init__.py | 15 -\n", - " merlin/models/torch/utils/data_utils.py | 376 -----\n", - " merlin/models/torch/utils/examples_utils.py | 107 --\n", - " merlin/models/torch/utils/torch_utils.py | 210 ---\n", - " merlin/models/utils/dataset.py | 59 +-\n", - " merlin/models/utils/misc_utils.py | 7 +-\n", - " merlin/models/utils/nvt_utils.py | 6 +-\n", - " merlin/models/utils/schema_utils.py | 24 +-\n", - " merlin/models/xgb/__init__.py | 1 -\n", - " pytest.ini | 15 +\n", - " requirements/base.txt | 4 +-\n", - " requirements/docs.txt | 3 +-\n", - " requirements/horovod-cpu-environment.yml | 18 +\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " requirements/horovod.txt | 1 +\n", - " requirements/tensorflow.txt | 2 +-\n", - " requirements/test.txt | 2 +-\n", - " requirements/transformers.txt | 2 +-\n", - " tests/common/tf/retrieval/retrieval_utils.py | 4 +-\n", - " tests/integration/tf/test_ci_01_getting_started.py | 20 +-\n", - " .../tf/test_ci_03_exploring_different_models.py | 8 +-\n", - " .../tf/test_ci_06_advanced_own_architecture.py | 8 +-\n", - " tests/unit/datasets/test_ecommerce.py | 27 +-\n", - " tests/unit/datasets/test_synthetic.py | 15 +-\n", - " tests/unit/implicit/test_implicit.py | 60 +-\n", - " tests/unit/lightfm/test_lightfm.py | 68 +\n", - " .../blocks/retrieval/test_matrix_factorization.py | 7 +-\n", - " tests/unit/tf/blocks/retrieval/test_two_tower.py | 9 +-\n", - " tests/unit/tf/blocks/test_cross.py | 2 -\n", - " tests/unit/tf/blocks/test_interactions.py | 6 +-\n", - " tests/unit/tf/blocks/test_mlp.py | 39 +\n", - " tests/unit/tf/blocks/test_optimizer.py | 64 +-\n", - " tests/unit/tf/core/test_base.py | 5 +-\n", - " tests/unit/tf/core/test_combinators.py | 1 +\n", - " tests/unit/tf/core/test_encoder.py | 6 +-\n", - " tests/unit/tf/core/test_prediction.py | 2 +-\n", - " tests/unit/tf/examples/test_01_getting_started.py | 8 +-\n", - " .../examples/test_03_exploring_different_models.py | 8 +-\n", - " ...test_usecase_accelerate_training_by_lazyadam.py | 1 +\n", - " ..._usecase_incremental_training_layer_freezing.py | 2 +-\n", - " ...test_usecase_ranking_with_multitask_learning.py | 46 +\n", - " ...st_usecase_transformers_next_item_prediction.py | 36 +-\n", - " .../unit/tf/experimental}/__init__.py | 0\n", - " tests/unit/tf/experimental/test_sample_weight.py | 112 ++\n", - " tests/unit/tf/horovod/__init__.py | 2 +-\n", - " tests/unit/tf/horovod/test_embedding.py | 46 +\n", - " tests/unit/tf/horovod/test_horovod.py | 10 +-\n", - " tests/unit/tf/inputs/test_base.py | 2 +-\n", - " tests/unit/tf/inputs/test_block.py | 202 +++\n", - " tests/unit/tf/inputs/test_continuous.py | 4 +-\n", - " tests/unit/tf/inputs/test_embedding.py | 41 +-\n", - " tests/unit/tf/inputs/test_tabular.py | 10 +-\n", - " tests/unit/tf/metrics/test_metrics_topk.py | 2 -\n", - " tests/unit/tf/models/test_base.py | 93 +-\n", - " tests/unit/tf/models/test_benchmark.py | 13 +-\n", - " tests/unit/tf/models/test_ranking.py | 103 +-\n", - " 
tests/unit/tf/models/test_retrieval.py | 35 +-\n", - " tests/unit/tf/outputs/test_base.py | 78 +-\n", - " tests/unit/tf/outputs/test_block.py | 936 +++++++++++\n", - " tests/unit/tf/outputs/test_classification.py | 69 +-\n", - " tests/unit/tf/outputs/test_contrastive.py | 28 +-\n", - " tests/unit/tf/outputs/test_sampling.py | 17 +-\n", - " tests/unit/tf/prediction_tasks/test_multi_task.py | 281 +++-\n", - " tests/unit/tf/test_loader.py | 28 +-\n", - " tests/unit/tf/transformers/test_block.py | 187 ++-\n", - " tests/unit/tf/transforms/test_features.py | 123 +-\n", - " tests/unit/tf/transforms/test_negative_sampling.py | 63 +-\n", - " tests/unit/tf/transforms/test_noise.py | 1 -\n", - " tests/unit/tf/transforms/test_sequence.py | 55 +-\n", - " tests/unit/tf/transforms/test_tensor.py | 20 +-\n", - " tests/unit/tf/utils/test_batch.py | 20 +-\n", - " tests/unit/torch/__init__.py | 18 -\n", - " tests/unit/torch/_conftest.py | 151 --\n", - " tests/unit/torch/block/__init__.py | 15 -\n", - " tests/unit/torch/block/test_base.py | 62 -\n", - " tests/unit/torch/block/test_mlp.py | 30 -\n", - " tests/unit/torch/features/__init__.py | 15 -\n", - " tests/unit/torch/features/test_continuous.py | 34 -\n", - " tests/unit/torch/features/test_embedding.py | 250 ---\n", - " tests/unit/torch/features/test_tabular.py | 84 -\n", - " tests/unit/torch/model/__init__.py | 15 -\n", - " tests/unit/torch/model/test_head.py | 92 --\n", - " tests/unit/torch/model/test_model.py | 122 --\n", - " tests/unit/torch/tabular/__init__.py | 15 -\n", - " tests/unit/torch/tabular/test_aggregation.py | 106 --\n", - " tests/unit/torch/tabular/test_tabular.py | 88 -\n", - " tests/unit/torch/tabular/test_transformations.py | 122 --\n", - " tests/unit/torch/test_dataloader_utils.py | 86 -\n", - " tests/unit/torch/test_losses.py | 53 -\n", - " tests/unit/torch/test_public_api.py | 27 -\n", - " tests/unit/torch/utils/__init__.py | 15 -\n", - " tests/unit/xgb/test_xgboost.py | 2 +-\n", - " tox.ini | 78 +-\n", - " 210 files changed, 10688 insertions(+), 8019 deletions(-)\n", - " delete mode 100644 .github/workflows/blossom-ci.yml\n", - " create mode 100644 .github/workflows/check-base-branch.yaml\n", - " create mode 100644 .github/workflows/cpu-horovod.yml\n", - " create mode 100644 .github/workflows/cpu-t4r.yml\n", - " create mode 100644 .github/workflows/multi-gpu-ci.yml\n", - " create mode 100644 .github/workflows/packages.yaml\n", - " create mode 100644 .github/workflows/set-stable-branch.yaml\n", - " create mode 100644 examples/images/mtl_architectures.png\n", - " create mode 100644 examples/usecases/multi-gpu/install_sparse_operation_kit.sh\n", - " create mode 100644 examples/usecases/ranking_with_multitask_learning.ipynb\n", - " rename merlin/{models/torch/block => datasets/entertainment/tenrec_video}/__init__.py (100%)\n", - " create mode 100644 merlin/datasets/entertainment/tenrec_video/schema.pbtxt\n", - " create mode 100644 merlin/models/tf/distributed/embedding.py\n", - " create mode 100644 merlin/models/tf/experimental/sample_weight.py\n", - " create mode 100644 merlin/models/tf/outputs/block.py\n", - " delete mode 100644 merlin/models/torch/__init__.py\n", - " delete mode 100644 merlin/models/torch/block/base.py\n", - " delete mode 100644 merlin/models/torch/block/mlp.py\n", - " delete mode 100644 merlin/models/torch/features/base.py\n", - " delete mode 100644 merlin/models/torch/features/continuous.py\n", - " delete mode 100644 merlin/models/torch/features/embedding.py\n", - " delete mode 100644 
merlin/models/torch/features/tabular.py\n", - " delete mode 100644 merlin/models/torch/losses.py\n", - " delete mode 100644 merlin/models/torch/model/__init__.py\n", - " delete mode 100644 merlin/models/torch/model/base.py\n", - " delete mode 100644 merlin/models/torch/model/prediction_task.py\n", - " delete mode 100644 merlin/models/torch/tabular/__init__.py\n", - " delete mode 100644 merlin/models/torch/tabular/aggregation.py\n", - " delete mode 100644 merlin/models/torch/tabular/base.py\n", - " delete mode 100644 merlin/models/torch/tabular/transformations.py\n", - " delete mode 100644 merlin/models/torch/typing.py\n", - " delete mode 100644 merlin/models/torch/utils/__init__.py\n", - " delete mode 100644 merlin/models/torch/utils/data_utils.py\n", - " delete mode 100644 merlin/models/torch/utils/examples_utils.py\n", - " delete mode 100644 merlin/models/torch/utils/torch_utils.py\n", - " create mode 100644 pytest.ini\n", - " create mode 100644 requirements/horovod-cpu-environment.yml\n", - " create mode 100644 tests/unit/tf/examples/test_usecase_ranking_with_multitask_learning.py\n", - " rename {merlin/models/torch/features => tests/unit/tf/experimental}/__init__.py (100%)\n", - " create mode 100644 tests/unit/tf/experimental/test_sample_weight.py\n", - " create mode 100644 tests/unit/tf/horovod/test_embedding.py\n", - " create mode 100644 tests/unit/tf/inputs/test_block.py\n", - " create mode 100644 tests/unit/tf/outputs/test_block.py\n", - " delete mode 100644 tests/unit/torch/__init__.py\n", - " delete mode 100644 tests/unit/torch/_conftest.py\n", - " delete mode 100644 tests/unit/torch/block/__init__.py\n", - " delete mode 100644 tests/unit/torch/block/test_base.py\n", - " delete mode 100644 tests/unit/torch/block/test_mlp.py\n", - " delete mode 100644 tests/unit/torch/features/__init__.py\n", - " delete mode 100644 tests/unit/torch/features/test_continuous.py\n", - " delete mode 100644 tests/unit/torch/features/test_embedding.py\n", - " delete mode 100644 tests/unit/torch/features/test_tabular.py\n", - " delete mode 100644 tests/unit/torch/model/__init__.py\n", - " delete mode 100644 tests/unit/torch/model/test_head.py\n" + "From https://github.com/NVIDIA-Merlin/core\n", + " * [new branch] feature/merlin-array-dispatch -> origin/feature/merlin-array-dispatch\n", + " * [new branch] fix-repartition -> origin/fix-repartition\n", + " * [new branch] fix-with-properties -> origin/fix-with-properties\n", + " * [new branch] gh-pages -> origin/gh-pages\n", + " * [new branch] laiacano/docs-on-pr -> origin/laiacano/docs-on-pr\n", + " * [new branch] main -> origin/main\n", + " * [new branch] release-22.10 -> origin/release-22.10\n", + " * [new branch] release-22.11 -> origin/release-22.11\n", + " * [new branch] release-22.12 -> origin/release-22.12\n", + " * [new branch] release-23.02 -> origin/release-23.02\n", + " * [new branch] release-23.04 -> origin/release-23.04\n", + " * [new branch] revert-163-refactor/dictarray-columns -> origin/revert-163-refactor/dictarray-columns\n", + " * [new branch] stable -> origin/stable\n", + " * [new branch] tags-intersection -> origin/tags-intersection\n", + " * [new branch] v0.2.0-docs -> origin/v0.2.0-docs\n", + " * [new tag] v0.10.0 -> v0.10.0\n", + " * [new tag] v0.8.0 -> v0.8.0\n", + " * [new tag] v0.9.0 -> v0.9.0\n", + " * [new tag] v23.02.01 -> v23.02.01\n", + " * [new tag] v23.04.00 -> v23.04.00\n", + " * [new tag] v0.1.0 -> v0.1.0\n", + " * [new tag] v0.1.1 -> v0.1.1\n", + " * [new tag] v0.2.0 -> v0.2.0\n", + " * [new tag] v0.3.0 -> v0.3.0\n", + " 
* [new tag] v0.4.0 -> v0.4.0\n", + " * [new tag] v0.5.0 -> v0.5.0\n", + " * [new tag] v0.6.0 -> v0.6.0\n", + " * [new tag] v0.7.0 -> v0.7.0\n", + " * [new tag] v23.05.dev0 -> v23.05.dev0\n", + "Previous HEAD position was a824ab7a import pytest\n", + "Switched to a new branch 'main'\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - " delete mode 100644 tests/unit/torch/model/test_model.py\n", - " delete mode 100644 tests/unit/torch/tabular/__init__.py\n", - " delete mode 100644 tests/unit/torch/tabular/test_aggregation.py\n", - " delete mode 100644 tests/unit/torch/tabular/test_tabular.py\n", - " delete mode 100644 tests/unit/torch/tabular/test_transformations.py\n", - " delete mode 100644 tests/unit/torch/test_dataloader_utils.py\n", - " delete mode 100644 tests/unit/torch/test_losses.py\n", - " delete mode 100644 tests/unit/torch/test_public_api.py\n", - " delete mode 100644 tests/unit/torch/utils/__init__.py\n", - "Processing /models\n", - " Installing build dependencies: started\n", - " Installing build dependencies: finished with status 'done'\n", - " Getting requirements to build wheel: started\n", - " Getting requirements to build wheel: finished with status 'done'\n", - " Preparing wheel metadata: started\n", - " Preparing wheel metadata: finished with status 'done'\n", - "Collecting merlin-core>=23.4.0\n", - " Downloading merlin-core-23.4.0.tar.gz (133 kB)\n", - " Installing build dependencies: started\n", - " Installing build dependencies: finished with status 'done'\n", - " Getting requirements to build wheel: started\n", - " Getting requirements to build wheel: finished with status 'done'\n", - " Preparing wheel metadata: started\n", - " Preparing wheel metadata: finished with status 'done'\n", - "Collecting merlin-dataloader>=23.4.0\n", - " Downloading merlin-dataloader-23.4.0.tar.gz (46 kB)\n", + "Branch 'main' set up to track remote branch 'main' from 'origin'.\n", + "Processing /core\n", " Installing build dependencies: started\n", " Installing build dependencies: finished with status 'done'\n", " Getting requirements to build wheel: started\n", " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (22.0)\n", - "Collecting fsspec>=2022.7.1\n", - " Downloading fsspec-2023.5.0-py3-none-any.whl (160 kB)\n", - "Requirement already satisfied: numpy>=1.22.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.22.4)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (0.56.4)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (3.19.6)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (4.64.1)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (8.0.0)\n", - "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.12.0)\n", - 
"Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.2.5)\n", - "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (11.4.1)\n", - "Collecting dask>=2022.11.1\n", - " Downloading dask-2023.4.1-py3-none-any.whl (1.2 MB)\n", - "Collecting distributed>=2022.11.1\n", - " Downloading distributed-2023.4.1-py3-none-any.whl (962 kB)\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.3.5)\n", - "Collecting dask-cuda>=22.12.0\n", - " Downloading dask_cuda-23.4.0-py3-none-any.whl (125 kB)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (0.39.1)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (45.2.0)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (5.2.0)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.57.0)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.3.0)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (0.4.3)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.2.0)\n", - "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (0.12.0)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (6.0)\n", - "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.3.0)\n", - "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (2.2.0)\n", - "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (8.1.3)\n", - "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.26.13)\n", - "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (5.9.4)\n", - "Requirement already satisfied: zict>=2.2.0 in /usr/local/lib/python3.8/dist-packages (from 
distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (2.2.0)\n", - "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (2.4.0)\n", - "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (6.1)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.7.0)\n", - "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.0.4)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.0.0)\n", - "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (3.1.2)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (2022.7)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (2.8.2)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (3.11.0)\n", - "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (4.1.0)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (6.0.4)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=2.2.0->distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.0.1)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (2.1.1)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (1.14.0)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (4.0.0)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==0.9.0+157.gd8133b8f) (6.0.1)\n", - "Building wheels for collected packages: merlin-models, merlin-core, merlin-dataloader\n", - " Building wheel for merlin-models (PEP 517): started\n", - " Building wheel for merlin-models (PEP 517): finished with status 
'done'\n", - " Created wheel for merlin-models: filename=merlin_models-0.9.0+157.gd8133b8f-py3-none-any.whl size=343257 sha256=2c9ef3392cbe77d1daad7c766b221d7bec14cc3c18c7b000c9312e00a7d1a16f\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-5qnt9sgn/wheels/4d/e8/98/0493db55fff90dc9af123f55a9455b96f7f8166c912a02c8a6\n", + "Building wheels for collected packages: merlin-core\n", " Building wheel for merlin-core (PEP 517): started\n", " Building wheel for merlin-core (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-core: filename=merlin_core-23.4.0-py3-none-any.whl size=159556 sha256=f8418cb4ec8a321feabf92606e7da3f7e6f913de2757c44ea02db38e7ea51494\n", - " Stored in directory: /root/.cache/pip/wheels/42/ef/87/2c64bce8c3064a2c4e399933df4eda4838939355698ff8f7c7\n", - " Building wheel for merlin-dataloader (PEP 517): started\n", - " Building wheel for merlin-dataloader (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-dataloader: filename=merlin_dataloader-23.4.0-py3-none-any.whl size=34732 sha256=e09b59834d26dbdb9418925dc395adf47d9ea26c53daea3d18cdb79d5211d04b\n", - " Stored in directory: /root/.cache/pip/wheels/90/b0/66/48e52cc29f544ffbd105154b8be0901b5bb80cc85842b778fc\n", - "Successfully built merlin-models merlin-core merlin-dataloader\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ERROR: dask-cudf 22.8.0a0+304.g6ca81bbc78.dirty requires cupy-cuda118<12,>=9.5.0, which is not installed.\n", - "ERROR: cudf 22.8.0a0+304.g6ca81bbc78.dirty requires cupy-cuda118<12,>=9.5.0, which is not installed.\n", - "ERROR: dask-cudf 22.8.0a0+304.g6ca81bbc78.dirty has requirement dask==2022.7.1, but you'll have dask 2023.4.1 which is incompatible.\n", - "ERROR: dask-cudf 22.8.0a0+304.g6ca81bbc78.dirty has requirement distributed==2022.7.1, but you'll have distributed 2023.4.1 which is incompatible.\n", - "ERROR: dask-cuda 23.4.0 has requirement dask==2023.3.2, but you'll have dask 2023.4.1 which is incompatible.\n", - "ERROR: dask-cuda 23.4.0 has requirement distributed==2023.3.2.1, but you'll have distributed 2023.4.1 which is incompatible.\n", - "ERROR: cudf 22.8.0a0+304.g6ca81bbc78.dirty has requirement cuda-python<11.7.1,>=11.5, but you'll have cuda-python 11.8.1 which is incompatible.\n", - "ERROR: cudf 22.8.0a0+304.g6ca81bbc78.dirty has requirement protobuf<3.21.0a0,>=3.20.1, but you'll have protobuf 3.19.6 which is incompatible.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Installing collected packages: fsspec, dask, distributed, dask-cuda, merlin-core, merlin-dataloader, merlin-models\n", - " Attempting uninstall: fsspec\n", - " Found existing installation: fsspec 2022.5.0\n", - " Uninstalling fsspec-2022.5.0:\n", - " Successfully uninstalled fsspec-2022.5.0\n", - " Attempting uninstall: dask\n", - " Found existing installation: dask 2022.7.1\n", - " Uninstalling dask-2022.7.1:\n", - " Successfully uninstalled dask-2022.7.1\n", - " Attempting uninstall: distributed\n", - " Found existing installation: distributed 2022.7.1\n", - " Uninstalling distributed-2022.7.1:\n", - " Successfully uninstalled distributed-2022.7.1\n", - " Attempting uninstall: dask-cuda\n", - " Found existing installation: dask-cuda 22.8.0a0+36.g9860cad\n", - " Uninstalling dask-cuda-22.8.0a0+36.g9860cad:\n", - " Successfully uninstalled dask-cuda-22.8.0a0+36.g9860cad\n", + " Created wheel for merlin-core: filename=merlin_core-23.5.dev0+21.ga0bcd30f-py3-none-any.whl size=161483 
sha256=46bd0c2ab8672b0d312287b28514d5dc920be76bc42454a91acdc29c3e603e45\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-ynb25ulq/wheels/8f/da/8c/c779661788874afaa32fd10abeac6016635956e3bad9940584\n", + "Successfully built merlin-core\n", + "Installing collected packages: merlin-core\n", " Attempting uninstall: merlin-core\n", - " Found existing installation: merlin-core 0.10.0\n", - " Uninstalling merlin-core-0.10.0:\n", - " Successfully uninstalled merlin-core-0.10.0\n", - " Attempting uninstall: merlin-dataloader\n", - " Found existing installation: merlin-dataloader 0.0.4\n", - " Uninstalling merlin-dataloader-0.0.4:\n", - " Successfully uninstalled merlin-dataloader-0.0.4\n", - " Attempting uninstall: merlin-models\n", - " Found existing installation: merlin-models 0.11.0\n", - " Uninstalling merlin-models-0.11.0:\n", - " Successfully uninstalled merlin-models-0.11.0\n", - "Successfully installed dask-2023.4.1 dask-cuda-23.4.0 distributed-2023.4.1 fsspec-2023.5.0 merlin-core-23.4.0 merlin-dataloader-23.4.0 merlin-models-0.9.0+157.gd8133b8f\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Previous HEAD position was 2fc6889 add schema parameter to the `repartition` method (#192)\n", - "Switched to branch 'main'\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Your branch is up to date with 'origin/main'.\n" + " Found existing installation: merlin-core 23.2.0\n", + " Uninstalling merlin-core-23.2.0:\n", + " Successfully uninstalled merlin-core-23.2.0\n", + "Successfully installed merlin-core-23.5.dev0+21.ga0bcd30f\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "From https://github.com/NVIDIA-Merlin/core\n", - " * branch main -> FETCH_HEAD\n", - " cd96ca5f..a0bcd30f main -> origin/main\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Updating cd96ca5f..a0bcd30f\n", - "Fast-forward\n", - " .github/actionlint.yaml | 5 +\n", - " .github/release-drafter.yml | 44 +--\n", - " .github/workflows/ISSUE_TEMPLATE/bug-report.md | 17 +-\n", - " .../ISSUE_TEMPLATE/documentation-request.md | 12 +-\n", - " .../workflows/ISSUE_TEMPLATE/feature-request.md | 5 +-\n", - " .../workflows/ISSUE_TEMPLATE/submit-question.md | 3 +-\n", - " .github/workflows/ISSUE_TEMPLATE/task.md | 5 +-\n", - " .github/workflows/check-base-branch.yaml | 9 +\n", - " .github/workflows/cpu-ci.yml | 138 +-------\n", - " .github/workflows/cpu-models.yml | 44 ---\n", - " .github/workflows/cpu-nvtabular.yml | 44 ---\n", - " .github/workflows/cpu-systems.yml | 44 ---\n", - " .github/workflows/docs-preview-pr.yaml | 4 +-\n", - " .github/workflows/docs-sched-rebuild.yaml | 51 +--\n", - " .github/workflows/gpu-ci.yml | 52 ++-\n", - " .github/workflows/lint.yaml | 11 +-\n", - " .github/workflows/merlin.yml | 35 ++\n", - " .github/workflows/packages.yaml | 154 ++++++++\n", - " .github/workflows/release-drafter.yaml | 4 +-\n", - " .github/workflows/set-stable-branch.yaml | 10 +\n", - " .github/workflows/tox.yml | 38 ++\n", - " .pre-commit-config.yaml | 55 +--\n", - " .prettierignore | 2 +\n", - " CLA.md | 9 +-\n", - " CONTRIBUTING.md | 28 +-\n", - " README.md | 68 ++--\n", - " ci/pr.gpu.Jenkinsfile | 2 +-\n", - " conda/recipe/meta.yaml | 4 +-\n", - " docs/README.md | 49 ++-\n", - " merlin/core/compat/__init__.py | 143 ++++++++\n", - " merlin/core/compat/tensorflow.py | 92 +++++\n", - " merlin/core/compat/torch.py | 22 ++\n", - " merlin/core/dispatch.py | 245 ++++++++-----\n", - " merlin/core/has_gpu.py | 46 +++\n", - " 
merlin/core/utils.py | 88 +----\n", - " merlin/dag/__init__.py | 1 +\n", - " merlin/dag/base_operator.py | 30 +-\n", - " merlin/dag/dictarray.py | 3 +-\n", - " merlin/dag/executors.py | 242 +++++++------\n", - " merlin/dag/graph.py | 20 ++\n", - " merlin/dag/node.py | 5 +-\n", - " merlin/dag/selector.py | 10 +-\n", - " merlin/dag/utils.py | 69 ++++\n", - " merlin/dispatch/lazy.py | 156 +++++++++\n", - " merlin/dtypes/__init__.py | 61 ++++\n", - " merlin/dtypes/aliases.py | 53 +++\n", - " merlin/dtypes/base.py | 179 ++++++++++\n", - " merlin/dtypes/mapping.py | 177 ++++++++++\n", - " .../compat.py => dtypes/mappings/__init__.py} | 17 +-\n", - " merlin/dtypes/mappings/cudf.py | 61 ++++\n", - " merlin/dtypes/mappings/merlin.py | 51 +++\n", - " merlin/dtypes/mappings/numpy.py | 52 +++\n", - " merlin/dtypes/mappings/pandas.py | 38 ++\n", - " merlin/dtypes/mappings/python.py | 28 ++\n", - " merlin/dtypes/mappings/tf.py | 52 +++\n", - " merlin/dtypes/mappings/torch.py | 43 +++\n", - " merlin/dtypes/mappings/triton.py | 53 +++\n", - " merlin/dtypes/registry.py | 136 ++++++++\n", - " merlin/dtypes/shape.py | 200 +++++++++++\n", - " merlin/io/__init__.py | 2 +-\n", - " merlin/io/avro.py | 6 +-\n", - " merlin/io/csv.py | 9 +-\n", - " merlin/io/dask.py | 74 +++-\n", - " merlin/io/dataframe_engine.py | 6 +-\n", - " merlin/io/dataset.py | 112 ++++--\n", - " merlin/io/fsspec_utils.py | 16 +-\n", - " merlin/io/parquet.py | 25 +-\n", - " merlin/io/shuffle.py | 13 +-\n", - " merlin/io/worker.py | 104 +++---\n", - " merlin/io/writer.py | 7 +-\n", - " merlin/io/writer_factory.py | 10 +-\n", - " merlin/schema/io/tensorflow_metadata.py | 115 ++++--\n", - " merlin/schema/schema.py | 331 +++++++++++-------\n", - " merlin/schema/tags.py | 7 +-\n", - " merlin/table/__init__.py | 24 ++\n", - " merlin/table/conversions.py | 226 ++++++++++++\n", - " merlin/table/cupy_column.py | 108 ++++++\n", - " merlin/table/numpy_column.py | 122 +++++++\n", - " merlin/table/tensor_column.py | 261 ++++++++++++++\n", - " merlin/table/tensor_table.py | 294 ++++++++++++++++\n", - " merlin/table/tensorflow_column.py | 173 +++++++++\n", - " merlin/table/torch_column.py | 135 +++++++\n", - " requirements-gpu.txt | 2 +-\n", - " requirements.txt | 13 +-\n", - " tests/conftest.py | 35 +-\n", - " tests/unit/core/test_dispatch.py | 43 ++-\n", - " tests/unit/core/test_protocols.py | 10 +-\n", - " tests/unit/core/test_version.py | 2 +\n", - " tests/unit/dag/test_column_selector.py | 6 +\n", - " tests/unit/dag/test_dag_utils.py | 31 ++\n", - " tests/unit/dispatch/test_lazy_dispatch.py | 61 ++++\n", - " tests/unit/dtypes/test_cudf.py | 30 ++\n", - " tests/unit/dtypes/test_module.py | 61 ++++\n", - " tests/unit/dtypes/test_shape.py | 222 ++++++++++++\n", - " tests/unit/io/test_avro.py | 8 +-\n", - " tests/unit/io/test_dataset.py | 51 +++\n", - " tests/unit/io/test_io.py | 95 ++++-\n", - " tests/unit/io/test_worker.py | 142 ++++++++\n", - " tests/unit/schema/test_column_schemas.py | 142 +++++---\n", - " tests/unit/schema/test_schema.py | 60 +++-\n", - " tests/unit/schema/test_schema_io.py | 54 ++-\n", - " tests/unit/table/test_convert_column.py | 164 +++++++++\n", - " tests/unit/table/test_tensor_column.py | 262 ++++++++++++++\n", - " tests/unit/table/test_tensor_table.py | 387 +++++++++++++++++++++\n", - " tests/unit/utils/test_utils.py | 16 +-\n", - " tox.ini | 49 ++-\n", - " 106 files changed, 6299 insertions(+), 1146 deletions(-)\n", - " create mode 100644 .github/actionlint.yaml\n", - " create mode 100644 
.github/workflows/check-base-branch.yaml\n", - " delete mode 100644 .github/workflows/cpu-models.yml\n", - " delete mode 100644 .github/workflows/cpu-nvtabular.yml\n", - " delete mode 100644 .github/workflows/cpu-systems.yml\n", - " create mode 100644 .github/workflows/merlin.yml\n", - " create mode 100644 .github/workflows/packages.yaml\n", - " create mode 100644 .github/workflows/set-stable-branch.yaml\n", - " create mode 100644 .github/workflows/tox.yml\n", - " create mode 100644 .prettierignore\n", - " create mode 100644 merlin/core/compat/__init__.py\n", - " create mode 100644 merlin/core/compat/tensorflow.py\n", - " create mode 100644 merlin/core/compat/torch.py\n", - " create mode 100644 merlin/core/has_gpu.py\n", - " create mode 100644 merlin/dag/utils.py\n", - " create mode 100644 merlin/dispatch/lazy.py\n", - " create mode 100644 merlin/dtypes/__init__.py\n", - " create mode 100644 merlin/dtypes/aliases.py\n", - " create mode 100644 merlin/dtypes/base.py\n", - " create mode 100644 merlin/dtypes/mapping.py\n", - " rename merlin/{core/compat.py => dtypes/mappings/__init__.py} (60%)\n", - " create mode 100644 merlin/dtypes/mappings/cudf.py\n", - " create mode 100644 merlin/dtypes/mappings/merlin.py\n", - " create mode 100644 merlin/dtypes/mappings/numpy.py\n", - " create mode 100644 merlin/dtypes/mappings/pandas.py\n", - " create mode 100644 merlin/dtypes/mappings/python.py\n", - " create mode 100644 merlin/dtypes/mappings/tf.py\n", - " create mode 100644 merlin/dtypes/mappings/torch.py\n" + "From https://github.com/NVIDIA-Merlin/dataloader\n", + " * [new branch] chore/comprehensive-shapes -> origin/chore/comprehensive-shapes\n", + " * [new branch] chore/packages-action -> origin/chore/packages-action\n", + " * [new branch] collabify_examples -> origin/collabify_examples\n", + " * [new branch] docs-add-seo -> origin/docs-add-seo\n", + " * [new branch] docs-calver-banner -> origin/docs-calver-banner\n", + " * [new branch] ds-api -> origin/ds-api\n", + " * [new branch] feature/embedding-tags -> origin/feature/embedding-tags\n", + " * [new branch] fix-sparse-logic -> origin/fix-sparse-logic\n", + " * [new branch] fix/tf-batch-size-warning -> origin/fix/tf-batch-size-warning\n", + " * [new branch] gh-pages -> origin/gh-pages\n", + " * [new branch] gha-test -> origin/gha-test\n", + " * [new branch] laiacano/docs-pr -> origin/laiacano/docs-pr\n", + " * [new branch] main -> origin/main\n", + " * [new branch] no_gpu -> origin/no_gpu\n", + " * [new branch] release-22.11 -> origin/release-22.11\n", + " * [new branch] release-22.12 -> origin/release-22.12\n", + " * [new branch] release-23.02 -> origin/release-23.02\n", + " * [new branch] release-23.04 -> origin/release-23.04\n", + " * [new branch] stable -> origin/stable\n", + " * [new branch] update_github_actions -> origin/update_github_actions\n", + " * [new tag] v0.0.3 -> v0.0.3\n", + " * [new tag] v0.0.4 -> v0.0.4\n", + " * [new tag] v23.02.01 -> v23.02.01\n", + " * [new tag] v23.04.00 -> v23.04.00\n", + " * [new tag] v0.0.1 -> v0.0.1\n", + " * [new tag] v0.0.2 -> v0.0.2\n", + " * [new tag] v23.05.dev0 -> v23.05.dev0\n", + "Previous HEAD position was 02aad21 Replace `nnzs` with `row_lengths` for clarity (#99)\n", + "Switched to a new branch 'main'\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - " create mode 100644 merlin/dtypes/mappings/triton.py\n", - " create mode 100644 merlin/dtypes/registry.py\n", - " create mode 100644 merlin/dtypes/shape.py\n", - " create mode 100644 merlin/table/__init__.py\n", - " create mode 
100644 merlin/table/conversions.py\n", - " create mode 100644 merlin/table/cupy_column.py\n", - " create mode 100644 merlin/table/numpy_column.py\n", - " create mode 100644 merlin/table/tensor_column.py\n", - " create mode 100644 merlin/table/tensor_table.py\n", - " create mode 100644 merlin/table/tensorflow_column.py\n", - " create mode 100644 merlin/table/torch_column.py\n", - " create mode 100644 tests/unit/dag/test_dag_utils.py\n", - " create mode 100644 tests/unit/dispatch/test_lazy_dispatch.py\n", - " create mode 100644 tests/unit/dtypes/test_cudf.py\n", - " create mode 100644 tests/unit/dtypes/test_module.py\n", - " create mode 100644 tests/unit/dtypes/test_shape.py\n", - " create mode 100644 tests/unit/io/test_dataset.py\n", - " create mode 100644 tests/unit/io/test_worker.py\n", - " create mode 100644 tests/unit/table/test_convert_column.py\n", - " create mode 100644 tests/unit/table/test_tensor_column.py\n", - " create mode 100644 tests/unit/table/test_tensor_table.py\n", - "Processing /core\n", + "Branch 'main' set up to track remote branch 'main' from 'origin'.\n", + "Processing /dataloader\n", " Installing build dependencies: started\n", " Installing build dependencies: finished with status 'done'\n", " Getting requirements to build wheel: started\n", " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (1.3.5)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (4.64.1)\n", - "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (1.12.0)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (0.56.4)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (22.0)\n", - "Requirement already satisfied: dask-cuda>=22.12.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (23.4.0)\n", - "Requirement already satisfied: dask>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (2023.4.1)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (3.19.6)\n", - "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (11.4.1)\n", - "Requirement already satisfied: fsspec>=2022.7.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (2023.5.0)\n", - "Requirement already satisfied: numpy>=1.22.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (1.22.4)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (8.0.0)\n", - "Requirement already satisfied: distributed>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (2023.4.1)\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (1.2.5)\n", - "Requirement already satisfied: pytz>=2017.3 
in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+125.ga0bcd30f) (2022.7)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+125.ga0bcd30f) (2.8.2)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+125.ga0bcd30f) (1.3.0)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+125.ga0bcd30f) (1.57.0)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core==0.9.0+125.ga0bcd30f) (45.2.0)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+125.ga0bcd30f) (5.2.0)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+125.ga0bcd30f) (0.39.1)\n", - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from dask-cuda>=22.12.0->merlin-core==0.9.0+125.ga0bcd30f) (2.2.0)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (6.0)\n", - "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (1.3.0)\n", - "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (2.2.0)\n", - "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (8.1.3)\n", - "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (0.12.0)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (1.7.0)\n", - "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (6.1)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (1.0.0)\n", - "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (5.9.4)\n", - "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (2.4.0)\n", - "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (1.26.13)\n", - "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (3.1.2)\n", - "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (1.0.4)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from 
betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (0.4.3)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (1.2.0)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+125.ga0bcd30f) (1.14.0)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core==0.9.0+125.ga0bcd30f) (3.11.0)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->dask-cuda>=22.12.0->merlin-core==0.9.0+125.ga0bcd30f) (1.0.1)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (2.1.1)\n", - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (6.0.4)\n", - "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (4.1.0)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (4.0.0)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (6.0.1)\n", - "Building wheels for collected packages: merlin-core\n", - " Building wheel for merlin-core (PEP 517): started\n", - " Building wheel for merlin-core (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-core: filename=merlin_core-0.9.0+125.ga0bcd30f-py3-none-any.whl size=161449 sha256=0c37c110ad7a9dc1a4721b776063e9d1571a000763322b27df7ea731ae78164e\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-yeu5mq1c/wheels/8f/da/8c/c779661788874afaa32fd10abeac6016635956e3bad9940584\n", - "Successfully built merlin-core\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ERROR: merlin-models 0.9.0+157.gd8133b8f has requirement merlin-core>=23.4.0, but you'll have merlin-core 0.9.0+125.ga0bcd30f which is incompatible.\n", - "ERROR: merlin-dataloader 23.4.0 has requirement merlin-core>=23.4.0, but you'll have merlin-core 0.9.0+125.ga0bcd30f which is incompatible.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Installing collected packages: merlin-core\n", - " Attempting uninstall: merlin-core\n", - " Found existing installation: merlin-core 23.4.0\n", - " Uninstalling merlin-core-23.4.0:\n", - " Successfully uninstalled merlin-core-23.4.0\n", - "Successfully installed merlin-core-0.9.0+125.ga0bcd30f\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Previous HEAD position was 020b24b7 Fix output error occurring due to check if it is a dict or not (#1742)\n", - "Switched to branch 'main'\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Your branch is up to date with 'origin/main'.\n" + "Building wheels for collected packages: merlin-dataloader\n", + " Building wheel for merlin-dataloader (PEP 517): started\n", + " Building wheel for merlin-dataloader (PEP 517): 
finished with status 'done'\n", + " Created wheel for merlin-dataloader: filename=merlin_dataloader-23.5.dev0+8.gd9e97b4-py3-none-any.whl size=34916 sha256=607302e63f936c0f5d381f67a9d388d72c5f1883fc7ba595863caee1d38277b3\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-p8vl5h52/wheels/8c/19/5b/15dc04f5a977f6a7f73ed66c91996a687b1d9e3154a4765536\n", + "Successfully built merlin-dataloader\n", + "Installing collected packages: merlin-dataloader\n", + " Attempting uninstall: merlin-dataloader\n", + " Found existing installation: merlin-dataloader 23.2.0\n", + " Uninstalling merlin-dataloader-23.2.0:\n", + " Successfully uninstalled merlin-dataloader-23.2.0\n", + "Successfully installed merlin-dataloader-23.5.dev0+8.gd9e97b4\n" ] }, { @@ -764,119 +137,116 @@ "output_type": "stream", "text": [ "From https://github.com/NVIDIA-Merlin/NVTabular\n", - " * branch main -> FETCH_HEAD\n", - " c5bc4098..67136eba main -> origin/main\n" + " * [new branch] 1077-implement -> origin/1077-implement\n", + " * [new branch] 21.09/column-tagging -> origin/21.09/column-tagging\n", + " * [new branch] 21.09/dataset-collection -> origin/21.09/dataset-collection\n", + " * [new branch] 21.09/operator-block -> origin/21.09/operator-block\n", + " * [new branch] 21.09/schema -> origin/21.09/schema\n", + " * [new branch] add_sum_to_supported_aggregations -> origin/add_sum_to_supported_aggregations\n", + " * [new branch] aiobotocore_v2 -> origin/aiobotocore_v2\n", + " * [new branch] alexanderronquillo-patch-1 -> origin/alexanderronquillo-patch-1\n", + " * [new branch] automate_pypi -> origin/automate_pypi\n", + " * [new branch] bench-pynvml-fix -> origin/bench-pynvml-fix\n", + " * [new branch] branch-0.6 -> origin/branch-0.6\n", + " * [new branch] bschifferer-remove_examples_1 -> origin/bschifferer-remove_examples_1\n", + " * [new branch] categorify-inference-int16 -> origin/categorify-inference-int16\n", + " * [new branch] columns_with_aggs_in_names -> origin/columns_with_aggs_in_names\n", + " * [new branch] conda-package-python-versions -> origin/conda-package-python-versions\n", + " * [new branch] conda_gh_action -> origin/conda_gh_action\n", + " * [new branch] dataloader-remove-sparse -> origin/dataloader-remove-sparse\n", + " * [new branch] dataloader_doc_fix -> origin/dataloader_doc_fix\n", + " * [new branch] disable-package-build-on-pull-requests -> origin/disable-package-build-on-pull-requests\n", + " * [new branch] dont_install_tests -> origin/dont_install_tests\n", + " * [new branch] drop_low_cardinality -> origin/drop_low_cardinality\n", + " * [new branch] fix-docs-tox-env -> origin/fix-docs-tox-env\n", + " * [new branch] fix-wf-file -> origin/fix-wf-file\n", + " * [new branch] fix/inference-deprecation -> origin/fix/inference-deprecation\n", + " * [new branch] fix_data_path -> origin/fix_data_path\n", + " * [new branch] fix_hugectr_nb -> origin/fix_hugectr_nb\n", + " * [new branch] fix_nbs -> origin/fix_nbs\n", + " * [new branch] gh-pages -> origin/gh-pages\n", + " * [new branch] groupby_without_groupby_col_in_col_selector -> origin/groupby_without_groupby_col_in_col_selector\n", + " * [new branch] hugectr-newapi -> origin/hugectr-newapi\n", + " * [new branch] laiacano/check-list-from-schema -> origin/laiacano/check-list-from-schema\n", + " * [new branch] laiacano/workflow-subgraph -> origin/laiacano/workflow-subgraph\n", + " * [new branch] main -> origin/main\n", + " * [new branch] na_sentinel -> origin/na_sentinel\n", + " * [new branch] notebooks-21.10 -> origin/notebooks-21.10\n", + " * [new branch] 
nvt-1195 -> origin/nvt-1195\n", + " * [new branch] nvtabular_examples -> origin/nvtabular_examples\n", + " * [new branch] packages-workflow-split -> origin/packages-workflow-split\n", + " * [new branch] readme_updates -> origin/readme_updates\n", + " * [new branch] refactor/fit-schema -> origin/refactor/fit-schema\n", + " * [new branch] refactor/input-column-selection -> origin/refactor/input-column-selection\n", + " * [new branch] refactor/postpone-schema-binding -> origin/refactor/postpone-schema-binding\n", + " * [new branch] release-22.10 -> origin/release-22.10\n", + " * [new branch] release-22.11 -> origin/release-22.11\n", + " * [new branch] release-22.12 -> origin/release-22.12\n", + " * [new branch] release-23.02 -> origin/release-23.02\n", + " * [new branch] release-23.04 -> origin/release-23.04\n", + " * [new branch] remove_poetry -> origin/remove_poetry\n", + " * [new branch] remove_release_notes -> origin/remove_release_notes\n", + " * [new branch] repeat-ops -> origin/repeat-ops\n", + " * [new branch] rjzamora-simplify-criteo -> origin/rjzamora-simplify-criteo\n", + " * [new branch] rnyak-patch-1 -> origin/rnyak-patch-1\n", + " * [new branch] romeyn/input-api -> origin/romeyn/input-api\n", + " * [new branch] stable -> origin/stable\n", + " * [new branch] test-column-similarity-dataset-cpu-default-none -> origin/test-column-similarity-dataset-cpu-default-none\n", + " * [new branch] test-torch-dataloader-dataset-cpu-default-none -> origin/test-torch-dataloader-dataset-cpu-default-none\n", + " * [new branch] torch_catch -> origin/torch_catch\n", + " * [new branch] update-dask-reqs -> origin/update-dask-reqs\n", + " * [new branch] update_merlin_core -> origin/update_merlin_core\n", + " * [new branch] update_requirements -> origin/update_requirements\n", + " * [new branch] v0.10.0-docs -> origin/v0.10.0-docs\n", + " * [new branch] v0.11.0-docs -> origin/v0.11.0-docs\n", + " * [new branch] v0.7.1-docs -> origin/v0.7.1-docs\n", + " * [new branch] v0.8.0-docs -> origin/v0.8.0-docs\n", + " * [new branch] v0.9.0-docs -> origin/v0.9.0-docs\n", + " * [new branch] v1.0.0-docs -> origin/v1.0.0-docs\n", + " * [new tag] v0.6.1 -> v0.6.1\n", + " * [new tag] v1.6.0 -> v1.6.0\n", + " * [new tag] v1.7.0 -> v1.7.0\n", + " * [new tag] v1.8.1 -> v1.8.1\n", + " * [new tag] v23.04.00 -> v23.04.00\n", + " * [new tag] v0.1.0 -> v0.1.0\n", + " * [new tag] v0.1.1 -> v0.1.1\n", + " * [new tag] v0.10.0 -> v0.10.0\n", + " * [new tag] v0.11.0 -> v0.11.0\n", + " * [new tag] v0.2.0 -> v0.2.0\n", + " * [new tag] v0.3.0 -> v0.3.0\n", + " * [new tag] v0.4.0 -> v0.4.0\n", + " * [new tag] v0.5.0 -> v0.5.0\n", + " * [new tag] v0.5.1 -> v0.5.1\n", + " * [new tag] v0.5.2 -> v0.5.2\n", + " * [new tag] v0.5.3 -> v0.5.3\n", + " * [new tag] v0.6.0 -> v0.6.0\n", + " * [new tag] v0.7.0 -> v0.7.0\n", + " * [new tag] v0.7.1 -> v0.7.1\n", + " * [new tag] v0.8.0 -> v0.8.0\n", + " * [new tag] v0.9.0 -> v0.9.0\n", + " * [new tag] v1.0.0 -> v1.0.0\n", + " * [new tag] v1.1.0 -> v1.1.0\n", + " * [new tag] v1.1.1 -> v1.1.1\n", + " * [new tag] v1.2.0 -> v1.2.0\n", + " * [new tag] v1.2.1 -> v1.2.1\n", + " * [new tag] v1.2.2 -> v1.2.2\n", + " * [new tag] v1.3.0 -> v1.3.0\n", + " * [new tag] v1.3.1 -> v1.3.1\n", + " * [new tag] v1.3.2 -> v1.3.2\n", + " * [new tag] v1.3.3 -> v1.3.3\n", + " * [new tag] v1.4.0 -> v1.4.0\n", + " * [new tag] v1.5.0 -> v1.5.0\n", + " * [new tag] v1.8.0 -> v1.8.0\n", + " * [new tag] v23.05.dev0 -> v23.05.dev0\n", + "Previous HEAD position was 371195ac Replace `nvtabular` inference back-end with `python` 
(#1771)\n", + "Switched to a new branch 'main'\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Updating c5bc4098..67136eba\n", - "Fast-forward\n", - " .github/ISSUE_TEMPLATE/bug_report.md | 11 +-\n", - " .github/ISSUE_TEMPLATE/documentation-request.md | 3 +-\n", - " .github/ISSUE_TEMPLATE/feature_request.md | 3 +-\n", - " .github/ISSUE_TEMPLATE/operator_request.md | 14 +-\n", - " .github/ISSUE_TEMPLATE/research_question.md | 3 +-\n", - " .github/ISSUE_TEMPLATE/submit-question.md | 3 +-\n", - " .github/ISSUE_TEMPLATE/task.md | 4 +-\n", - " .github/release-drafter.yml | 44 ++--\n", - " .github/workflows/blossom-ci.yml | 230 ++++++++++-----------\n", - " .github/workflows/check-base-branch.yaml | 9 +\n", - " .github/workflows/conda-env-create.yml | 30 +--\n", - " .github/workflows/cpu-ci.yml | 138 -------------\n", - " .github/workflows/cpu-packages.yml | 179 ++++++++++++++++\n", - " .github/workflows/cpu-tests.yml | 75 +++++++\n", - " .github/workflows/docs-preview-pr.yaml | 4 +-\n", - " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", - " .github/workflows/gpu-ci.yml | 30 ---\n", - " .github/workflows/gpu-tests.yml | 34 +++\n", - " .github/workflows/lint.yaml | 4 +\n", - " .github/workflows/release-drafter.yaml | 2 +-\n", - " .github/workflows/set-stable-branch.yaml | 10 +\n", - " .gitlab-ci.yml | 23 +--\n", - " .pre-commit-config.yaml | 47 +++--\n", - " .prettierignore | 2 +\n", - " CHANGELOG.md | 187 ++++++++---------\n", - " CONTRIBUTING.md | 30 +--\n", - " README.md | 48 ++---\n", - " bench/datasets/tools/nvt_etl.py | 4 +-\n", - " bench/datasets/tools/train_tensorflow.py | 1 -\n", - " bench/examples/MultiGPUBench.md | 67 +++---\n", - " bench/examples/dask-nvtabular-criteo-benchmark.py | 4 +-\n", - " ci/pr.gpu.Jenkinsfile | 2 +-\n", - " conda/environments/nvtabular_aws_sagemaker.yml | 2 +-\n", - " conda/recipes/meta.yaml | 2 +-\n", - " cpp/nvtabular/inference/categorify.cc | 10 +\n", - " docs/README.md | 29 ++-\n", - " docs/source/core_features.md | 48 ++---\n", - " docs/source/resources/architecture.md | 17 +-\n", - " docs/source/resources/cloud_integration.md | 24 ++-\n", - " docs/source/resources/links.md | 40 ++--\n", - " docs/source/toc.yaml | 12 +-\n", - " examples/01-Getting-started.ipynb | 5 +-\n", - " examples/02-Advanced-NVTabular-workflow.ipynb | 5 +-\n", - " .../03-Running-on-multiple-GPUs-or-on-CPU.ipynb | 24 ++-\n", - " examples/README.md | 1 +\n", - " .../tensorflow/tfrecords_to_parquet.py | 9 +-\n", - " nvtabular/inference/__init__.py | 4 +-\n", - " nvtabular/inference/triton/data_conversions.py | 24 +--\n", - " nvtabular/inference/triton/ensemble.py | 86 ++------\n", - " nvtabular/inference/triton/model/model_pt.py | 1 -\n", - " nvtabular/inference/workflow/hugectr.py | 2 +-\n", - " nvtabular/loader/backend.py | 31 +--\n", - " nvtabular/loader/tensorflow.py | 1 +\n", - " nvtabular/ops/categorify.py | 4 +-\n", - " nvtabular/ops/column_similarity.py | 42 ++--\n", - " nvtabular/ops/groupby.py | 35 ++--\n", - " nvtabular/ops/join_external.py | 7 +-\n", - " nvtabular/ops/join_groupby.py | 18 +-\n", - " nvtabular/ops/list_slice.py | 22 +-\n", - " nvtabular/ops/moments.py | 2 -\n", - " nvtabular/ops/reduce_dtype_size.py | 9 +-\n", - " nvtabular/ops/target_encoding.py | 2 +-\n", - " nvtabular/ops/value_counts.py | 14 +-\n", - " nvtabular/tools/data_gen.py | 31 ++-\n", - " nvtabular/utils.py | 2 +-\n", - " nvtabular/workflow/workflow.py | 169 +++++++++++++--\n", - " requirements-test.txt | 2 -\n", - " requirements/base.txt | 4 +-\n", - " 
requirements/test.txt | 15 +-\n", - " setup.py | 5 +\n", - " tests/conftest.py | 33 ++-\n", - " .../test_02-Advanced-NVTabular-workflow.py | 17 +-\n", - " .../test_03-Running-on-multiple-GPUs-or-on-CPU.py | 11 +-\n", - " tests/unit/loader/test_tf_dataloader.py | 206 +++---------------\n", - " tests/unit/loader/test_torch_dataloader.py | 79 ++-----\n", - " tests/unit/ops/test_categorify.py | 36 +++-\n", - " tests/unit/ops/test_column_similarity.py | 3 +-\n", - " tests/unit/ops/test_drop_low_cardinality.py | 7 +-\n", - " tests/unit/ops/test_groupyby.py | 9 +-\n", - " tests/unit/ops/test_join.py | 11 +-\n", - " tests/unit/ops/test_lambda.py | 28 ++-\n", - " tests/unit/ops/test_ops.py | 12 +-\n", - " tests/unit/ops/test_ops_schema.py | 25 ++-\n", - " tests/unit/ops/test_reduce_dtype_size.py | 7 +-\n", - " tests/unit/ops/test_target_encode.py | 11 +-\n", - " tests/unit/ops/test_value_count.py | 2 +\n", - " tests/unit/test_dask_nvt.py | 5 +-\n", - " tests/unit/test_s3.py | 8 +-\n", - " tests/unit/test_tf4rec.py | 11 +-\n", - " tests/unit/test_triton_inference.py | 3 +-\n", - " tests/unit/workflow/test_cpu_workflow.py | 6 +-\n", - " tests/unit/workflow/test_workflow.py | 92 ++++++++-\n", - " tox.ini | 10 +-\n", - " 93 files changed, 1448 insertions(+), 1196 deletions(-)\n", - " create mode 100644 .github/workflows/check-base-branch.yaml\n", - " delete mode 100644 .github/workflows/cpu-ci.yml\n", - " create mode 100644 .github/workflows/cpu-packages.yml\n", - " create mode 100644 .github/workflows/cpu-tests.yml\n", - " delete mode 100644 .github/workflows/gpu-ci.yml\n", - " create mode 100644 .github/workflows/gpu-tests.yml\n", - " create mode 100644 .github/workflows/set-stable-branch.yaml\n", - " create mode 100644 .prettierignore\n", - " delete mode 100644 requirements-test.txt\n", + "Branch 'main' set up to track remote branch 'main' from 'origin'.\n", "Processing /nvtabular\n", " Installing build dependencies: started\n", " Installing build dependencies: finished with status 'done'\n", @@ -884,513 +254,650 @@ " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+66.g67136eba) (1.9.3)\n", - "Processing /root/.cache/pip/wheels/42/ef/87/2c64bce8c3064a2c4e399933df4eda4838939355698ff8f7c7/merlin_core-23.4.0-py3-none-any.whl\n", - "Requirement already satisfied: merlin-dataloader>=23.4.0 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+66.g67136eba) (23.4.0)\n", - "Requirement already satisfied: numpy<1.26.0,>=1.18.5 in /usr/local/lib/python3.8/dist-packages (from scipy->nvtabular==1.6.0+66.g67136eba) (1.22.4)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (0.56.4)\n", - "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (11.4.1)\n", - "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.12.0)\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.3.5)\n", - "Requirement already satisfied: distributed>=2022.11.1 in 
/usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2023.4.1)\n", - "Requirement already satisfied: dask>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2023.4.1)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (3.19.6)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (4.64.1)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (22.0)\n", - "Requirement already satisfied: dask-cuda>=22.12.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (23.4.0)\n", - "Requirement already satisfied: fsspec>=2022.7.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2023.5.0)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (8.0.0)\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.2.5)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (45.2.0)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (0.39.1)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (5.2.0)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.3.0)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.57.0)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2022.7)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.8.2)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (6.0)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.0.0)\n", - "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.4.0)\n", - "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) 
(8.1.3)\n", - "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.2.0)\n", - "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.0.4)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.7.0)\n", - "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (0.12.0)\n", - "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.26.13)\n", - "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (5.9.4)\n", - "Requirement already satisfied: zict>=2.2.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.2.0)\n", - "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (6.1)\n", - "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (3.1.2)\n", - "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.3.0)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.2.0)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (0.4.3)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (3.11.0)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.14.0)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=2.2.0->distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.0.1)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.1.1)\n", - "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (4.1.0)\n", - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (6.0.4)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from 
h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (4.0.0)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (6.0.1)\n", "Building wheels for collected packages: nvtabular\n", " Building wheel for nvtabular (PEP 517): started\n", " Building wheel for nvtabular (PEP 517): finished with status 'done'\n", - " Created wheel for nvtabular: filename=nvtabular-1.6.0+66.g67136eba-cp38-cp38-linux_x86_64.whl size=259850 sha256=b7b2ec970d1e905ffca54a11728068e88a5ef40dfcd582124e0d0d1c8ca7d590\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-kfeyyfk1/wheels/df/bf/c2/9cc2a62fe6da42038c26a9c0c4e25f9767093528b102fa30a2\n", + " Created wheel for nvtabular: filename=nvtabular-23.5.dev0+7.g67136eba-cp38-cp38-linux_x86_64.whl size=259872 sha256=a449abf03f9b7d8ea9a0810fb4e25776beaf1a96332e07547ec89c033cd34f7b\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-7f9__rd9/wheels/df/bf/c2/9cc2a62fe6da42038c26a9c0c4e25f9767093528b102fa30a2\n", "Successfully built nvtabular\n", - "Installing collected packages: merlin-core, nvtabular\n", - " Attempting uninstall: merlin-core\n", - " Found existing installation: merlin-core 0.9.0+125.ga0bcd30f\n", - " Uninstalling merlin-core-0.9.0+125.ga0bcd30f:\n", - " Successfully uninstalled merlin-core-0.9.0+125.ga0bcd30f\n", + "Installing collected packages: nvtabular\n", " Attempting uninstall: nvtabular\n", - " Found existing installation: nvtabular 1.8.0\n", - " Uninstalling nvtabular-1.8.0:\n", - " Successfully uninstalled nvtabular-1.8.0\n", - "Successfully installed merlin-core-23.4.0 nvtabular-1.6.0+66.g67136eba\n" + " Found existing installation: nvtabular 23.2.0\n", + " Uninstalling nvtabular-23.2.0:\n", + " Successfully uninstalled nvtabular-23.2.0\n", + "Successfully installed nvtabular-23.5.dev0+7.g67136eba\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "Previous HEAD position was feaf748 adding async tf strategy for gpu memory (#264)\n", - "Switched to branch 'main'\n" + "From https://github.com/NVIDIA-Merlin/Models\n", + " * [new branch] Mai -> origin/Mai\n", + " * [new branch] add_category_encoding_test -> origin/add_category_encoding_test\n", + " * [new branch] add_lightfm_and_explicit_training_example -> origin/add_lightfm_and_explicit_training_example\n", + " * [new branch] add_logo_tracking_to_07 -> origin/add_logo_tracking_to_07\n", + " * [new branch] add_notebooks_test -> origin/add_notebooks_test\n", + " * [new branch] advanced_example -> origin/advanced_example\n", + " * [new branch] asvdb_metric_tracking -> origin/asvdb_metric_tracking\n", + " * [new branch] batched-dataset/schema -> origin/batched-dataset/schema\n", + " * [new branch] benchmark-session-based -> origin/benchmark-session-based\n", + " * [new branch] block-context -> origin/block-context\n", + " * [new branch] blossom_report_skipped -> origin/blossom_report_skipped\n", + " * [new branch] break_ties -> origin/break_ties\n", + " * [new branch] bs_unittest_examples_v2 -> origin/bs_unittest_examples_v2\n", + " * [new branch] bschifferer-patch-1 -> origin/bschifferer-patch-1\n", + " * [new branch] change_two_tower_api_test -> origin/change_two_tower_api_test\n", + " * [new branch] ci/backend-tests -> origin/ci/backend-tests\n", + " * [new branch] ci/example-linting -> origin/ci/example-linting\n", + " * [new branch] ci/horovod -> origin/ci/horovod\n", + " * [new branch] cicd -> 
origin/cicd\n", + " * [new branch] codespell_fix -> origin/codespell_fix\n", + " * [new branch] compare_ranking_models -> origin/compare_ranking_models\n", + " * [new branch] conda_recipe -> origin/conda_recipe\n", + " * [new branch] consolidate-abstractions -> origin/consolidate-abstractions\n", + " * [new branch] dataloader_tag_fix -> origin/dataloader_tag_fix\n", + " * [new branch] dcn_tests -> origin/dcn_tests\n", + " * [new branch] deps/merlin-core-commit -> origin/deps/merlin-core-commit\n", + " * [new branch] docs-strings -> origin/docs-strings\n", + " * [new branch] docs/interrogate-cfg -> origin/docs/interrogate-cfg\n", + " * [new branch] docs/interrogate-config -> origin/docs/interrogate-config\n", + " * [new branch] emb_export_fix -> origin/emb_export_fix\n", + " * [new branch] evaluate_fixes -> origin/evaluate_fixes\n", + " * [new branch] examples/unit-tests -> origin/examples/unit-tests\n", + " * [new branch] examples/update_link -> origin/examples/update_link\n", + " * [new branch] examples_fixes -> origin/examples_fixes\n", + " * [new branch] fea-sok-integration-wj -> origin/fea-sok-integration-wj\n", + " * [new branch] fea-sok-load-dump -> origin/fea-sok-load-dump\n", + " * [new branch] feature/multi-hot-columns -> origin/feature/multi-hot-columns\n", + " * [new branch] feature/retrieval-dnn -> origin/feature/retrieval-dnn\n", + " * [new branch] fix-contrastive-predictions -> origin/fix-contrastive-predictions\n", + " * [new branch] fix/aliccp_workflow -> origin/fix/aliccp_workflow\n", + " * [new branch] fix/batch_predict -> origin/fix/batch_predict\n", + " * [new branch] fix/example-tests -> origin/fix/example-tests\n", + " * [new branch] fix/python-version -> origin/fix/python-version\n", + " * [new branch] fix/shared_embeddings -> origin/fix/shared_embeddings\n", + " * [new branch] fix_aliccp_schema -> origin/fix_aliccp_schema\n", + " * [new branch] fix_cated_ohe -> origin/fix_cated_ohe\n", + " * [new branch] fix_datetime_issue_add_inference_on_TIS -> origin/fix_datetime_issue_add_inference_on_TIS\n", + " * [new branch] fix_lightfm_evaluate -> origin/fix_lightfm_evaluate\n", + " * [new branch] fix_masking -> origin/fix_masking\n", + " * [new branch] fix_mtl_metrics -> origin/fix_mtl_metrics\n", + " * [new branch] fix_notebooks -> origin/fix_notebooks\n", + " * [new branch] fix_regression -> origin/fix_regression\n", + " * [new branch] fix_retrieval -> origin/fix_retrieval\n", + " * [new branch] fix_retrieval_eval_loss -> origin/fix_retrieval_eval_loss\n", + " * [new branch] fix_sampled_softmax_evaluation -> origin/fix_sampled_softmax_evaluation\n", + " * [new branch] fix_test_07 -> origin/fix_test_07\n", + " * [new branch] getting_started_exp -> origin/getting_started_exp\n", + " * [new branch] gh-pages -> origin/gh-pages\n", + " * [new branch] hashed_cross_test -> origin/hashed_cross_test\n", + " * [new branch] implement_review_comments -> origin/implement_review_comments\n", + " * [new branch] in-bath-sampling-bug -> origin/in-bath-sampling-bug\n", + " * [new branch] infer_embeddings -> origin/infer_embeddings\n", + " * [new branch] inference_benchmarking_transformers -> origin/inference_benchmarking_transformers\n", + " * [new branch] laiacano/concurrency -> origin/laiacano/concurrency\n", + " * [new branch] laiacano/tox -> origin/laiacano/tox\n", + " * [new branch] layer_freezing_test -> origin/layer_freezing_test\n", + " * [new branch] load_retrieval_model -> origin/load_retrieval_model\n", + " * [new branch] logit_correction_nol2_temp -> 
origin/logit_correction_nol2_temp\n", + " * [new branch] losses -> origin/losses\n", + " * [new branch] main -> origin/main\n", + " * [new branch] masking_transforms -> origin/masking_transforms\n", + " * [new branch] merlin-standard-lib -> origin/merlin-standard-lib\n", + " * [new branch] metrics_opt -> origin/metrics_opt\n", + " * [new branch] metrics_opt2 -> origin/metrics_opt2\n", + " * [new branch] mikemckiernan-patch-1 -> origin/mikemckiernan-patch-1\n", + " * [new branch] mlm -> origin/mlm\n", + " * [new branch] mlm_alt -> origin/mlm_alt\n", + " * [new branch] mlp_selu -> origin/mlp_selu\n", + " * [new branch] mrr_fix -> origin/mrr_fix\n", + " * [new branch] mtl_example -> origin/mtl_example\n", + " * [new branch] mtl_loss -> origin/mtl_loss\n", + " * [new branch] mtl_models -> origin/mtl_models\n", + " * [new branch] mtl_regularization -> origin/mtl_regularization\n", + " * [new branch] multi_optimizer_example -> origin/multi_optimizer_example\n", + " * [new branch] neg_sampling -> origin/neg_sampling\n", + " * [new branch] poc -> origin/poc\n", + " * [new branch] pretrained_init -> origin/pretrained_init\n", + " * [new branch] radekosmulski-patch-2 -> origin/radekosmulski-patch-2\n", + " * [new branch] ragged_embeddings -> origin/ragged_embeddings\n", + " * [new branch] ranking_models_inputs -> origin/ranking_models_inputs\n", + " * [new branch] ranking_tests -> origin/ranking_tests\n", + " * [new branch] ranking_tests3 -> origin/ranking_tests3\n", + " * [new branch] readme_bash -> origin/readme_bash\n", + " * [new branch] refactor-docs-reqs -> origin/refactor-docs-reqs\n", + " * [new branch] refactor/docs-reqs -> origin/refactor/docs-reqs\n", + " * [new branch] refactor/embedding-layers -> origin/refactor/embedding-layers\n", + " * [new branch] refactor/youtube-retrieval -> origin/refactor/youtube-retrieval\n", + " * [new branch] release-22.10 -> origin/release-22.10\n", + " * [new branch] release-22.11 -> origin/release-22.11\n", + " * [new branch] release-22.12 -> origin/release-22.12\n", + " * [new branch] release-23.02 -> origin/release-23.02\n", + " * [new branch] release-23.04 -> origin/release-23.04\n", + " * [new branch] remove/masking -> origin/remove/masking\n", + " * [new branch] reset-metrics -> origin/reset-metrics\n", + " * [new branch] retrieval-sample-weights -> origin/retrieval-sample-weights\n", + " * [new branch] retrieval_debug -> origin/retrieval_debug\n", + " * [new branch] retrieval_debug_no_l2norm -> origin/retrieval_debug_no_l2norm\n", + " * [new branch] retrieval_debug_scores_temp -> origin/retrieval_debug_scores_temp\n", + " * [new branch] retrieval_eval_fix -> origin/retrieval_eval_fix\n", + " * [new branch] retrieval_fixes -> origin/retrieval_fixes\n", + " * [new branch] retrieval_fixes_2 -> origin/retrieval_fixes_2\n" ] }, { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "Your branch is up to date with 'origin/main'.\n" + " * [new branch] retrieval_integration_tests -> origin/retrieval_integration_tests\n", + " * [new branch] revert-813-laiacano/tox-and-tmpdir -> origin/revert-813-laiacano/tox-and-tmpdir\n", + " * [new branch] romeyn/block-api -> origin/romeyn/block-api\n", + " * [new branch] romeyn/block-cleanup -> origin/romeyn/block-cleanup\n", + " * [new branch] romeyn/inputs -> origin/romeyn/inputs\n", + " * [new branch] sampling -> origin/sampling\n", + " * [new branch] select-by-tag -> origin/select-by-tag\n", + " * [new branch] stable -> origin/stable\n", + " * [new branch] t4rec_use_case -> 
origin/t4rec_use_case\n", + " * [new branch] tf/add-bokeh-to-dev -> origin/tf/add-bokeh-to-dev\n", + " * [new branch] tf/base-model-test-graph-mode -> origin/tf/base-model-test-graph-mode\n", + " * [new branch] tf/batch_predict_fix -> origin/tf/batch_predict_fix\n", + " * [new branch] tf/categorical-prediction -> origin/tf/categorical-prediction\n", + " * [new branch] tf/categorical-prediction-2 -> origin/tf/categorical-prediction-2\n", + " * [new branch] tf/column_sampling_serialization_fix -> origin/tf/column_sampling_serialization_fix\n", + " * [new branch] tf/combinators-base -> origin/tf/combinators-base\n", + " * [new branch] tf/cond -> origin/tf/cond\n", + " * [new branch] tf/context-tensor -> origin/tf/context-tensor\n", + " * [new branch] tf/continuous_seq_feats_fix -> origin/tf/continuous_seq_feats_fix\n", + " * [new branch] tf/contrastive-prediction -> origin/tf/contrastive-prediction\n", + " * [new branch] tf/core -> origin/tf/core\n", + " * [new branch] tf/dataloader_changes -> origin/tf/dataloader_changes\n", + " * [new branch] tf/dep-prediction-tasks -> origin/tf/dep-prediction-tasks\n", + " * [new branch] tf/dlrm_dropout_fix -> origin/tf/dlrm_dropout_fix\n", + " * [new branch] tf/dynamic-memory-growth -> origin/tf/dynamic-memory-growth\n", + " * [new branch] tf/embedding-tables -> origin/tf/embedding-tables\n", + " * [new branch] tf/embeddings_regularization -> origin/tf/embeddings_regularization\n", + " * [new branch] tf/evaluate_retrieval -> origin/tf/evaluate_retrieval\n", + " * [new branch] tf/fix_broadcast_to_sequence -> origin/tf/fix_broadcast_to_sequence\n", + " * [new branch] tf/fix_logq_correction -> origin/tf/fix_logq_correction\n", + " * [new branch] tf/fix_mlm_test -> origin/tf/fix_mlm_test\n", + " * [new branch] tf/fix_tag_item_id -> origin/tf/fix_tag_item_id\n", + " * [new branch] tf/fix_tests_shared_state -> origin/tf/fix_tests_shared_state\n", + " * [new branch] tf/fix_training_smaller_accuracy -> origin/tf/fix_training_smaller_accuracy\n", + " * [new branch] tf/input-block -> origin/tf/input-block\n", + " * [new branch] tf/input-block-filter -> origin/tf/input-block-filter\n", + " * [new branch] tf/inputs-concat -> origin/tf/inputs-concat\n", + " * [new branch] tf/keras-embedding -> origin/tf/keras-embedding\n", + " * [new branch] tf/logit_correction -> origin/tf/logit_correction\n", + " * [new branch] tf/loglossmetric_callbacks -> origin/tf/loglossmetric_callbacks\n", + " * [new branch] tf/logq_correction -> origin/tf/logq_correction\n", + " * [new branch] tf/loss_batch_metric -> origin/tf/loss_batch_metric\n", + " * [new branch] tf/map-values -> origin/tf/map-values\n", + " * [new branch] tf/masking_block -> origin/tf/masking_block\n", + " * [new branch] tf/mf-retrieval-model -> origin/tf/mf-retrieval-model\n", + " * [new branch] tf/mlm-schema -> origin/tf/mlm-schema\n", + " * [new branch] tf/model-tests -> origin/tf/model-tests\n", + " * [new branch] tf/model/sequential -> origin/tf/model/sequential\n", + " * [new branch] tf/move-core -> origin/tf/move-core\n", + " * [new branch] tf/mtl_example_updates_v2 -> origin/tf/mtl_example_updates_v2\n", + " * [new branch] tf/multi_task_improv -> origin/tf/multi_task_improv\n", + " * [new branch] tf/ncf_model -> origin/tf/ncf_model\n", + " * [new branch] tf/output-block -> origin/tf/output-block\n", + " * [new branch] tf/pop_metrics -> origin/tf/pop_metrics\n", + " * [new branch] tf/prediction -> origin/tf/prediction\n", + " * [new branch] tf/prediction-block -> origin/tf/prediction-block\n", + " * [new branch] 
tf/pretrained_emb -> origin/tf/pretrained_emb\n", + " * [new branch] tf/process_list_to_prepare_features -> origin/tf/process_list_to_prepare_features\n", + " * [new branch] tf/pruning-parallel-block -> origin/tf/pruning-parallel-block\n", + " * [new branch] tf/quick_start_ranking -> origin/tf/quick_start_ranking\n", + " * [new branch] tf/ragged-tensors -> origin/tf/ragged-tensors\n", + " * [new branch] tf/ranking_metrics_sort -> origin/tf/ranking_metrics_sort\n", + " * [new branch] tf/refactor -> origin/tf/refactor\n", + " * [new branch] tf/retireval_eval -> origin/tf/retireval_eval\n", + " * [new branch] tf/retrieval-eval -> origin/tf/retrieval-eval\n", + " * [new branch] tf/retrieval-model-v2 -> origin/tf/retrieval-model-v2\n", + " * [new branch] tf/retrieval-models -> origin/tf/retrieval-models\n", + " * [new branch] tf/sampling/items -> origin/tf/sampling/items\n", + " * [new branch] tf/save-regularizer -> origin/tf/save-regularizer\n", + " * [new branch] tf/target-propagation -> origin/tf/target-propagation\n", + " * [new branch] tf/targets -> origin/tf/targets\n", + " * [new branch] tf/tf-cont-list -> origin/tf/tf-cont-list\n", + " * [new branch] tf/topk_recommender -> origin/tf/topk_recommender\n", + " * [new branch] tf/tower-save -> origin/tf/tower-save\n", + " * [new branch] tf/train_metrics_steps_fix -> origin/tf/train_metrics_steps_fix\n", + " * [new branch] tf/transformer-api -> origin/tf/transformer-api\n", + " * [new branch] tf/transformer-block -> origin/tf/transformer-block\n", + " * [new branch] tf/transformer_block -> origin/tf/transformer_block\n", + " * [new branch] tf/wide_and_deep -> origin/tf/wide_and_deep\n", + " * [new branch] tf/wrap-as-model -> origin/tf/wrap-as-model\n", + " * [new branch] tf/xlnet-bug -> origin/tf/xlnet-bug\n", + " * [new branch] torch/clean-up -> origin/torch/clean-up\n", + " * [new branch] torch/dev -> origin/torch/dev\n", + " * [new branch] torch/masking -> origin/torch/masking\n", + " * [new branch] torch/prototype -> origin/torch/prototype\n", + " * [new branch] torch/remove-t4r-code -> origin/torch/remove-t4r-code\n", + " * [new branch] tox_github_actions_fix -> origin/tox_github_actions_fix\n", + " * [new branch] transformer-api -> origin/transformer-api\n", + " * [new branch] two_tower_fixes -> origin/two_tower_fixes\n", + " * [new branch] update_07 -> origin/update_07\n", + " * [new branch] update_advanced_notebook -> origin/update_advanced_notebook\n", + " * [new branch] update_example_01 -> origin/update_example_01\n", + " * [new branch] update_examples_with_tracking_logo -> origin/update_examples_with_tracking_logo\n", + " * [new branch] v0.2.0-docs -> origin/v0.2.0-docs\n", + " * [new branch] v0.3.0-docs -> origin/v0.3.0-docs\n", + " * [new branch] validation_data_fix -> origin/validation_data_fix\n", + " * [new branch] validation_data_fix2 -> origin/validation_data_fix2\n", + " * [new branch] wide_deep_example_test -> origin/wide_deep_example_test\n", + " * [new branch] wideanddeep_example -> origin/wideanddeep_example\n", + " * [new branch] xgboost/predict-without-target -> origin/xgboost/predict-without-target\n", + " * [new branch] youtube_dnn_retrieval -> origin/youtube_dnn_retrieval\n", + " * [new branch] youtubednn_improv -> origin/youtubednn_improv\n", + " * [new branch] youtubednn_logq -> origin/youtubednn_logq\n", + " * [new tag] v0.10.0 -> v0.10.0\n", + " * [new tag] v0.11.0 -> v0.11.0\n", + " * [new tag] v0.9.0 -> v0.9.0\n", + " * [new tag] v23.04.00 -> v23.04.00\n", + " * [new tag] v0.1.0 -> v0.1.0\n", + " * [new tag] 
v0.2.0 -> v0.2.0\n", + " * [new tag] v0.3.0 -> v0.3.0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "From https://github.com/NVIDIA-Merlin/systems\n", - " * branch main -> FETCH_HEAD\n", - " 20bb231..2b1b90b main -> origin/main\n" + " * [new tag] v0.4.0 -> v0.4.0\n", + " * [new tag] v0.5.0 -> v0.5.0\n", + " * [new tag] v0.6.0 -> v0.6.0\n", + " * [new tag] v0.7.0 -> v0.7.0\n", + " * [new tag] v0.8.0 -> v0.8.0\n", + " * [new tag] v23.05.dev0 -> v23.05.dev0\n", + "Previous HEAD position was ee03bb1c Increase tolerance in retrieval transformer test and random seed (#1007)\n", + "Switched to a new branch 'main'\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Updating 20bb231..2b1b90b\n", - "Fast-forward\n", - " .github/ISSUE_TEMPLATE/bug-report.md | 17 +-\n", - " .github/ISSUE_TEMPLATE/documentation-request.md | 12 +-\n", - " .github/ISSUE_TEMPLATE/feature-request.md | 5 +-\n", - " .github/ISSUE_TEMPLATE/submit-question.md | 3 +-\n", - " .github/ISSUE_TEMPLATE/task.md | 5 +-\n", - " .github/release-drafter.yml | 44 +-\n", - " .github/workflows/check-base-branch.yaml | 9 +\n", - " .github/workflows/cpu-ci.yml | 128 ++--\n", - " .github/workflows/docs-preview-pr.yaml | 6 +-\n", - " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", - " .github/workflows/gpu-ci.yml | 40 +-\n", - " .github/workflows/lint.yaml | 18 +-\n", - " .github/workflows/packages.yaml | 118 ++++\n", - " .github/workflows/postmerge-cpu.yml | 60 ++\n", - " .github/workflows/postmerge-gpu.yml | 27 +\n", - " .github/workflows/release-drafter.yml | 4 +-\n", - " .github/workflows/set-stable-branch.yaml | 10 +\n", - " .pre-commit-config.yaml | 71 +-\n", - " .prettierignore | 2 +\n", - " CLA.md | 9 +-\n", - " CONTRIBUTING.md | 2 +-\n", - " README.md | 2 +-\n", - " ci/pr.gpu.Jenkinsfile | 2 +-\n", - " conda/recipes/meta.yaml | 18 +-\n", - " docs/README.md | 53 +-\n", - " ...ing-An-Implicit-Model-With-Merlin-Systems.ipynb | 5 +-\n", - " ...ving-An-XGboost-Model-With-Merlin-Systems.ipynb | 5 +-\n", - " ...erving-Ranking-Models-With-Merlin-Systems.ipynb | 5 +-\n", - " merlin/systems/dag/__init__.py | 2 -\n", - " merlin/systems/dag/dictarray.py | 345 ----------\n", - " merlin/systems/dag/ensemble.py | 2 +-\n", - " merlin/systems/dag/node.py | 29 +-\n", - " merlin/systems/dag/op_runner.py | 68 --\n", - " merlin/systems/dag/ops/__init__.py | 22 +-\n", - " merlin/systems/dag/ops/faiss.py | 116 +---\n", - " merlin/systems/dag/ops/feast.py | 110 +---\n", - " merlin/systems/dag/ops/fil.py | 74 +--\n", - " merlin/systems/dag/ops/implicit.py | 84 +--\n", - " merlin/systems/dag/ops/operator.py | 216 +-----\n", - " merlin/systems/dag/ops/pytorch.py | 23 +-\n", - " merlin/systems/dag/ops/session_filter.py | 72 +-\n", - " merlin/systems/dag/ops/softmax_sampling.py | 61 +-\n", - " merlin/systems/dag/ops/tensorflow.py | 143 ++--\n", - " merlin/systems/dag/ops/unroll_features.py | 36 +-\n", - " merlin/systems/dag/ops/workflow.py | 29 +-\n", - " merlin/systems/dag/runtimes/triton/ops/fil.py | 51 +-\n", - " merlin/systems/dag/runtimes/triton/ops/operator.py | 84 ++-\n", - " merlin/systems/dag/runtimes/triton/ops/pytorch.py | 27 +-\n", - " .../systems/dag/runtimes/triton/ops/tensorflow.py | 41 +-\n", - " merlin/systems/dag/runtimes/triton/ops/workflow.py | 132 +++-\n", - " merlin/systems/dag/runtimes/triton/runtime.py | 36 +-\n", - " merlin/systems/triton/__init__.py | 118 ++--\n", - " merlin/systems/triton/conversions.py | 198 ++++--\n", - " merlin/systems/triton/export.py | 731 +--------------------\n", - " 
merlin/systems/triton/models/executor_model.py | 46 +-\n", - " merlin/systems/triton/models/oprunner_model.py | 129 ----\n", - " merlin/systems/triton/models/pytorch_model.py | 139 ++--\n", - " merlin/systems/triton/models/workflow_model.py | 56 +-\n", - " merlin/systems/triton/utils.py | 58 +-\n", - " merlin/systems/workflow/base.py | 30 +-\n", - " merlin/systems/workflow/hugectr.py | 87 ---\n", - " merlin/systems/workflow/pytorch.py | 46 --\n", - " merlin/systems/workflow/tensorflow.py | 68 --\n", - " pytest.ini | 7 +-\n", - " requirements/test.txt | 2 +-\n", - " tests/conftest.py | 36 +-\n", - " ...erving_an_implicit_model_with_merlin_systems.py | 12 +-\n", - " ...serving_an_xgboost_model_with_merlin_systems.py | 4 +-\n", - " tests/integration/tf/test_transformer_model.py | 103 +++\n", - " .../systems/dag/test_column.py => test_passing.py} | 15 +-\n", - " tests/unit/systems/dag/ops/test_ops.py | 101 ++-\n", - " .../dag/runtimes/local/ops/fil/test_lightgbm.py | 15 +-\n", - " .../dag/runtimes/local/ops/fil/test_sklearn.py | 15 +-\n", - " .../dag/runtimes/local/ops/fil/test_xgboost.py | 18 +-\n", - " .../runtimes/local/ops/nvtabular/test_ensemble.py | 10 +-\n", - " .../runtimes/local/ops/tensorflow/test_ensemble.py | 35 +-\n", - " .../dag/runtimes/local/ops/torch/test_op.py | 6 +-\n", - " .../triton/ops/fil/test_lightgbm_triton.py | 11 +-\n", - " .../runtimes/triton/ops/fil/test_sklearn_triton.py | 4 +-\n", - " .../runtimes/triton/ops/fil/test_xgboost_triton.py | 7 +-\n", - " .../dag/runtimes/triton/ops/torch/test_op.py | 4 +-\n", - " .../runtimes/triton/ops/workflow/test_ensemble.py | 305 ++++++++-\n", - " .../systems/dag/runtimes/triton/test_triton.py | 21 +-\n", - " tests/unit/systems/dag/test_dict_array.py | 76 ---\n", - " tests/unit/systems/dag/test_ensemble.py | 4 +-\n", - " tests/unit/systems/dag/test_executors.py | 12 +-\n", - " tests/unit/systems/dag/test_op_runner.py | 210 ------\n", - " tests/unit/systems/ops/embedding_op.py | 56 ++\n", - " tests/unit/systems/ops/faiss/test_executor.py | 25 +-\n", - " tests/unit/systems/ops/feast/test_op.py | 76 +--\n", - " tests/unit/systems/ops/fil/test_ensemble.py | 21 +-\n", - " tests/unit/systems/ops/fil/test_forest.py | 47 +-\n", - " tests/unit/systems/ops/fil/test_op.py | 106 ++-\n", - " tests/unit/systems/ops/implicit/test_executor.py | 4 +-\n", - " tests/unit/systems/ops/implicit/test_op.py | 51 +-\n", - " tests/unit/systems/ops/padding_op.py | 62 ++\n", - " tests/unit/systems/ops/tf/test_ensemble.py | 15 +-\n", - " tests/unit/systems/ops/tf/test_op.py | 6 +-\n", - " tests/unit/systems/ops/torch/test_ensemble.py | 97 +++\n", - " tests/unit/systems/utils/ops.py | 13 +-\n", - " tests/unit/systems/utils/tf.py | 65 +-\n", - " tests/unit/test_export.py | 77 ---\n", - " tox.ini | 42 +-\n", - " 103 files changed, 2427 insertions(+), 3565 deletions(-)\n", - " create mode 100644 .github/workflows/check-base-branch.yaml\n", - " create mode 100644 .github/workflows/packages.yaml\n", - " create mode 100644 .github/workflows/postmerge-cpu.yml\n", - " create mode 100644 .github/workflows/postmerge-gpu.yml\n", - " create mode 100644 .github/workflows/set-stable-branch.yaml\n", - " create mode 100644 .prettierignore\n", - " delete mode 100644 merlin/systems/dag/dictarray.py\n", - " delete mode 100644 merlin/systems/dag/op_runner.py\n", - " delete mode 100644 merlin/systems/triton/models/oprunner_model.py\n", - " delete mode 100644 merlin/systems/workflow/hugectr.py\n", - " delete mode 100644 merlin/systems/workflow/pytorch.py\n", - " delete mode 100644 
merlin/systems/workflow/tensorflow.py\n", - " create mode 100644 tests/integration/tf/test_transformer_model.py\n", - " rename tests/{unit/systems/dag/test_column.py => test_passing.py} (66%)\n", - " delete mode 100644 tests/unit/systems/dag/test_dict_array.py\n", - " delete mode 100644 tests/unit/systems/dag/test_op_runner.py\n", - " create mode 100644 tests/unit/systems/ops/embedding_op.py\n", - " create mode 100644 tests/unit/systems/ops/padding_op.py\n", - " create mode 100644 tests/unit/systems/ops/torch/test_ensemble.py\n", - " delete mode 100644 tests/unit/test_export.py\n", - "Processing /systems\n", + "Branch 'main' set up to track remote branch 'main' from 'origin'.\n", + "Processing /models\n", " Installing build dependencies: started\n", " Installing build dependencies: finished with status 'done'\n", " Getting requirements to build wheel: started\n", " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (23.4.0)\n", - "Requirement already satisfied: treelite==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (2.4.0)\n" + "Building wheels for collected packages: merlin-models\n", + " Building wheel for merlin-models (PEP 517): started\n", + " Building wheel for merlin-models (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-models: filename=merlin_models-23.5.dev0+12.gd8133b8f-py3-none-any.whl size=343289 sha256=ae06460f4a2d29eb360acf9a1f0c15d732fc12d4d825c87c293f82719ceda62c\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-zb4niy12/wheels/4d/e8/98/0493db55fff90dc9af123f55a9455b96f7f8166c912a02c8a6\n", + "Successfully built merlin-models\n", + "Installing collected packages: merlin-models\n", + " Attempting uninstall: merlin-models\n", + " Found existing installation: merlin-models 23.2.0\n", + " Uninstalling merlin-models-23.2.0:\n", + " Successfully uninstalled merlin-models-23.2.0\n", + "Successfully installed merlin-models-23.5.dev0+12.gd8133b8f\n" ] }, { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "Requirement already satisfied: nvtabular>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (1.6.0+66.g67136eba)\n", - "Requirement already satisfied: treelite-runtime==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (2.4.0)\n", - "Requirement already satisfied: requests<3,>=2.10 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (2.28.1)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (22.0)\n", - "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (11.4.1)\n", - "Requirement already satisfied: dask>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2023.4.1)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (8.0.0)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from 
merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (4.64.1)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (0.56.4)\n", - "Requirement already satisfied: numpy>=1.22.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.22.4)\n", - "Requirement already satisfied: distributed>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2023.4.1)\n", - "Requirement already satisfied: dask-cuda>=22.12.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (23.4.0)\n", - "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.12.0)\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.2.5)\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.3.5)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (3.19.6)\n", - "Requirement already satisfied: fsspec>=2022.7.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2023.5.0)\n", - "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from treelite==2.4.0->merlin-systems==0.7.0+98.g2b1b90b) (1.9.3)\n", - "Requirement already satisfied: merlin-dataloader>=23.4.0 in /usr/local/lib/python3.8/dist-packages (from nvtabular>=1.0.0->merlin-systems==0.7.0+98.g2b1b90b) (23.4.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+98.g2b1b90b) (2.8)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+98.g2b1b90b) (1.26.13)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+98.g2b1b90b) (2019.11.28)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+98.g2b1b90b) (2.1.1)\n", - "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.3.0)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (6.0)\n", - "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (8.1.3)\n", - "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.2.0)\n", - "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (0.12.0)\n", - "Requirement already satisfied: importlib-metadata>=4.13.0 in 
/usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (5.2.0)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (0.39.1)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (45.2.0)\n", - "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.0.4)\n", - "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.4.0)\n", - "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (5.9.4)\n", - "Requirement already satisfied: zict>=2.2.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.2.0)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.7.0)\n", - "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (6.1)\n", - "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (3.1.2)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.0.0)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.57.0)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.3.0)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.2.0)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (0.4.3)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2022.7)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.8.2)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata>=4.13.0->dask>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (3.11.0)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=2.2.0->distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.0.1)\n", - 
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.1.1)\n", - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (6.0.4)\n" + "From https://github.com/NVIDIA-Merlin/systems\n", + " * [new branch] add_xgboost_serving_example -> origin/add_xgboost_serving_example\n", + " * [new branch] bschifferer-patch-1 -> origin/bschifferer-patch-1\n", + " * [new branch] bschifferer-patch-2 -> origin/bschifferer-patch-2\n", + " * [new branch] ci/cpu-action -> origin/ci/cpu-action\n", + " * [new branch] dataset-cpu-default-None -> origin/dataset-cpu-default-None\n", + " * [new branch] docs-nightly-build -> origin/docs-nightly-build\n", + " * [new branch] docs-remove-deps -> origin/docs-remove-deps\n", + " * [new branch] docs-tox -> origin/docs-tox\n", + " * [new branch] docs/contributing -> origin/docs/contributing\n", + " * [new branch] docs/coverage-threshold -> origin/docs/coverage-threshold\n", + " * [new branch] docs/docstring-coverage -> origin/docs/docstring-coverage\n", + " * [new branch] docs/interrogate-cfg -> origin/docs/interrogate-cfg\n", + " * [new branch] docs/interrogate-config -> origin/docs/interrogate-config\n", + " * [new branch] docs/issue-templates -> origin/docs/issue-templates\n", + " * [new branch] docs/readme -> origin/docs/readme\n", + " * [new branch] feast-errors -> origin/feast-errors\n", + " * [new branch] feature/pytorch -> origin/feature/pytorch\n", + " * [new branch] feature/t4r-serving -> origin/feature/t4r-serving\n", + " * [new branch] feature/torchscript -> origin/feature/torchscript\n", + " * [new branch] fix/dask-dist-deps -> origin/fix/dask-dist-deps\n", + " * [new branch] fix/faiss-types -> origin/fix/faiss-types\n", + " * [new branch] fix/multi-hot-dtypes -> origin/fix/multi-hot-dtypes\n", + " * [new branch] fix/multihot-schemas -> origin/fix/multihot-schemas\n", + " * [new branch] fix/pkg-build-lib -> origin/fix/pkg-build-lib\n", + " * [new branch] fix/pytest-feast -> origin/fix/pytest-feast\n", + " * [new branch] fix/skipped-tests -> origin/fix/skipped-tests\n", + " * [new branch] fix/tf-input-shapes -> origin/fix/tf-input-shapes\n", + " * [new branch] fix/torch-importorskip -> origin/fix/torch-importorskip\n", + " * [new branch] fix_model_outputnames -> origin/fix_model_outputnames\n", + " * [new branch] fix_nb -> origin/fix_nb\n", + " * [new branch] gh-pages -> origin/gh-pages\n", + " * [new branch] laiacano/slack-notify -> origin/laiacano/slack-notify\n", + " * [new branch] laiacano/transformer-import -> origin/laiacano/transformer-import\n", + " * [new branch] laiacano/upgrade-feast -> origin/laiacano/upgrade-feast\n", + " * [new branch] main -> origin/main\n", + " * [new branch] merlin_models_xgboost -> origin/merlin_models_xgboost\n", + " * [new branch] migration/from-nvt -> origin/migration/from-nvt\n", + " * [new branch] polish/remove-dtype-matching -> origin/polish/remove-dtype-matching\n", + " * [new branch] radekosmulski-patch-1 -> origin/radekosmulski-patch-1\n", + " * [new branch] radekosmulski-patch-1-1 -> origin/radekosmulski-patch-1-1\n", + " * [new branch] refactor/dtypes -> origin/refactor/dtypes\n", + " * [new branch] refactor/organize-tests -> origin/refactor/organize-tests\n", + " * [new branch] refactor/schema-validation-hook -> origin/refactor/schema-validation-hook\n", + " * 
[new branch] refactor/virtual-dataframe -> origin/refactor/virtual-dataframe\n", + " * [new branch] release-22.10 -> origin/release-22.10\n", + " * [new branch] release-22.11 -> origin/release-22.11\n", + " * [new branch] release-22.12 -> origin/release-22.12\n", + " * [new branch] release-23.02 -> origin/release-23.02\n", + " * [new branch] release-23.04 -> origin/release-23.04\n", + " * [new branch] run_triton_utils -> origin/run_triton_utils\n", + " * [new branch] stable -> origin/stable\n", + " * [new branch] update-reqs -> origin/update-reqs\n", + " * [new branch] update/precommit-hooks -> origin/update/precommit-hooks\n", + " * [new branch] use_dataloader -> origin/use_dataloader\n", + " * [new branch] v0.0.1-docs -> origin/v0.0.1-docs\n", + " * [new branch] v0.1.0-docs -> origin/v0.1.0-docs\n", + " * [new tag] v0.7.0 -> v0.7.0\n", + " * [new tag] v0.8.0 -> v0.8.0\n", + " * [new tag] v0.9.0 -> v0.9.0\n", + " * [new tag] v23.04.00 -> v23.04.00\n", + " * [new tag] v0.0.1 -> v0.0.1\n", + " * [new tag] v0.1.0 -> v0.1.0\n", + " * [new tag] v0.2.0 -> v0.2.0\n", + " * [new tag] v0.3.0 -> v0.3.0\n", + " * [new tag] v0.4.0 -> v0.4.0\n", + " * [new tag] v0.5.0 -> v0.5.0\n", + " * [new tag] v0.6.0 -> v0.6.0\n", + " * [new tag] v23.05.dev0 -> v23.05.dev0\n", + "Previous HEAD position was 15c6543 Return the original predicted scores from `SoftmaxSampling` (#290)\n", + "Switched to a new branch 'main'\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (4.1.0)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.14.0)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (4.0.0)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (6.0.1)\n", + "Branch 'main' set up to track remote branch 'main' from 'origin'.\n", + "Processing /systems\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", "Building wheels for collected packages: merlin-systems\n", " Building wheel for merlin-systems (PEP 517): started\n", " Building wheel for merlin-systems (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-systems: filename=merlin_systems-0.7.0+98.g2b1b90b-py3-none-any.whl size=83152 sha256=929338ae18fc3ba7e4b48667542c61c8468ba170761cc9e43b7060d9fb636b0a\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-gwpk5ek7/wheels/1f/e9/71/1b0c6295aa7f4b37cb70292d96d87d9f38204674e6531bdda6\n", + " Created wheel for merlin-systems: filename=merlin_systems-23.5.dev0+8.g2b1b90b-py3-none-any.whl size=83188 sha256=1e789fb001120de3654e579f37f3f532fd595058f1c575e3aee285c57468a052\n", + " Stored in directory: 
/tmp/pip-ephem-wheel-cache-dso9c6dl/wheels/1f/e9/71/1b0c6295aa7f4b37cb70292d96d87d9f38204674e6531bdda6\n", "Successfully built merlin-systems\n", "Installing collected packages: merlin-systems\n", " Attempting uninstall: merlin-systems\n", - " Found existing installation: merlin-systems 0.9.0\n", - " Uninstalling merlin-systems-0.9.0:\n", - " Successfully uninstalled merlin-systems-0.9.0\n", - "Successfully installed merlin-systems-0.7.0+98.g2b1b90b\n" + " Found existing installation: merlin-systems 23.2.0\n", + " Uninstalling merlin-systems-23.2.0:\n", + " Successfully uninstalled merlin-systems-23.2.0\n", + "Successfully installed merlin-systems-23.5.dev0+8.g2b1b90b\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "Previous HEAD position was fd5d3fc Use tf.function for list column operations (#89)\n", - "Switched to branch 'main'\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Your branch is up to date with 'origin/main'.\n" + "From https://github.com/NVIDIA-Merlin/Transformers4Rec\n", + " * [new branch] DDP_fix -> origin/DDP_fix\n", + " * [new branch] HF-update -> origin/HF-update\n", + " * [new branch] add_benchmarking_scripts -> origin/add_benchmarking_scripts\n", + " * [new branch] add_topk_layer -> origin/add_topk_layer\n", + " * [new branch] albert17-check -> origin/albert17-check\n", + " * [new branch] batches -> origin/batches\n", + " * [new branch] benfred/datasetschema -> origin/benfred/datasetschema\n", + " * [new branch] clean_rnn_block -> origin/clean_rnn_block\n", + " * [new branch] core-schema/deprecation-warning -> origin/core-schema/deprecation-warning\n", + " * [new branch] core-schema/tabular-features -> origin/core-schema/tabular-features\n", + " * [new branch] core-schema/trainer -> origin/core-schema/trainer\n", + " * [new branch] dataloader -> origin/dataloader\n", + " * [new branch] dataparallel_fix -> origin/dataparallel_fix\n", + " * [new branch] doc/supported_transformers -> origin/doc/supported_transformers\n", + " * [new branch] doc_fix -> origin/doc_fix\n", + " * [new branch] docs -> origin/docs\n", + " * [new branch] etl-nvt -> origin/etl-nvt\n", + " * [new branch] examples -> origin/examples\n", + " * [new branch] fix-data-repartition -> origin/fix-data-repartition\n", + " * [new branch] fix-failing-ci -> origin/fix-failing-ci\n", + " * [new branch] fix-inference -> origin/fix-inference\n", + " * [new branch] fix/transformers_config -> origin/fix/transformers_config\n", + " * [new branch] fix_gettingstarted_nb -> origin/fix_gettingstarted_nb\n", + " * [new branch] fix_inference -> origin/fix_inference\n", + " * [new branch] fix_nbs -> origin/fix_nbs\n", + " * [new branch] fix_oom_tests -> origin/fix_oom_tests\n", + " * [new branch] fix_req_paper_repro -> origin/fix_req_paper_repro\n", + " * [new branch] fix_stochastic -> origin/fix_stochastic\n", + " * [new branch] fix_unit_test -> origin/fix_unit_test\n", + " * [new branch] gh-pages -> origin/gh-pages\n", + " * [new branch] github-templates -> origin/github-templates\n", + " * [new branch] ignore-masking -> origin/ignore-masking\n", + " * [new branch] laiacano/merlin-core-schema -> origin/laiacano/merlin-core-schema\n", + " * [new branch] laiacano/skip-ci-on-closed-pr -> origin/laiacano/skip-ci-on-closed-pr\n", + " * [new branch] license -> origin/license\n", + " * [new branch] main -> origin/main\n", + " * [new branch] masking_quick_fix -> origin/masking_quick_fix\n", + " * [new branch] metric-names-prefix -> origin/metric-names-prefix\n", + " * [new branch] 
model_save_load -> origin/model_save_load\n", + " * [new branch] multi_gpu_doc -> origin/multi_gpu_doc\n", + " * [new branch] multi_gpu_doc_fix -> origin/multi_gpu_doc_fix\n", + " * [new branch] post_fusion_context -> origin/post_fusion_context\n", + " * [new branch] pretrained_embeddings_init -> origin/pretrained_embeddings_init\n", + " * [new branch] pretrained_module -> origin/pretrained_module\n", + " * [new branch] pyt_serving -> origin/pyt_serving\n", + " * [new branch] pytorch/item-id-aggregator -> origin/pytorch/item-id-aggregator\n", + " * [new branch] pytorch/label_smoothing -> origin/pytorch/label_smoothing\n", + " * [new branch] pytorch/model-and-heads -> origin/pytorch/model-and-heads\n", + " * [new branch] pytorch/model-updates -> origin/pytorch/model-updates\n", + " * [new branch] read_schema_from_core -> origin/read_schema_from_core\n", + " * [new branch] recsys22 -> origin/recsys22\n", + " * [new branch] refactor-prediction-task -> origin/refactor-prediction-task\n", + " * [new branch] refactor_part1 -> origin/refactor_part1\n", + " * [new branch] refactor_part2 -> origin/refactor_part2\n", + " * [new branch] release-22.10 -> origin/release-22.10\n", + " * [new branch] release-22.11 -> origin/release-22.11\n", + " * [new branch] release-22.12 -> origin/release-22.12\n", + " * [new branch] release-23.02 -> origin/release-23.02\n", + " * [new branch] release-23.04 -> origin/release-23.04\n", + " * [new branch] release-jperez999 -> origin/release-jperez999\n", + " * [new branch] remove_paper_assets -> origin/remove_paper_assets\n", + " * [new branch] romeyn/dev -> origin/romeyn/dev\n", + " * [new branch] romeyn/transformer-configs -> origin/romeyn/transformer-configs\n", + " * [new branch] save-schema-for-t4rec-model -> origin/save-schema-for-t4rec-model\n", + " * [new branch] schema-pbtxt-bug -> origin/schema-pbtxt-bug\n", + " * [new branch] schema-shape-fix -> origin/schema-shape-fix\n", + " * [new branch] seq_binary_classification -> origin/seq_binary_classification\n", + " * [new branch] serve_nvt_and__model -> origin/serve_nvt_and__model\n", + " * [new branch] session_features -> origin/session_features\n", + " * [new branch] slim_doc_deps -> origin/slim_doc_deps\n", + " * [new branch] soft_embeddings -> origin/soft_embeddings\n", + " * [new branch] ssn_seed -> origin/ssn_seed\n", + " * [new branch] stable -> origin/stable\n", + " * [new branch] stochastic_noise -> origin/stochastic_noise\n", + " * [new branch] stochastic_noise2 -> origin/stochastic_noise2\n", + " * [new branch] synthetic-data -> origin/synthetic-data\n", + " * [new branch] t4rec-MM-repro -> origin/t4rec-MM-repro\n", + " * [new branch] t4rec_paper_repro2 -> origin/t4rec_paper_repro2\n", + " * [new branch] t4rec_refactor -> origin/t4rec_refactor\n", + " * [new branch] tensorflow -> origin/tensorflow\n", + " * [new branch] test-data -> origin/test-data\n", + " * [new branch] test/text_module -> origin/test/text_module\n", + " * [new branch] testing/updates -> origin/testing/updates\n", + " * [new branch] tf/example_notebook -> origin/tf/example_notebook\n", + " * [new branch] tf/fix_compute_loss -> origin/tf/fix_compute_loss\n", + " * [new branch] tf/fix_graph_mode -> origin/tf/fix_graph_mode\n", + " * [new branch] tf/model_saving_and_loading -> origin/tf/model_saving_and_loading\n", + " * [new branch] tf/refactor_item_prediction_task -> origin/tf/refactor_item_prediction_task\n", + " * [new branch] tf/refactor_masking -> origin/tf/refactor_masking\n", + " * [new branch] tf/refactor_ranking_metric -> 
origin/tf/refactor_ranking_metric\n", + " * [new branch] tf/refactor_transformer_block -> origin/tf/refactor_transformer_block\n", + " * [new branch] tf/save_load_model -> origin/tf/save_load_model\n", + " * [new branch] tf/test-utils -> origin/tf/test-utils\n", + " * [new branch] tf/to_tf_model -> origin/tf/to_tf_model\n", + " * [new branch] torch/demo_utils -> origin/torch/demo_utils\n", + " * [new branch] torch/fit_eval -> origin/torch/fit_eval\n", + " * [new branch] torch/fix_evaluation -> origin/torch/fix_evaluation\n", + " * [new branch] torch/fix_examples_utils -> origin/torch/fix_examples_utils\n", + " * [new branch] torch/fix_wipe_memory -> origin/torch/fix_wipe_memory\n", + " * [new branch] torch/label_smoothing_loss -> origin/torch/label_smoothing_loss\n", + " * [new branch] torch/next_item_prediction -> origin/torch/next_item_prediction\n", + " * [new branch] torch/stochastic_swap_noise -> origin/torch/stochastic_swap_noise\n", + " * [new branch] trainer_predict_step -> origin/trainer_predict_step\n", + " * [new branch] tutorial -> origin/tutorial\n", + " * [new branch] unittest_endtoend_multi -> origin/unittest_endtoend_multi\n", + " * [new branch] update/torchmetrics -> origin/update/torchmetrics\n", + " * [new branch] utils -> origin/utils\n", + " * [new branch] v0.1.2-docs -> origin/v0.1.2-docs\n", + " * [new branch] v0.1.3-docs -> origin/v0.1.3-docs\n", + " * [new branch] v0.1.4-docs -> origin/v0.1.4-docs\n", + " * [new branch] v0.1.5-docs -> origin/v0.1.5-docs\n", + " * [new branch] v0.1.6-docs -> origin/v0.1.6-docs\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "From https://github.com/NVIDIA-Merlin/dataloader\n", - " * branch main -> FETCH_HEAD\n", - " 5b3fe46..d9e97b4 main -> origin/main\n" + " * [new branch] v0.1.7-docs -> origin/v0.1.7-docs\n", + " * [new tag] v0.1.14 -> v0.1.14\n", + " * [new tag] v0.1.15 -> v0.1.15\n", + " * [new tag] v0.1.16 -> v0.1.16\n", + " * [new tag] v23.04.00 -> v23.04.00\n", + " * [new tag] v23.05.dev0 -> v23.05.dev0\n", + " * [new tag] custom_dataloader -> custom_dataloader\n", + " * [new tag] v0.1.0 -> v0.1.0\n", + " * [new tag] v0.1.1 -> v0.1.1\n", + " * [new tag] v0.1.10 -> v0.1.10\n", + " * [new tag] v0.1.11 -> v0.1.11\n", + " * [new tag] v0.1.12 -> v0.1.12\n", + " * [new tag] v0.1.13 -> v0.1.13\n", + " * [new tag] v0.1.2 -> v0.1.2\n", + " * [new tag] v0.1.3 -> v0.1.3\n", + " * [new tag] v0.1.4 -> v0.1.4\n", + " * [new tag] v0.1.5 -> v0.1.5\n", + " * [new tag] v0.1.6 -> v0.1.6\n", + " * [new tag] v0.1.7 -> v0.1.7\n", + " * [new tag] v0.1.8 -> v0.1.8\n", + " * [new tag] v0.1.9 -> v0.1.9\n", + "Previous HEAD position was bffb8476 Adjust serving notebook to account for underlying shape changes (#631)\n", + "Switched to a new branch 'main'\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Updating 5b3fe46..d9e97b4\n", - "Fast-forward\n", - " .github/workflows/check-base-branch.yaml | 9 +\n", - " .github/workflows/cpu-ci.yml | 83 +----\n", - " .github/workflows/cpu-packages.yml | 125 +++++++\n", - " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", - " .github/workflows/gpu-ci.yml | 2 +-\n", - " .github/workflows/jax.yaml | 2 +-\n", - " .github/workflows/models.yml | 43 +++\n", - " .github/workflows/nvtabular.yml | 43 +++\n", - " .github/workflows/release-drafter.yaml | 2 +-\n", - " .github/workflows/set-stable-branch.yaml | 10 +\n", - " .github/workflows/systems.yml | 43 +++\n", - " .github/workflows/tensorflow.yml | 2 +-\n", - " .github/workflows/torch.yaml | 2 +-\n", - " 
.github/workflows/transformers4rec.yml | 43 +++\n", - " .pre-commit-config.yaml | 14 +-\n", - " ci/pr.gpu.Jenkinsfile | 44 +++\n", - " docs/README.md | 28 +-\n", - " examples/01a-Getting-started-Tensorflow.ipynb | 5 +-\n", - " examples/01b-Getting-started-Pytorch.ipynb | 5 +-\n", - " .../02-Multi-GPU-Tensorflow-with-Horovod.ipynb | 371 ++++++++++++++++++\n", - " merlin/dataloader/jax.py | 52 +--\n", - " merlin/dataloader/loader_base.py | 413 +++++++++------------\n", - " merlin/dataloader/ops/embeddings.py | 110 ++++++\n", - " merlin/dataloader/ops/embeddings/__init__.py | 15 -\n", - " merlin/dataloader/ops/embeddings/embedding_op.py | 237 ------------\n", - " .../dataloader/ops/embeddings/tf_embedding_op.py | 101 -----\n", - " .../ops/embeddings/torch_embedding_op.py | 106 ------\n", - " merlin/dataloader/ops/padding.py | 88 +++++\n", - " merlin/dataloader/tensorflow.py | 337 +++++------------\n", - " merlin/dataloader/torch.py | 225 +++++------\n", - " merlin/dataloader/utils/tf/tf_trainer.py | 13 +-\n", - " requirements/base.txt | 2 +-\n", - " tests/conftest.py | 11 +-\n", - " .../test_multi_GPU_with_horovod_and_tensorflow.py | 28 ++\n", - " tests/unit/dataloader/test_array_dataloader.py | 57 +++\n", - " tests/unit/dataloader/test_array_to_tensorflow.py | 54 +++\n", - " tests/unit/dataloader/test_array_to_torch.py | 69 ++++\n", - " .../{test_tf_embeddings.py => test_embeddings.py} | 188 +++++-----\n", - " tests/unit/dataloader/test_jax_dataloader.py | 29 +-\n", - " tests/unit/dataloader/test_padding.py | 46 +++\n", - " tests/unit/dataloader/test_tf_dataloader.py | 358 +++++++++---------\n", - " tests/unit/dataloader/test_torch_dataloader.py | 245 ++++++++----\n", - " tests/unit/dataloader/test_torch_embeddings.py | 242 ------------\n", - " tox.ini | 55 +++\n", - " 44 files changed, 2154 insertions(+), 1810 deletions(-)\n", - " create mode 100644 .github/workflows/check-base-branch.yaml\n", - " create mode 100644 .github/workflows/cpu-packages.yml\n", - " create mode 100644 .github/workflows/models.yml\n", - " create mode 100644 .github/workflows/nvtabular.yml\n", - " create mode 100644 .github/workflows/set-stable-branch.yaml\n", - " create mode 100644 .github/workflows/systems.yml\n", - " create mode 100644 .github/workflows/transformers4rec.yml\n", - " create mode 100644 ci/pr.gpu.Jenkinsfile\n", - " create mode 100644 examples/02-Multi-GPU-Tensorflow-with-Horovod.ipynb\n", - " create mode 100644 merlin/dataloader/ops/embeddings.py\n", - " delete mode 100644 merlin/dataloader/ops/embeddings/__init__.py\n", - " delete mode 100644 merlin/dataloader/ops/embeddings/embedding_op.py\n", - " delete mode 100644 merlin/dataloader/ops/embeddings/tf_embedding_op.py\n", - " delete mode 100644 merlin/dataloader/ops/embeddings/torch_embedding_op.py\n", - " create mode 100644 merlin/dataloader/ops/padding.py\n", - " create mode 100644 tests/examples/test_multi_GPU_with_horovod_and_tensorflow.py\n", - " create mode 100644 tests/unit/dataloader/test_array_dataloader.py\n", - " create mode 100644 tests/unit/dataloader/test_array_to_tensorflow.py\n", - " create mode 100644 tests/unit/dataloader/test_array_to_torch.py\n", - " rename tests/unit/dataloader/{test_tf_embeddings.py => test_embeddings.py} (52%)\n", - " create mode 100644 tests/unit/dataloader/test_padding.py\n", - " delete mode 100644 tests/unit/dataloader/test_torch_embeddings.py\n", - "Processing /dataloader\n", + "Branch 'main' set up to track remote branch 'main' from 'origin'.\n", + "Processing /transformers4rec\n", " Installing build 
dependencies: started\n", " Installing build dependencies: finished with status 'done'\n", " Getting requirements to build wheel: started\n", " Getting requirements to build wheel: finished with status 'done'\n", " Preparing wheel metadata: started\n", " Preparing wheel metadata: finished with status 'done'\n", - "Requirement already satisfied: merlin-core>=23.04.00 in /usr/local/lib/python3.8/dist-packages (from merlin-dataloader==0.0.2+72.gd9e97b4) (23.4.0)\n", - "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.2.5)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (22.0)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (8.0.0)\n", - "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.3.5)\n", - "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (3.19.6)\n", - "Requirement already satisfied: numpy>=1.22.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.22.4)\n", - "Requirement already satisfied: fsspec>=2022.7.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2023.5.0)\n", - "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.12.0)\n", - "Requirement already satisfied: dask-cuda>=22.12.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (23.4.0)\n", - "Requirement already satisfied: distributed>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2023.4.1)\n", - "Requirement already satisfied: dask>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2023.4.1)\n", - "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (0.56.4)\n", - "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (11.4.1)\n", - "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (4.64.1)\n", - "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.2.0)\n", - "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (0.4.3)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.8.2)\n", - "Requirement already satisfied: pytz>=2017.3 in 
/usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2022.7)\n", - "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.57.0)\n", - "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.3.0)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from dask-cuda>=22.12.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.2.0)\n", - "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (6.1)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.7.0)\n", - "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.0.4)\n", - "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (3.1.2)\n", - "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.2.0)\n", - "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (8.1.3)\n", - "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (5.9.4)\n", - "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.26.13)\n", - "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (0.12.0)\n", - "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.4.0)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (6.0)\n", - "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.0.0)\n", - "Requirement already satisfied: importlib-metadata>=4.13.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (5.2.0)\n", - "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.3.0)\n", - 
"Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (45.2.0)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (0.39.1)\n", - "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (4.1.0)\n", - "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (6.0.4)\n", - "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.14.0)\n", - "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->dask-cuda>=22.12.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.0.1)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.1.1)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata>=4.13.0->dask>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (3.11.0)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (6.0.1)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (4.0.0)\n", - "Building wheels for collected packages: merlin-dataloader\n", - " Building wheel for merlin-dataloader (PEP 517): started\n", - " Building wheel for merlin-dataloader (PEP 517): finished with status 'done'\n", - " Created wheel for merlin-dataloader: filename=merlin_dataloader-0.0.2+72.gd9e97b4-py3-none-any.whl size=34881 sha256=3b59ffde476328ed024b3610d55773d48ee2a39a5c9dcc7bc4429f86ecdb3307\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-3z4lu_lg/wheels/8c/19/5b/15dc04f5a977f6a7f73ed66c91996a687b1d9e3154a4765536\n", - "Successfully built merlin-dataloader\n" + "Building wheels for collected packages: transformers4rec\n", + " Building wheel for transformers4rec (PEP 517): started\n", + " Building wheel for transformers4rec (PEP 517): finished with status 'done'\n", + " Created wheel for transformers4rec: filename=transformers4rec-23.5.dev0+11.ga070e77f-py3-none-any.whl size=481639 sha256=f084abe1999165aea2adf372e62f46aa50356b2b0eab88960b98b57a3b78b78d\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-0i0a5fke/wheels/24/44/e3/c29f7de8e7315585705f880ad32ffeae66fcaeb79003405ef6\n", + "Successfully built transformers4rec\n", + "Installing collected packages: transformers4rec\n", + " Attempting uninstall: transformers4rec\n", + " Found existing installation: transformers4rec 23.2.0\n", + " Uninstalling transformers4rec-23.2.0:\n", + " Successfully uninstalled transformers4rec-23.2.0\n", + "Successfully installed transformers4rec-23.5.dev0+11.ga070e77f\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ - "ERROR: nvtabular 1.6.0+66.g67136eba has requirement merlin-dataloader>=23.4.0, but you'll have merlin-dataloader 0.0.2+72.gd9e97b4 which is incompatible.\n", - "ERROR: merlin-models 0.9.0+157.gd8133b8f has requirement merlin-dataloader>=23.4.0, but you'll have merlin-dataloader 0.0.2+72.gd9e97b4 which is incompatible.\n" + "Note: switching to 'origin/release-23.04'.\n", + "\n", + "You are in 'detached HEAD' state. You can look around, make experimental\n", + "changes and commit them, and you can discard any commits you make in this\n", + "state without impacting any branches by switching back to a branch.\n", + "\n", + "If you want to create a new branch to retain commits you create, you may\n", + "do so (now or later) by using -c with the switch command. Example:\n", + "\n", + " git switch -c \n", + "\n", + "Or undo this operation with:\n", + "\n", + " git switch -\n", + "\n", + "Turn off this advice by setting config variable advice.detachedHead to false\n", + "\n", + "HEAD is now at 2516efb Return version 23.04.00 from versions\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Installing collected packages: merlin-dataloader\n", - " Attempting uninstall: merlin-dataloader\n", - " Found existing installation: merlin-dataloader 23.4.0\n", - " Uninstalling merlin-dataloader-23.4.0:\n", - " Successfully uninstalled merlin-dataloader-23.4.0\n", - "Successfully installed merlin-dataloader-0.0.2+72.gd9e97b4\n" + "Processing /systems\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Building wheels for collected packages: merlin-systems\n", + " Building wheel for merlin-systems (PEP 517): started\n", + " Building wheel for merlin-systems (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-systems: filename=merlin_systems-23.4.0-py3-none-any.whl size=82535 sha256=229914452c3a872d916e197689a80000fc868685acd4949a3c6ba707e88e3d30\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-klnajv41/wheels/1f/e9/71/1b0c6295aa7f4b37cb70292d96d87d9f38204674e6531bdda6\n", + "Successfully built merlin-systems\n", + "Installing collected packages: merlin-systems\n", + " Attempting uninstall: merlin-systems\n", + " Found existing installation: merlin-systems 23.5.dev0+8.g2b1b90b\n", + " Uninstalling merlin-systems-23.5.dev0+8.g2b1b90b:\n", + " Successfully uninstalled merlin-systems-23.5.dev0+8.g2b1b90b\n", + "Successfully installed merlin-systems-23.4.0\n" ] } ], "source": [ "%%bash\n", + "cd /core\n", + "git config remote.origin.fetch \"+refs/heads/*:refs/remotes/origin/*\" && git fetch && git checkout main\n", + "pip install . --no-deps\n", + "\n", + "cd /dataloader\n", + "git config remote.origin.fetch \"+refs/heads/*:refs/remotes/origin/*\" && git fetch && git checkout main\n", + "pip install . --no-deps\n", + "\n", + "cd /nvtabular\n", + "git config remote.origin.fetch \"+refs/heads/*:refs/remotes/origin/*\" && git fetch && git checkout main\n", + "pip install . --no-deps\n", + "\n", + "cd /models\n", + "git config remote.origin.fetch \"+refs/heads/*:refs/remotes/origin/*\" && git fetch && git checkout main\n", + "pip install . 
--no-deps\n", "\n", - "cd /models && git checkout main && git pull origin main && pip install .\n", - "cd /core && git checkout main && git pull origin main && pip install .\n", - "cd /nvtabular && git checkout main && git pull origin main && pip install .\n", - "cd /systems && git checkout main && git pull origin main && pip install .\n", - "cd /dataloader && git checkout main && git pull origin main && pip install ." + "cd /systems\n", + "git config remote.origin.fetch \"+refs/heads/*:refs/remotes/origin/*\" && git fetch && git checkout main\n", + "pip install . --no-deps\n", + "\n", + "cd /transformers4rec\n", + "git config remote.origin.fetch \"+refs/heads/*:refs/remotes/origin/*\" && git fetch && git checkout main\n", + "pip install . --no-deps\n", + "\n", + "cd /systems\n", + "git checkout origin/release-23.04\n", + "pip install . --no-deps" ] }, { @@ -1405,15 +912,15 @@ "text": [ "Collecting gdown\n", " Downloading gdown-4.7.1-py3-none-any.whl (15 kB)\n", - "Requirement already satisfied: six in /usr/lib/python3/dist-packages (from gdown) (1.14.0)\n", - "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.8/dist-packages (from gdown) (4.11.1)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from gdown) (4.65.0)\n", + "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.8/dist-packages (from gdown) (4.11.2)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from gdown) (3.9.0)\n", - "Requirement already satisfied: requests[socks] in /usr/local/lib/python3.8/dist-packages (from gdown) (2.28.1)\n", - "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from gdown) (4.64.1)\n", - "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.8/dist-packages (from beautifulsoup4->gdown) (2.3.2.post1)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (2.1.1)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.26.13)\n", + "Requirement already satisfied: requests[socks] in /usr/local/lib/python3.8/dist-packages (from gdown) (2.28.2)\n", + "Requirement already satisfied: six in /usr/lib/python3/dist-packages (from gdown) (1.14.0)\n", + "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.8/dist-packages (from beautifulsoup4->gdown) (2.4)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2019.11.28)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (1.25.8)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (3.1.0)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2.8)\n", "Collecting PySocks!=1.5.7,>=1.5.6; extra == \"socks\"\n", " Downloading PySocks-1.7.1-py3-none-any.whl (16 kB)\n", @@ -1427,9 +934,9 @@ "text": [ "Downloading...\n", "From (uriginal): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", - "From (redirected): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV&confirm=t&uuid=c6c52af8-65d0-4308-84a7-f680f5add55c\n", + "From (redirected): 
https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV&confirm=t&uuid=c0a77044-3198-4431-915f-3fbdaf1e7f79\n", "To: /workspace/T4Rec_repro/rees46_ecom_dataset_small_for_ci.zip\n", - "100%|██████████| 43.4M/43.4M [00:07<00:00, 6.16MB/s]\n" + "100%|██████████| 43.4M/43.4M [00:07<00:00, 6.19MB/s]\n" ] }, { @@ -1442,27 +949,27 @@ "Get:4 http://archive.ubuntu.com/ubuntu focal InRelease [265 kB]\n", "Get:5 http://security.ubuntu.com/ubuntu focal-security/universe amd64 Packages [1045 kB]\n", "Get:6 http://archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]\n", - "Get:7 http://archive.ubuntu.com/ubuntu focal-backports InRelease [108 kB]\n", - "Get:8 http://security.ubuntu.com/ubuntu focal-security/multiverse amd64 Packages [28.5 kB]\n", - "Get:9 http://security.ubuntu.com/ubuntu focal-security/restricted amd64 Packages [2203 kB]\n", - "Get:10 http://archive.ubuntu.com/ubuntu focal/multiverse amd64 Packages [177 kB]\n", - "Get:11 http://archive.ubuntu.com/ubuntu focal/main amd64 Packages [1275 kB]\n", - "Get:12 http://security.ubuntu.com/ubuntu focal-security/main amd64 Packages [2674 kB]\n", - "Get:13 http://archive.ubuntu.com/ubuntu focal/restricted amd64 Packages [33.4 kB]\n", - "Get:14 http://archive.ubuntu.com/ubuntu focal/universe amd64 Packages [11.3 MB]\n", + "Get:7 http://security.ubuntu.com/ubuntu focal-security/restricted amd64 Packages [2203 kB]\n", + "Get:8 http://archive.ubuntu.com/ubuntu focal-backports InRelease [108 kB]\n", + "Get:9 http://archive.ubuntu.com/ubuntu focal/restricted amd64 Packages [33.4 kB]\n", + "Get:10 http://archive.ubuntu.com/ubuntu focal/universe amd64 Packages [11.3 MB]\n", + "Get:11 http://security.ubuntu.com/ubuntu focal-security/main amd64 Packages [2674 kB]\n", + "Get:12 http://security.ubuntu.com/ubuntu focal-security/multiverse amd64 Packages [28.5 kB]\n", + "Get:13 http://archive.ubuntu.com/ubuntu focal/multiverse amd64 Packages [177 kB]\n", + "Get:14 http://archive.ubuntu.com/ubuntu focal/main amd64 Packages [1275 kB]\n", "Get:15 http://archive.ubuntu.com/ubuntu focal-updates/multiverse amd64 Packages [31.2 kB]\n", - "Get:16 http://archive.ubuntu.com/ubuntu focal-updates/universe amd64 Packages [1341 kB]\n", - "Get:17 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 Packages [3157 kB]\n", - "Get:18 http://archive.ubuntu.com/ubuntu focal-updates/restricted amd64 Packages [2341 kB]\n", + "Get:16 http://archive.ubuntu.com/ubuntu focal-updates/restricted amd64 Packages [2341 kB]\n", + "Get:17 http://archive.ubuntu.com/ubuntu focal-updates/universe amd64 Packages [1341 kB]\n", + "Get:18 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 Packages [3157 kB]\n", "Get:19 http://archive.ubuntu.com/ubuntu focal-backports/main amd64 Packages [55.2 kB]\n", "Get:20 http://archive.ubuntu.com/ubuntu focal-backports/universe amd64 Packages [28.6 kB]\n", - "Fetched 27.3 MB in 9s (2917 kB/s)\n", + "Fetched 27.3 MB in 10s (2846 kB/s)\n", "Reading package lists...\n", "Reading package lists...\n", "Building dependency tree...\n", "Reading state information...\n", "unzip is already the newest version (6.0-25ubuntu1.1).\n", - "0 upgraded, 0 newly installed, 0 to remove and 99 not upgraded.\n", + "0 upgraded, 0 newly installed, 0 to remove and 81 not upgraded.\n", "Archive: rees46_ecom_dataset_small_for_ci.zip\n", " creating: ecom_dataset/0001/\n", " inflating: ecom_dataset/0001/valid.parquet \n", @@ -1493,343 +1000,72 @@ { "cell_type": "code", "execution_count": 3, - "id": "4a0105a7", + "id": "fd80de2a", "metadata": {}, - "outputs": [], + 
"outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-05-09 06:24:34.941906: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", + "2023-05-09 06:24:37.290386: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 06:24:37.290801: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 06:24:37.290937: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[INFO]: sparse_operation_kit is imported\n", + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. 
The old module will be deleted in version 2.11.\n", + "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.1.4-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n" + ] + }, + { + "ename": "TypeError", + "evalue": "init() got an unexpected keyword argument 'use_legacy_optimizer'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[3], line 11\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mschema\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtags\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Tags\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mio\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdataset\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Dataset\n\u001b[0;32m---> 11\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mmm\u001b[39;00m\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/models/tf/__init__.py:34\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mblocks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcross\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CrossBlock\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mblocks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdlrm\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DLRMBlock\n\u001b[0;32m---> 34\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mblocks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mexperts\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CGCBlock, ExpertsGate, MMOEBlock, PLEBlock\n\u001b[1;32m 35\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mblocks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minteraction\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 36\u001b[0m DotProductInteraction,\n\u001b[1;32m 37\u001b[0m FMBlock,\n\u001b[1;32m 38\u001b[0m FMPairwiseInteraction,\n\u001b[1;32m 39\u001b[0m )\n\u001b[1;32m 40\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m 
\u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mblocks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmlp\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DenseResidualBlock, MLPBlock\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/models/tf/blocks/experts.py:28\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbase\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Block\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcombinators\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 23\u001b[0m ParallelBlock,\n\u001b[1;32m 24\u001b[0m SequentialBlock,\n\u001b[1;32m 25\u001b[0m TabularBlock,\n\u001b[1;32m 26\u001b[0m WithShortcut,\n\u001b[1;32m 27\u001b[0m )\n\u001b[0;32m---> 28\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbase\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_task_names_from_outputs\n\u001b[1;32m 29\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mprediction_tasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbase\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ParallelPredictionBlock, PredictionTask\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TabularData\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/models/tf/models/base.py:51\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mprediction\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Prediction, PredictionContext, TensorLike\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m 
\u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtabular\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TabularBlock\n\u001b[0;32m---> 51\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdistributed\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbackend\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m hvd, hvd_installed\n\u001b[1;32m 52\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minputs\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbase\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m InputBlock\n\u001b[1;32m 53\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mloader\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Loader\n", + "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/models/tf/distributed/backend.py:33\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m sok_installed:\n\u001b[0;32m---> 33\u001b[0m \u001b[43msok\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minit\u001b[49m\u001b[43m(\u001b[49m\u001b[43muse_legacy_optimizer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mTypeError\u001b[0m: init() got an unexpected keyword argument 'use_legacy_optimizer'" + ] + } + ], "source": [ - "# !cd /dataloader && git checkout main && git pull origin main && git checkout ce2215d8f871d0fb8c71900f7b914a226aea7c24 && pip install ." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "8101aa27", - "metadata": {}, - "outputs": [], - "source": [ - "# !cd /core && git checkout main && git pull origin main && pip install ." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "0f799172", - "metadata": {}, - "outputs": [], - "source": [ - "# %%writefile /core/merlin/dag/graph.py\n", - "\n", - "# #\n", - "# # Copyright (c) 2022, NVIDIA CORPORATION.\n", - "# #\n", - "# # Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# # you may not use this file except in compliance with the License.\n", - "# # You may obtain a copy of the License at\n", - "# #\n", - "# # http://www.apache.org/licenses/LICENSE-2.0\n", - "# #\n", - "# # Unless required by applicable law or agreed to in writing, software\n", - "# # distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# # See the License for the specific language governing permissions and\n", - "# # limitations under the License.\n", - "# #\n", - "\n", - "# import logging\n", - "# from collections import deque\n", - "# from typing import Dict, Optional\n", - "\n", - "# from merlin.dag.node import (\n", - "# Node,\n", - "# _combine_schemas,\n", - "# iter_nodes,\n", - "# postorder_iter_nodes,\n", - "# preorder_iter_nodes,\n", - "# )\n", - "# from merlin.schema import Schema\n", - "\n", - "# LOG = logging.getLogger(\"merlin\")\n", - "\n", - "\n", - "# class Graph:\n", - "# \"\"\"\n", - "# Represents an DAG composed of Nodes, each of which contains an operator that\n", - "# transforms dataframes or dataframe-like data\n", - "# \"\"\"\n", - "\n", - "# def __init__(self, output_node: Node, subgraphs: Optional[Dict[str, Node]] = None):\n", - "# self.output_node = output_node\n", - "# self.subgraphs = subgraphs or {}\n", - "\n", - "# parents_with_deps = self.output_node.parents_with_dependencies\n", - "# parents_with_deps.append(output_node)\n", - "\n", - "# for name, sg in self.subgraphs.items():\n", - "# if sg not in parents_with_deps:\n", - "# raise ValueError(\n", - "# f\"The output node of subgraph {name} does not exist in the provided graph.\"\n", - "# )\n", - "\n", - "# def subgraph(self, name: str) -> \"Graph\":\n", - "# if name not in self.subgraphs.keys():\n", - "# raise ValueError(f\"No subgraph named {name}. 
Options are: {self.subgraphs.keys()}\")\n", - "# return Graph(self.subgraphs[name])\n", - "\n", - "# @property\n", - "# def input_dtypes(self):\n", - "# if self.input_schema:\n", - "# return {\n", - "# name: col_schema.dtype\n", - "# for name, col_schema in self.input_schema.column_schemas.items()\n", - "# }\n", - "# else:\n", - "# return {}\n", - "\n", - "# @property\n", - "# def output_dtypes(self):\n", - "# if self.output_schema:\n", - "# return {\n", - "# name: col_schema.dtype\n", - "# for name, col_schema in self.output_schema.column_schemas.items()\n", - "# }\n", - "# else:\n", - "# return {}\n", - "\n", - "# @property\n", - "# def column_mapping(self):\n", - "# nodes = preorder_iter_nodes(self.output_node)\n", - "# column_mapping = self.output_node.column_mapping\n", - "# for node in list(nodes)[1:]:\n", - "# node_map = node.column_mapping\n", - "# for output_col, input_cols in column_mapping.items():\n", - "# early_inputs = []\n", - "# for input_col in input_cols:\n", - "# early_inputs += node_map.get(input_col, [input_col])\n", - "# column_mapping[output_col] = early_inputs\n", - "\n", - "# return column_mapping\n", - "\n", - "# def construct_schema(self, root_schema: Schema, preserve_dtypes=False) -> \"Graph\":\n", - "# \"\"\"\n", - "# Given the schema of a dataset to transform, determine the output schema of the graph\n", - "\n", - "# Parameters\n", - "# ----------\n", - "# root_schema : Schema\n", - "# The schema of a dataset to be transformed with this DAG\n", - "# preserve_dtypes : bool, optional\n", - "# Whether to keep any dtypes that may already be present in the schemas, by default False\n", - "\n", - "# Returns\n", - "# -------\n", - "# Graph\n", - "# This DAG after the schemas have been filled in\n", - "# \"\"\"\n", - "# nodes = list(postorder_iter_nodes(self.output_node))\n", - "\n", - "# self._compute_node_schemas(root_schema, nodes, preserve_dtypes)\n", - "# # self._validate_node_schemas(root_schema, nodes, preserve_dtypes)\n", - "\n", - "# return self\n", - "\n", - "# def _compute_node_schemas(self, root_schema, nodes, preserve_dtypes=False):\n", - "# for node in nodes:\n", - "# node.compute_schemas(root_schema, preserve_dtypes=preserve_dtypes)\n", - "\n", - "# def _validate_node_schemas(self, root_schema, nodes, strict_dtypes=False):\n", - "# for node in nodes:\n", - "# node.validate_schemas(root_schema, strict_dtypes=strict_dtypes)\n", - "\n", - "# @property\n", - "# def input_schema(self):\n", - "# # leaf_node input and output schemas are the same (aka selection)\n", - "# return _combine_schemas(self.leaf_nodes)\n", - "\n", - "# @property\n", - "# def leaf_nodes(self):\n", - "# return [node for node in postorder_iter_nodes(self.output_node) if not node.parents]\n", - "\n", - "# @property\n", - "# def output_schema(self):\n", - "# return self.output_node.output_schema\n", - "\n", - "# def _input_columns(self):\n", - "# input_cols = []\n", - "# for node in iter_nodes([self.output_node]):\n", - "# upstream_output_cols = []\n", - "\n", - "# for upstream_node in node.parents_with_dependencies:\n", - "# upstream_output_cols += upstream_node.output_columns.names\n", - "\n", - "# upstream_output_cols = _get_unique(upstream_output_cols)\n", - "# input_cols += list(set(node.input_columns.names) - set(upstream_output_cols))\n", - "\n", - "# return _get_unique(input_cols)\n", - "\n", - "# def remove_inputs(self, to_remove):\n", - "# \"\"\"\n", - "# Removes columns from a Graph\n", - "\n", - "# Starting at the leaf nodes, trickle down looking for columns to remove,\n", - "# 
when found remove but then must propagate the removal of any other\n", - "# output columns derived from that column.\n", - "\n", - "# Parameters\n", - "# -----------\n", - "# graph : Graph\n", - "# The graph to remove columns from\n", - "# to_remove : array_like\n", - "# A list of input column names to remove from the graph\n", - "\n", - "# Returns\n", - "# -------\n", - "# Graph\n", - "# The same graph with columns removed\n", - "# \"\"\"\n", - "# nodes_to_process = deque([(node, to_remove) for node in self.leaf_nodes])\n", - "\n", - "# while nodes_to_process:\n", - "# node, columns_to_remove = nodes_to_process.popleft()\n", - "# if node.input_schema and len(node.input_schema):\n", - "# output_columns_to_remove = node.remove_inputs(columns_to_remove)\n", - "\n", - "# for child in node.children:\n", - "# nodes_to_process.append(\n", - "# (child, list(set(to_remove + output_columns_to_remove)))\n", - "# )\n", - "\n", - "# if not len(node.input_schema):\n", - "# node.remove_child(child)\n", - "\n", - "# # remove any dependencies that do not have an output schema\n", - "# node.dependencies = [\n", - "# dep for dep in node.dependencies if dep.output_schema and len(dep.output_schema)\n", - "# ]\n", - "\n", - "# if not node.input_schema or not len(node.input_schema):\n", - "# for parent in node.parents:\n", - "# parent.remove_child(node)\n", - "# for dependency in node.dependencies:\n", - "# dependency.remove_child(node)\n", - "# del node\n", - "\n", - "# return self\n", - "\n", - "# @classmethod\n", - "# def get_nodes_by_op_type(cls, nodes, op_type):\n", - "# return set(node for node in iter_nodes(nodes) if isinstance(node.op, op_type))\n", - "\n", - "\n", - "# def _get_schemaless_nodes(nodes):\n", - "# schemaless_nodes = []\n", - "# for node in iter_nodes(nodes):\n", - "# if node.input_schema is None:\n", - "# schemaless_nodes.append(node)\n", - "\n", - "# return set(schemaless_nodes)\n", - "\n", - "\n", - "# def _get_unique(cols):\n", - "# # Need to preserve order in unique-column list\n", - "# return list({x: x for x in cols}.keys())" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "ab4f272d", - "metadata": {}, - "outputs": [], - "source": [ - "# !cd /core && pip install ." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "fd80de2a", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "# os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", - "import gc\n", - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "d5a1e610", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-05-09 02:55:54.458160: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", - " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n" - ] - } - ], - "source": [ - "import tensorflow as tf\n", - "\n", - "from merlin.schema.tags import Tags\n", - "from merlin.io.dataset import Dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "ceb3ae93", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-05-09 02:55:56.823309: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 02:55:56.823677: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 02:55:56.823805: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[INFO]: sparse_operation_kit is imported\n", - "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. 
The old module will be deleted in version 2.11.\n", - "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.1.4-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n" - ] - }, - { - "ename": "TypeError", - "evalue": "init() got an unexpected keyword argument 'use_legacy_optimizer'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[9], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mmm\u001b[39;00m\n", - "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/models/tf/__init__.py:34\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mblocks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcross\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CrossBlock\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mblocks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdlrm\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DLRMBlock\n\u001b[0;32m---> 34\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mblocks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mexperts\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CGCBlock, ExpertsGate, MMOEBlock, PLEBlock\n\u001b[1;32m 35\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mblocks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minteraction\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 36\u001b[0m DotProductInteraction,\n\u001b[1;32m 37\u001b[0m FMBlock,\n\u001b[1;32m 38\u001b[0m FMPairwiseInteraction,\n\u001b[1;32m 39\u001b[0m )\n\u001b[1;32m 40\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mblocks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmlp\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DenseResidualBlock, MLPBlock\n", - "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/models/tf/blocks/experts.py:28\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m 
\u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbase\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Block\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcombinators\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 23\u001b[0m ParallelBlock,\n\u001b[1;32m 24\u001b[0m SequentialBlock,\n\u001b[1;32m 25\u001b[0m TabularBlock,\n\u001b[1;32m 26\u001b[0m WithShortcut,\n\u001b[1;32m 27\u001b[0m )\n\u001b[0;32m---> 28\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbase\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_task_names_from_outputs\n\u001b[1;32m 29\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mprediction_tasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbase\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ParallelPredictionBlock, PredictionTask\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TabularData\n", - "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/models/tf/models/base.py:51\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mprediction\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Prediction, PredictionContext, TensorLike\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtabular\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TabularBlock\n\u001b[0;32m---> 51\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m 
\u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdistributed\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbackend\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m hvd, hvd_installed\n\u001b[1;32m 52\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minputs\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbase\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m InputBlock\n\u001b[1;32m 53\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmerlin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mloader\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Loader\n", - "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/merlin/models/tf/distributed/backend.py:33\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m sok_installed:\n\u001b[0;32m---> 33\u001b[0m \u001b[43msok\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minit\u001b[49m\u001b[43m(\u001b[49m\u001b[43muse_legacy_optimizer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n", - "\u001b[0;31mTypeError\u001b[0m: init() got an unexpected keyword argument 'use_legacy_optimizer'" - ] - } - ], - "source": [ - "import merlin.models.tf as mm" + "import os\n", + "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", + "import gc\n", + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "\n", + "from merlin.schema.tags import Tags\n", + "from merlin.io.dataset import Dataset\n", + "\n", + "import merlin.models.tf as mm" ] }, { @@ -1845,7 +1081,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "4ab4e0fb", "metadata": {}, "outputs": [], @@ -1856,7 +1092,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "8d9903e6", "metadata": {}, "outputs": [], @@ -1875,7 +1111,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "410ea223", "metadata": {}, "outputs": [], @@ -1886,7 +1122,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "4328f03a", "metadata": {}, "outputs": [], @@ -1898,7 +1134,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "d5a9dd50", "metadata": {}, "outputs": [], @@ -1913,47 +1149,7 @@ }, { "cell_type": "code", - "execution_count": 13, - "id": "3116726e", - "metadata": {}, - "outputs": [], - "source": [ - "# cat rees46_schema_modified.pbtxt" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "69e8f95c", - "metadata": {}, - "outputs": [], - "source": [ - "# %%writefile rees46_schema_modified_2.pbtxt\n", - "\n", - "# feature {\n", - "# name: \"seq\"\n", - "# value_count {\n", - "# min: 2\n", - "# }\n", - "# type: INT\n", - "# int_domain {\n", - "# name: \"seq\"\n", - "# min: 1\n", - "# max: 390000\n", - "# is_categorical: true\n", - "# }\n", - "# annotation {\n", - "# tag: \"item_id\"\n", - "# tag: \"list\"\n", - 
"# tag: \"categorical\"\n", - "# tag: \"item\"\n", - "# }\n", - "# }" - ] - }, - { - "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "a6ade14a", "metadata": {}, "outputs": [], @@ -2018,82 +1214,20 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "7baec64f", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "model_transformer, xlnet_block = get_model()" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "523fe2ac", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", - " warnings.warn(\n", - "2023-05-09 01:50:35.053579: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:436: UserWarning: Converting sparse IndexedSlices to a dense Tensor with 174720448 elements. This may consume a large amount of memory.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 105s 143ms/step - loss: 7.2880 - recall_at_20: 0.1451 - mrr_at_20: 0.0813 - ndcg_at_20: 0.0954 - map_at_20: 0.0813 - precision_at_20: 0.0073 - regularization_loss: 0.0000e+00 - loss_batch: 7.2857\n", - "84/84 [==============================] - 4s 26ms/step - loss: 8.5378 - recall_at_20: 0.2315 - mrr_at_20: 0.0811 - ndcg_at_20: 0.1142 - map_at_20: 0.0811 - precision_at_20: 0.0116 - regularization_loss: 0.0000e+00 - loss_batch: 8.5385\n" - ] - }, - { - "data": { - "text/plain": [ - "{'loss': 8.537825584411621,\n", - " 'recall_at_20': 0.2337784469127655,\n", - " 'mrr_at_20': 0.07926096022129059,\n", - " 'ndcg_at_20': 0.11324834823608398,\n", - " 'map_at_20': 0.07926096022129059,\n", - " 'precision_at_20': 0.011688923463225365,\n", - " 'regularization_loss': 0.0,\n", - " 'loss_batch': 8.566910743713379}" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "model_transformer.fit(\n", " train,\n", @@ -2113,11408 +1247,27 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "569113e1", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n", - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n", - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n", - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_predict_next_layer_call_fn, sequence_predict_next_layer_call_and_return_conditional_losses, sequence_predict_last_layer_call_fn while saving (showing 5 of 114). These functions will not be directly callable after loading.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: t4rec_model/assets\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: t4rec_model/assets\n", - "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:101: CustomMaskWarning: Custom mask layers require a config and must override get_config. 
When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n", - "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " config[i] = tf.keras.utils.serialize_keras_object(layer)\n", - "/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:134: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " return generic_utils.serialize_keras_object(obj)\n", - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "model_transformer.save('t4rec_model')" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "2b09261c", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). 
Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "Model(\n", - " (_should_compute_train_metrics_for_batch): \n", - " (blocks): _TupleWrapper((SequentialBlock(\n", - " (layers): List(\n", - " (0): ParallelBlock(\n", - " (_aggregation): ConcatFeatures(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (parallel_layers): Dict(\n", - " (categorical): ParallelBlock(\n", - " (parallel_layers): Dict(\n", - " (sess_pid_seq): EmbeddingTable(\n", - " (features): Dict(\n", - " (sess_pid_seq): ColumnSchema(name='sess_pid_seq', tags={, , , , }, properties={'domain': {'min': 1, 'max': 390000, 'name': 'sess_pid_seq'}, 'value_count': {'min': 2, 'max': None}}, dtype=DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=2, max=None)))), is_list=True, is_ragged=True)\n", - " )\n", - " (table): Embedding(\n", - " (embeddings): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (1): SequentialBlock(\n", - " (layers): List(\n", - " (0): _Dense(\n", - " (dense): Dense(\n", - " 192, activation=linear, use_bias=True\n", - " (kernel): \n", - " (bias): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (2): XLNetBlock(\n", - " (transformer): TFXLNetMainLayer(\n", - " (word_embedding): TFSharedEmbeddings(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (layer): List(\n", - " (0): TFXLNetLayer(\n", - " (rel_attn): TFXLNetRelativeAttention(\n", - " (layer_norm): LayerNormalization(\n", - " (axis): List(\n", - " (0): 2\n", - " )\n", - " (gamma): \n", - " (beta): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (q): \n", - " (k): \n", - " (v): \n", - " (o): \n", - " (r): \n", - " (r_r_bias): \n", - " (r_s_bias): \n", - " (r_w_bias): \n", - " (seg_embed): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (ff): TFXLNetFeedForward(\n", - " (layer_norm): LayerNormalization(\n", - " (axis): List(\n", - " (0): 2\n", - " )\n", - " (gamma): \n", - " (beta): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (layer_1): Dense(\n", - " 768, activation=linear, use_bias=True\n", - " (kernel): \n", - " (bias): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (layer_2): Dense(\n", - " 192, activation=linear, use_bias=True\n", - " (kernel): \n", - " (bias): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): 
Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (1): TFXLNetLayer(\n", - " (rel_attn): TFXLNetRelativeAttention(\n", - " (layer_norm): LayerNormalization(\n", - " (axis): List(\n", - " (0): 2\n", - " )\n", - " (gamma): \n", - " (beta): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (q): \n", - " (k): \n", - " (v): \n", - " (o): \n", - " (r): \n", - " (r_r_bias): \n", - " (r_s_bias): \n", - " (r_w_bias): \n", - " (seg_embed): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (ff): TFXLNetFeedForward(\n", - " (layer_norm): LayerNormalization(\n", - " (axis): List(\n", - " (0): 2\n", - " )\n", - " (gamma): \n", - " (beta): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (layer_1): Dense(\n", - " 768, activation=linear, use_bias=True\n", - " (kernel): \n", - " (bias): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (layer_2): Dense(\n", - " 192, activation=linear, use_bias=True\n", - " (kernel): \n", - " (bias): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (2): TFXLNetLayer(\n", - " (rel_attn): TFXLNetRelativeAttention(\n", - " (layer_norm): LayerNormalization(\n", - " (axis): List(\n", - " (0): 2\n", - " )\n", - " (gamma): \n", - " (beta): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (q): \n", - " (k): \n", - " (v): \n", - " (o): \n", - " (r): \n", - " (r_r_bias): \n", - " (r_s_bias): \n", - " (r_w_bias): \n", - " (seg_embed): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (ff): TFXLNetFeedForward(\n", - " (layer_norm): LayerNormalization(\n", - " (axis): List(\n", - " (0): 2\n", - " )\n", - " (gamma): \n", - " (beta): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (layer_1): Dense(\n", - " 768, activation=linear, use_bias=True\n", - " (kernel): \n", - " (bias): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (layer_2): Dense(\n", - " 192, activation=linear, use_bias=True\n", - " (kernel): \n", - " (bias): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_kwargs): Dict(\n", - " (name): 'transformer'\n", - " (trainable): True\n", - " (dtype): 'float32'\n", - " )\n", - " (mask_emb): \n", - " 
(_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (transformer_pre): PrepareTransformerInputs(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (transformer_post): LastHiddenState(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_masking_post): SequentialBlock(\n", - " (layers): List(\n", - " (0): TransformerOutputToRagged(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (1): TransformerInferenceHiddenState(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_masking_pre): SequentialBlock(\n", - " (layers): List(\n", - " (0): SequenceCausalLastInference(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (1): ExtractMaskFromTargets(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " ), SequentialBlock(\n", - " (layers): List(\n", - " (0): _Dense(\n", - " (dense): Dense(\n", - " 448, activation=linear, use_bias=True\n", - " (kernel): \n", - " (bias): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " ), CategoricalOutput(\n", - " (to_call): EmbeddingTablePrediction(\n", - " (table): EmbeddingTable(\n", - " (features): Dict(\n", - " (sess_pid_seq): ColumnSchema(name='sess_pid_seq', tags={, , , , }, properties={'domain': {'min': 1, 'max': 390000, 'name': 'sess_pid_seq'}, 'value_count': {'min': 2, 'max': None}}, dtype=DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=2, max=None)))), is_list=True, is_ragged=True)\n", - " )\n", - " (table): Embedding(\n", - " (embeddings): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (output_layer_bias): \n", - " (bias): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )))\n", - " (context): ModelContext(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_prepare_features): PrepareFeatures(\n", - " (prepare_lists): PrepareListFeatures()\n", - " )\n", - " (output_names): List(\n", - " (0): 'sess_pid_seq/categorical_output'\n", - " )\n", - " (optimizer): Adam()\n", - " (loss): Dict(\n", - " (sess_pid_seq/categorical_output): CategoricalCrossEntropy()\n", - " )\n", - " (train_pre): SequencePredictNext(\n", - " (_pre): SequentialBlock(\n", - " (layers): List(\n", - " (0): PrepareFeatures(\n", - " (prepare_lists): PrepareListFeatures()\n", - " )\n", - " (1): PrepareFeatures(\n", - " (prepare_lists): PrepareListFeatures()\n", - " )\n", - " )\n", - " (prepare_lists): PrepareListFeatures()\n", - " )\n", - " (transformer): XLNetBlock(\n", - " (transformer): TFXLNetMainLayer(\n", - " (word_embedding): TFSharedEmbeddings(\n", - " (_feature_shapes): Dict()\n", - " 
(_feature_dtypes): Dict()\n", - " )\n", - " (layer): List(\n", - " (0): TFXLNetLayer(\n", - " (rel_attn): TFXLNetRelativeAttention(\n", - " (layer_norm): LayerNormalization(\n", - " (axis): List(\n", - " (0): 2\n", - " )\n", - " (gamma): \n", - " (beta): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (q): \n", - " (k): \n", - " (v): \n", - " (o): \n", - " (r): \n", - " (r_r_bias): \n", - " (r_s_bias): \n", - " (r_w_bias): \n", - " (seg_embed): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (ff): TFXLNetFeedForward(\n", - " (layer_norm): LayerNormalization(\n", - " (axis): List(\n", - " (0): 2\n", - " )\n", - " (gamma): \n", - " (beta): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (layer_1): Dense(\n", - " 768, activation=linear, use_bias=True\n", - " (kernel): \n", - " (bias): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (layer_2): Dense(\n", - " 192, activation=linear, use_bias=True\n", - " (kernel): \n", - " (bias): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (1): TFXLNetLayer(\n", - " (rel_attn): TFXLNetRelativeAttention(\n", - " (layer_norm): LayerNormalization(\n", - " (axis): List(\n", - " (0): 2\n", - " )\n", - " (gamma): \n", - " (beta): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (q): \n", - " (k): \n", - " (v): \n", - " (o): \n", - " (r): \n", - " (r_r_bias): \n", - " (r_s_bias): \n", - " (r_w_bias): \n", - " (seg_embed): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (ff): TFXLNetFeedForward(\n", - " (layer_norm): LayerNormalization(\n", - " (axis): List(\n", - " (0): 2\n", - " )\n", - " (gamma): \n", - " (beta): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (layer_1): Dense(\n", - " 768, activation=linear, use_bias=True\n", - " (kernel): \n", - " (bias): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (layer_2): Dense(\n", - " 192, activation=linear, use_bias=True\n", - " (kernel): \n", - " (bias): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (2): TFXLNetLayer(\n", - " (rel_attn): TFXLNetRelativeAttention(\n", - " (layer_norm): LayerNormalization(\n", - " (axis): List(\n", - " (0): 2\n", - " )\n", - " (gamma): \n", - " (beta): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " 
(_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (q): \n", - " (k): \n", - " (v): \n", - " (o): \n", - " (r): \n", - " (r_r_bias): \n", - " (r_s_bias): \n", - " (r_w_bias): \n", - " (seg_embed): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (ff): TFXLNetFeedForward(\n", - " (layer_norm): LayerNormalization(\n", - " (axis): List(\n", - " (0): 2\n", - " )\n", - " (gamma): \n", - " (beta): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (layer_1): Dense(\n", - " 768, activation=linear, use_bias=True\n", - " (kernel): \n", - " (bias): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (layer_2): Dense(\n", - " 192, activation=linear, use_bias=True\n", - " (kernel): \n", - " (bias): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_kwargs): Dict(\n", - " (name): 'transformer'\n", - " (trainable): True\n", - " (dtype): 'float32'\n", - " )\n", - " (mask_emb): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (transformer_pre): PrepareTransformerInputs(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (transformer_post): LastHiddenState(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_masking_post): SequentialBlock(\n", - " (layers): List(\n", - " (0): TransformerOutputToRagged(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (1): TransformerInferenceHiddenState(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_masking_pre): SequentialBlock(\n", - " (layers): List(\n", - " (0): SequenceCausalLastInference(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (1): ExtractMaskFromTargets(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " )\n", - " (test_pre): SequencePredictLast(\n", - " (_pre): SequentialBlock(\n", - " (layers): List(\n", - " (0): PrepareFeatures(\n", - " (prepare_lists): PrepareListFeatures()\n", - " )\n", - " (1): PrepareFeatures(\n", - " (prepare_lists): PrepareListFeatures()\n", - " )\n", - " )\n", - " (prepare_lists): PrepareListFeatures()\n", - " )\n", - " (transformer): XLNetBlock(\n", - " (transformer): TFXLNetMainLayer(\n", - " (word_embedding): TFSharedEmbeddings(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (layer): List(\n", - " (0): TFXLNetLayer(\n", - " (rel_attn): TFXLNetRelativeAttention(\n", - " (layer_norm): LayerNormalization(\n", - " (axis): List(\n", - " (0): 2\n", - " )\n", - " (gamma): \n", - " (beta): \n", - " (_feature_shapes): Dict()\n", - " 
(_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (q): \n", - " (k): \n", - " (v): \n", - " (o): \n", - " (r): \n", - " (r_r_bias): \n", - " (r_s_bias): \n", - " (r_w_bias): \n", - " (seg_embed): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (ff): TFXLNetFeedForward(\n", - " (layer_norm): LayerNormalization(\n", - " (axis): List(\n", - " (0): 2\n", - " )\n", - " (gamma): \n", - " (beta): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (layer_1): Dense(\n", - " 768, activation=linear, use_bias=True\n", - " (kernel): \n", - " (bias): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (layer_2): Dense(\n", - " 192, activation=linear, use_bias=True\n", - " (kernel): \n", - " (bias): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (1): TFXLNetLayer(\n", - " (rel_attn): TFXLNetRelativeAttention(\n", - " (layer_norm): LayerNormalization(\n", - " (axis): List(\n", - " (0): 2\n", - " )\n", - " (gamma): \n", - " (beta): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (q): \n", - " (k): \n", - " (v): \n", - " (o): \n", - " (r): \n", - " (r_r_bias): \n", - " (r_s_bias): \n", - " (r_w_bias): \n", - " (seg_embed): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (ff): TFXLNetFeedForward(\n", - " (layer_norm): LayerNormalization(\n", - " (axis): List(\n", - " (0): 2\n", - " )\n", - " (gamma): \n", - " (beta): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (layer_1): Dense(\n", - " 768, activation=linear, use_bias=True\n", - " (kernel): \n", - " (bias): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (layer_2): Dense(\n", - " 192, activation=linear, use_bias=True\n", - " (kernel): \n", - " (bias): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (2): TFXLNetLayer(\n", - " (rel_attn): TFXLNetRelativeAttention(\n", - " (layer_norm): LayerNormalization(\n", - " (axis): List(\n", - " (0): 2\n", - " )\n", - " (gamma): \n", - " (beta): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (q): \n", - " (k): \n", - " (v): \n", - " (o): \n", - " (r): \n", - " (r_r_bias): \n", - " (r_s_bias): \n", - " (r_w_bias): \n", - " (seg_embed): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " 
)\n", - " (ff): TFXLNetFeedForward(\n", - " (layer_norm): LayerNormalization(\n", - " (axis): List(\n", - " (0): 2\n", - " )\n", - " (gamma): \n", - " (beta): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (layer_1): Dense(\n", - " 768, activation=linear, use_bias=True\n", - " (kernel): \n", - " (bias): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (layer_2): Dense(\n", - " 192, activation=linear, use_bias=True\n", - " (kernel): \n", - " (bias): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " )\n", - " (dropout): Dropout(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_kwargs): Dict(\n", - " (name): 'transformer'\n", - " (trainable): True\n", - " (dtype): 'float32'\n", - " )\n", - " (mask_emb): \n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (transformer_pre): PrepareTransformerInputs(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (transformer_post): LastHiddenState(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_masking_post): SequentialBlock(\n", - " (layers): List(\n", - " (0): TransformerOutputToRagged(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (1): TransformerInferenceHiddenState(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_masking_pre): SequentialBlock(\n", - " (layers): List(\n", - " (0): SequenceCausalLastInference(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (1): ExtractMaskFromTargets(\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " (_feature_shapes): Dict()\n", - " (_feature_dtypes): Dict()\n", - " )\n", - " )\n", - " (signatures): _SignatureMap({'serving_default': })\n", - ")" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "model_transformer.load('t4rec_model')" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "4c62973a", "metadata": {}, "outputs": [], @@ -13524,26 +1277,17 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "e5db703a", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/dataloader/tensorflow.py:65: UserWarning: Due to a CUDA memory alignment issue in some Tensorflow operations such as Embedding ops, we recommend that 'batch_size' be at least 16 and also a power of two. 
Please change 'batch_size' to a number that is a power of two that is greater than or equal to 16.\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "loader = Loader(valid, batch_size=1)" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "e11f107c", "metadata": {}, "outputs": [], @@ -13553,7 +1297,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "c216e7fb", "metadata": {}, "outputs": [], @@ -13566,43 +1310,20 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "ea436b46", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "({'sess_pid_seq__values': ,\n", - " 'sess_pid_seq__offsets': },\n", - " None)" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "b" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "dcd414a9", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "50.1 ms ± 78.3 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" - ] - } - ], + "outputs": [], "source": [ "%%timeit\n", "\n", @@ -13611,18 +1332,10 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "b6244062", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "297 ms ± 753 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" - ] - } - ], + "outputs": [], "source": [ "%%timeit\n", "\n", @@ -13632,180 +1345,10 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "5bd66ba8", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": 
"stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n", - "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_predict_next_layer_call_fn, sequence_predict_next_layer_call_and_return_conditional_losses, sequence_predict_last_layer_call_fn while saving (showing 5 of 114). These functions will not be directly callable after loading.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: /tmp/tmpvsz5e5b2/model.savedmodel/assets\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: /tmp/tmpvsz5e5b2/model.savedmodel/assets\n", - "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:101: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n", - "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " config[i] = tf.keras.utils.serialize_keras_object(layer)\n", - "/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:134: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " return generic_utils.serialize_keras_object(obj)\n", - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. 
Compile it manually.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n", - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" - ] - } - ], + "outputs": [], "source": [ "from merlin.systems.dag.ops.workflow import TransformWorkflow\n", "from merlin.systems.dag.ops.tensorflow import PredictTensorflow\n", @@ -13815,21 +1358,7 @@ }, { "cell_type": "code", - "execution_count": 33, - "id": "3ef1e5fc", - "metadata": {}, - "outputs": [], - "source": [ - "# import merlin.models.tf as mm\n", - "# import tensorflow as tf\n", - "# tf_model_path = os.path.join('t4rec_model')\n", - "\n", - "# model = tf.keras.models.load_model(tf_model_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "e2a7b6ee", "metadata": {}, "outputs": [], @@ -13842,232 +1371,20 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "55ad012c", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nametagsdtypeis_listis_raggedproperties.domain.minproperties.domain.maxproperties.domain.nameproperties.value_count.minproperties.value_count.max
0sess_pid_seq(Tags.LIST, Tags.ITEM_ID, Tags.CATEGORICAL, Ta...DType(name='int64', element_type=<ElementType....TrueTrue1390000sess_pid_seq2None
\n", - "
" - ], - "text/plain": [ - "[{'name': 'sess_pid_seq', 'tags': {, , , , }, 'properties': {'domain': {'min': 1, 'max': 390000, 'name': 'sess_pid_seq'}, 'value_count': {'min': 2, 'max': None}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=2, max=None)))), 'is_list': True, 'is_ragged': True}]" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "train.schema.select_by_name('sess_pid_seq')" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "1a39b4f8", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n", - 
"WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_predict_next_layer_call_fn, sequence_predict_next_layer_call_and_return_conditional_losses, sequence_predict_last_layer_call_fn while saving (showing 5 of 114). These functions will not be directly callable after loading.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel/assets\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel/assets\n", - "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:101: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n", - "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " config[i] = tf.keras.utils.serialize_keras_object(layer)\n", - "/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:134: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " return generic_utils.serialize_keras_object(obj)\n", - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. 
Compile it manually.\n" - ] - } - ], + "outputs": [], "source": [ "from merlin.systems.dag.ensemble import Ensemble\n", "\n", @@ -14077,156 +1394,10 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "id": "d7cdc6cc", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "I0509 01:55:17.511153 1205 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x7ff516000000' with size 268435456\n", - "I0509 01:55:17.511472 1205 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 67108864\n", - "I0509 01:55:17.513574 1205 model_lifecycle.cc:459] loading: executor_model:1\n", - "I0509 01:55:17.513595 1205 model_lifecycle.cc:459] loading: 0_transformworkflowtriton:1\n", - "I0509 01:55:17.513608 1205 model_lifecycle.cc:459] loading: 1_predicttensorflowtriton:1\n", - "I0509 01:55:17.693342 1205 tensorflow.cc:2536] TRITONBACKEND_Initialize: tensorflow\n", - "I0509 01:55:17.693362 1205 tensorflow.cc:2546] Triton TRITONBACKEND API version: 1.10\n", - "I0509 01:55:17.693365 1205 tensorflow.cc:2552] 'tensorflow' TRITONBACKEND API version: 1.10\n", - "I0509 01:55:17.693368 1205 tensorflow.cc:2576] backend configuration:\n", - "{\"cmdline\":{\"auto-complete-config\":\"true\",\"min-compute-capability\":\"6.000000\",\"backend-directory\":\"/opt/tritonserver/backends\",\"default-max-batch-size\":\"4\"}}\n", - "2023-05-09 01:55:18.992767: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-05-09 01:55:20.814292: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:55:20.814710: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:55:20.814876: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", - " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", - "I0509 01:55:22.571307 1205 tensorflow.cc:2642] TRITONBACKEND_ModelInitialize: 1_predicttensorflowtriton (version 1)\n", - "2023-05-09 01:55:22.571962: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", - "2023-05-09 01:55:22.592315: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }\n", - "2023-05-09 01:55:22.592352: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", - "2023-05-09 01:55:22.592474: I 
tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-05-09 01:55:22.593417: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:55:22.609446: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:55:22.609627: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:55:22.855175: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:55:22.855338: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:55:22.855479: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:55:22.855607: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 29840 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", - "2023-05-09 01:55:22.913337: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled\n", - "2023-05-09 01:55:22.922530: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.\n", - "2023-05-09 01:55:23.337695: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", - "2023-05-09 01:55:23.403830: I tensorflow/cc/saved_model/loader.cc:325] SavedModel load for tags { serve }; Status: success: OK. 
Took 831878 microseconds.\n", - "2023-05-09 01:55:24.746386: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-05-09 01:55:26.581369: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:55:26.581724: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:55:26.581886: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", - " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", - "I0509 01:55:28.344195 1205 python_be.cc:1856] TRITONBACKEND_ModelInstanceInitialize: executor_model_0 (GPU device 0)\n", - "2023-05-09 01:55:29.628356: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-05-09 01:55:31.434543: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:55:31.434993: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:55:31.435198: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "I0509 01:55:31.465538 1205 tensorflow.cc:2691] TRITONBACKEND_ModelInstanceInitialize: 1_predicttensorflowtriton_0 (GPU device 0)\n", - "I0509 01:55:31.465701 1205 model_lifecycle.cc:694] successfully loaded 'executor_model' version 1\n", - "2023-05-09 01:55:31.465951: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", - "2023-05-09 01:55:31.490532: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }\n", - "2023-05-09 01:55:31.490575: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", - "2023-05-09 01:55:31.490777: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:55:31.491003: I 
tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:55:31.491186: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:55:31.491411: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:55:31.491588: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:55:31.491744: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 29840 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", - "2023-05-09 01:55:31.549442: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-05-09 01:55:32.146750: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", - "2023-05-09 01:55:32.213463: I tensorflow/cc/saved_model/loader.cc:325] SavedModel load for tags { serve }; Status: success: OK. Took 747520 microseconds.\n", - "I0509 01:55:32.213572 1205 python_be.cc:1856] TRITONBACKEND_ModelInstanceInitialize: 0_transformworkflowtriton_0 (GPU device 0)\n", - "I0509 01:55:32.213757 1205 model_lifecycle.cc:694] successfully loaded '1_predicttensorflowtriton' version 1\n", - "2023-05-09 01:55:33.476455: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-05-09 01:55:35.263779: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:55:35.264127: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-05-09 01:55:35.264284: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "I0509 01:55:35.317101 1205 model_lifecycle.cc:694] successfully loaded '0_transformworkflowtriton' version 1\n", - "I0509 01:55:35.317235 1205 server.cc:563] \n", - "+------------------+------+\n", - "| Repository Agent | Path |\n", - "+------------------+------+\n", - "+------------------+------+\n", - "\n", - "I0509 01:55:35.317307 1205 server.cc:590] \n", - 
"+------------+-----------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", - "| Backend | Path | Config |\n", - "+------------+-----------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", - "| python | /opt/tritonserver/backends/python/libtriton_python.so | {\"cmdline\":{\"auto-complete-config\":\"true\",\"min-compute-capability\":\"6.000000\",\"backend-directory\":\"/opt/tritonserver/backends\",\"default-max-batch-size\":\"4\"}} |\n", - "| tensorflow | /opt/tritonserver/backends/tensorflow2/libtriton_tensorflow2.so | {\"cmdline\":{\"auto-complete-config\":\"true\",\"min-compute-capability\":\"6.000000\",\"backend-directory\":\"/opt/tritonserver/backends\",\"default-max-batch-size\":\"4\"}} |\n", - "+------------+-----------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", - "\n", - "I0509 01:55:35.317350 1205 server.cc:633] \n", - "+---------------------------+---------+--------+\n", - "| Model | Version | Status |\n", - "+---------------------------+---------+--------+\n", - "| 0_transformworkflowtriton | 1 | READY |\n", - "| 1_predicttensorflowtriton | 1 | READY |\n", - "| executor_model | 1 | READY |\n", - "+---------------------------+---------+--------+\n", - "\n", - "I0509 01:55:35.343214 1205 metrics.cc:864] Collecting metrics for GPU 0: Quadro RTX 8000\n", - "I0509 01:55:35.343395 1205 metrics.cc:757] Collecting CPU metrics\n", - "I0509 01:55:35.343534 1205 tritonserver.cc:2264] \n", - "+----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", - "| Option | Value |\n", - "+----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", - "| server_id | triton |\n", - "| server_version | 2.28.0 |\n", - "| server_extensions | classification sequence model_repository model_repository(unload_dependents) schedule_policy model_configuration system_shared_memory cuda_shared_memory binary_tensor_data statistics trace logging |\n", - "| model_repository_path[0] | /workspace/models_for_benchmarking/ |\n", - "| model_control_mode | MODE_NONE |\n", - "| strict_model_config | 0 |\n", - "| rate_limit | OFF |\n", - "| pinned_memory_pool_byte_size | 268435456 |\n", - "| cuda_memory_pool_byte_size{0} | 67108864 |\n", - "| response_cache_byte_size | 0 |\n", - "| min_supported_compute_capability | 6.0 |\n", - "| strict_readiness | 1 |\n", - "| exit_timeout | 30 |\n", - "+----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", - "\n", - "I0509 01:55:35.344357 1205 grpc_server.cc:4819] Started GRPCInferenceService at 0.0.0.0:8001\n", - "I0509 01:55:35.344507 1205 
http_server.cc:3477] Started HTTPService at 0.0.0.0:8000\n", - "I0509 01:55:35.385232 1205 http_server.cc:184] Started Metrics Service at 0.0.0.0:8002\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-05-09 01:56:23.448369: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" - ] - } - ], + "outputs": [], "source": [ "import nvtabular.inference.triton as nvt_triton\n", "import tritonclient.grpc as grpcclient\n", @@ -14234,701 +1405,6 @@ "\n", "subprocess.Popen(['tritonserver', '--model-repository=/workspace/models_for_benchmarking/'])" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6f63b425", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4a772eeb", - "metadata": {}, - "outputs": [], - "source": [ - "# !pkill triton" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f6ed7b5a", - "metadata": {}, - "outputs": [], - "source": [ - "import tritonhttpclient\n", - "try:\n", - " triton_client = tritonhttpclient.InferenceServerClient(url=\"localhost:8000\", verbose=True)\n", - " print(\"client created.\")\n", - "except Exception as e:\n", - " print(\"channel creation failed: \" + str(e))\n", - "triton_client.is_server_live()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10c2a62e", - "metadata": {}, - "outputs": [], - "source": [ - "validation_data.iloc[]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2c2723e9", - "metadata": {}, - "outputs": [], - "source": [ - "from merlin.systems.triton import convert_df_to_triton_input\n", - "\n", - "validation_data = valid.compute()\n", - "inputs = convert_df_to_triton_input(wf.input_schema, validation_data.iloc[:1])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fa9fc0dd", - "metadata": {}, - "outputs": [], - "source": [ - "inputs[0].name()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6ae7eb08", - "metadata": {}, - "outputs": [], - "source": [ - "inputs[0].shape()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ac3596c3", - "metadata": {}, - "outputs": [], - "source": [ - "inputs[1].name()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "18f8e77d", - "metadata": {}, - "outputs": [], - "source": [ - "inputs[1].shape()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "292b58da", - "metadata": {}, - "outputs": [], - "source": [ - "validation_data.iloc[:1]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f8e1fd90", - "metadata": {}, - "outputs": [], - "source": [ - "wf.input_schema" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5a79c58f", - "metadata": {}, - "outputs": [], - "source": [ - "import tritonclient.grpc as grpcclient\n", - "\n", - "with grpcclient.InferenceServerClient(\"localhost:8001\") as client:\n", - " response = client.infer('1_predicttensorflowtriton', inputs)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b6dd51a6", - "metadata": {}, - "outputs": [], - "source": [ - "response.get_output('sess_pid_seq/categorical_output')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ba6712bb", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "637eb3f0", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - 
"execution_count": null, - "id": "fd62f641", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "d1bc6530", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", - " (_feature_shapes): Dict(\n", - " (sess_pid_seq): TensorShape([128, None, 1])\n", - " )\n", - " (_feature_dtypes): Dict(\n", - " (sess_pid_seq): tf.int32\n", - " )\n", - "), because it is not built.\n", - "WARNING:absl:Function `_wrapped_model` contains input name(s) sess_pid_seq with unsupported characters which will be renamed to sess_pid_seq_1 in the 
SavedModel.\n", - "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_predict_next_layer_call_fn, sequence_predict_next_layer_call_and_return_conditional_losses, sequence_predict_last_layer_call_fn while saving (showing 5 of 110). These functions will not be directly callable after loading.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: /workspace/models_for_benchmarking/0_predicttensorflowtriton/1/model.savedmodel/assets\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: /workspace/models_for_benchmarking/0_predicttensorflowtriton/1/model.savedmodel/assets\n", - "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:83: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n", - "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " config[i] = tf.keras.utils.serialize_keras_object(layer)\n", - "/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:134: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", - " return generic_utils.serialize_keras_object(obj)\n", - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. 
Compile it manually.\n" - ] - } - ], - "source": [ - "from merlin.systems.dag.ensemble import Ensemble\n", - "\n", - "ensemble = Ensemble(serving_operators, train.schema)\n", - "ens_conf, node_confs = ensemble.export(\"/workspace/models_for_benchmarking\")" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "8d390999", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "name: \"0_predicttensorflowtriton\"\r\n", - "platform: \"tensorflow_savedmodel\"\r\n", - "input {\r\n", - " name: \"sess_pid_seq\"\r\n", - " data_type: TYPE_INT32\r\n", - " dims: -1\r\n", - " dims: 1\r\n", - "}\r\n", - "input {\r\n", - " name: \"sess_pid_seq_1\"\r\n", - " data_type: TYPE_INT32\r\n", - " dims: -1\r\n", - " dims: 1\r\n", - "}\r\n", - "output {\r\n", - " name: \"sess_pid_seq/categorical_output\"\r\n", - " data_type: TYPE_FP32\r\n", - " dims: -1\r\n", - " dims: 390001\r\n", - "}\r\n", - "parameters {\r\n", - " key: \"TF_GRAPH_TAG\"\r\n", - " value {\r\n", - " string_value: \"serve\"\r\n", - " }\r\n", - "}\r\n", - "parameters {\r\n", - " key: \"TF_SIGNATURE_DEF\"\r\n", - " value {\r\n", - " string_value: \"serving_default\"\r\n", - " }\r\n", - "}\r\n", - "backend: \"tensorflow\"\r\n" - ] - } - ], - "source": [ - "cat /workspace/models_for_benchmarking/0_predicttensorflowtriton/config.pbtxt" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "f7fe741c", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Overwriting /workspace/models_for_benchmarking/0_predicttensorflowtriton/config.pbtxt\n" - ] - } - ], - "source": [ - "%%writefile /workspace/models_for_benchmarking/0_predicttensorflowtriton/config.pbtxt\n", - "\n", - "name: \"0_predicttensorflowtriton\"\n", - "platform: \"tensorflow_savedmodel\"\n", - "input {\n", - " name: \"sess_pid_seq\"\n", - " data_type: TYPE_INT32\n", - " dims: -1\n", - " dims: 1\n", - "}\n", - "input {\n", - " name: \"sess_pid_seq_1\"\n", - " data_type: TYPE_INT32\n", - " dims: -1\n", - " dims: 1\n", - "}\n", - "output {\n", - " name: \"sess_pid_seq/categorical_output\"\n", - " data_type: TYPE_FP32\n", - " dims: -1\n", - " dims: 390001\n", - "}\n", - "parameters {\n", - " key: \"TF_GRAPH_TAG\"\n", - " value {\n", - " string_value: \"serve\"\n", - " }\n", - "}\n", - "parameters {\n", - " key: \"TF_SIGNATURE_DEF\"\n", - " value {\n", - " string_value: \"serving_default\"\n", - " }\n", - "}\n", - "backend: \"tensorflow\"" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "9cfe8bca", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "name: \"executor_model\"\r\n", - "platform: \"merlin_executor\"\r\n", - "input {\r\n", - " name: \"sess_pid_seq__values\"\r\n", - " data_type: TYPE_INT64\r\n", - " dims: -1\r\n", - " dims: -1\r\n", - "}\r\n", - "input {\r\n", - " name: \"sess_pid_seq__lengths\"\r\n", - " data_type: TYPE_INT32\r\n", - " dims: -1\r\n", - " dims: -1\r\n", - "}\r\n", - "output {\r\n", - " name: \"sess_pid_seq/categorical_output\"\r\n", - " data_type: TYPE_FP32\r\n", - " dims: -1\r\n", - " dims: 390001\r\n", - "}\r\n", - "backend: \"python\"\r\n" - ] - } - ], - "source": [ - "cat /workspace/models_for_benchmarking/executor_model/config.pbtxt" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "a659255d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Overwriting 
/workspace/models_for_benchmarking/executor_model/config.pbtxt\n" - ] - } - ], - "source": [ - "%%writefile /workspace/models_for_benchmarking/executor_model/config.pbtxt\n", - "\n", - "name: \"executor_model\"\n", - "platform: \"merlin_executor\"\n", - "input {\n", - " name: \"sess_pid_seq__values\"\n", - " data_type: TYPE_INT64\n", - " dims: -1\n", - " dims: -1\n", - "}\n", - "input {\n", - " name: \"sess_pid_seq__nnzs\"\n", - " data_type: TYPE_INT64\n", - " dims: -1\n", - " dims: -1\n", - "}\n", - "output {\n", - " name: \"sess_pid_seq/categorical_output\"\n", - " data_type: TYPE_FP32\n", - " dims: -1\n", - " dims: 390001\n", - "}\n", - "backend: \"python\"" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "ddf2dc55", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.\r\n", - "#\r\n", - "# Redistribution and use in source and binary forms, with or without\r\n", - "# modification, are permitted provided that the following conditions\r\n", - "# are met:\r\n", - "# * Redistributions of source code must retain the above copyright\r\n", - "# notice, this list of conditions and the following disclaimer.\r\n", - "# * Redistributions in binary form must reproduce the above copyright\r\n", - "# notice, this list of conditions and the following disclaimer in the\r\n", - "# documentation and/or other materials provided with the distribution.\r\n", - "# * Neither the name of NVIDIA CORPORATION nor the names of its\r\n", - "# contributors may be used to endorse or promote products derived\r\n", - "# from this software without specific prior written permission.\r\n", - "#\r\n", - "# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY\r\n", - "# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r\n", - "# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\r\n", - "# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r\n", - "# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r\n", - "# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r\n", - "# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r\n", - "# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY\r\n", - "# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\r\n", - "# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\r\n", - "# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r\n", - "import pathlib\r\n", - "from pathlib import Path\r\n", - "\r\n", - "from merlin.dag import postorder_iter_nodes\r\n", - "from merlin.systems.dag import Ensemble\r\n", - "from merlin.systems.dag.runtimes.triton import TritonExecutorRuntime\r\n", - "from merlin.systems.triton.conversions import (\r\n", - " dict_array_to_triton_response,\r\n", - " triton_request_to_dict_array,\r\n", - ")\r\n", - "from merlin.systems.triton.utils import triton_error_handling, triton_multi_request\r\n", - "\r\n", - "\r\n", - "class TritonPythonModel:\r\n", - " \"\"\"Model for Triton Python Backend.\r\n", - "\r\n", - " Every Python model must have \"TritonPythonModel\" as the class name\r\n", - " \"\"\"\r\n", - "\r\n", - " def initialize(self, args):\r\n", - " \"\"\"Called only once when the model is being loaded. 
Allowing\r\n", - " the model to initialize any state associated with this model.\r\n", - "\r\n", - " Parameters\r\n", - " ----------\r\n", - " args : dict\r\n", - " Both keys and values are strings. The dictionary keys and values are:\r\n", - " * model_config: A JSON string containing the model configuration\r\n", - " * model_instance_kind: A string containing model instance kind\r\n", - " * model_instance_device_id: A string containing model instance device ID\r\n", - " * model_repository: Model repository path\r\n", - " * model_version: Model version\r\n", - " * model_name: Model name\r\n", - " \"\"\"\r\n", - " # Arg parsing\r\n", - " model_repo = args[\"model_repository\"]\r\n", - " repository_path = _parse_model_repository(model_repo)\r\n", - "\r\n", - " ensemble_path = (\r\n", - " Path(repository_path) / args[\"model_name\"] / str(args[\"model_version\"]) / \"ensemble\"\r\n", - " )\r\n", - "\r\n", - " self.ensemble = Ensemble.load(str(ensemble_path))\r\n", - "\r\n", - " for node in list(postorder_iter_nodes(self.ensemble.graph.output_node)):\r\n", - " if hasattr(node.op, \"load_artifacts\"):\r\n", - " node.op.load_artifacts(str(ensemble_path))\r\n", - "\r\n", - " @triton_multi_request\r\n", - " @triton_error_handling\r\n", - " def execute(self, request):\r\n", - " \"\"\"Receives a list of pb_utils.InferenceRequest as the only argument. This\r\n", - " function is called when an inference is requested for this model. Depending on the\r\n", - " batching configuration (e.g. Dynamic Batching) used, `requests` may contain\r\n", - " multiple requests. Every Python model, must create one pb_utils.InferenceResponse\r\n", - " for every pb_utils.InferenceRequest in `requests`. If there is an error, you can\r\n", - " set the error argument when creating a pb_utils.InferenceResponse.\r\n", - "\r\n", - " Parameters\r\n", - " ----------\r\n", - " requests : list\r\n", - " A list of pb_utils.InferenceRequest\r\n", - "\r\n", - " Returns\r\n", - " -------\r\n", - " list\r\n", - " A list of pb_utils.InferenceResponse. 
The length of this list must\r\n", - " be the same as `requests`\r\n", - " \"\"\"\r\n", - " inputs = triton_request_to_dict_array(request, self.ensemble.input_schema.column_names)\r\n", - " outputs = self.ensemble.transform(inputs, runtime=TritonExecutorRuntime())\r\n", - " return dict_array_to_triton_response(outputs)\r\n", - "\r\n", - "\r\n", - "def _parse_model_repository(model_repository: str) -> str:\r\n", - " \"\"\"\r\n", - " Extract the model repository path from the model_repository value\r\n", - " passed to the TritonPythonModel initialize method.\r\n", - " \"\"\"\r\n", - " # Handle bug in Tritonserver 22.06\r\n", - " # model_repository argument became path to model.py\r\n", - " # instead of path to model directory within the model repository\r\n", - " if model_repository.endswith(\".py\"):\r\n", - " return str(pathlib.Path(model_repository).parent.parent.parent)\r\n", - " else:\r\n", - " return str(pathlib.Path(model_repository).parent)\r\n" - ] - } - ], - "source": [ - "cat /workspace/models_for_benchmarking/executor_model/1/model.py" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "3d21ce62", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"versions\": {\"python\": \"3.8.10 (default, Nov 14 2022, 12:59:47) \\n[GCC 9.4.0]\"}, \"generated_timestamp\": 1679017581}" - ] - } - ], - "source": [ - "cat /workspace/models_for_benchmarking/executor_model/1/ensemble/metadata.json" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7998b835", - "metadata": {}, - "outputs": [], - "source": [ - "# %%writefile /workspace/models_for_benchmarking/t4r_pytorch_pt/config.pbtxt\n", - "\n", - "# name: \"t4r_pytorch_pt\"\n", - "# input {\n", - "# name: \"sess_pid_seq__values\"\n", - "# data_type: TYPE_INT64\n", - "# dims: -1\n", - "# dims: 1\n", - "# }\n", - "# input {\n", - "# name: \"sess_pid_seq__nnzs\"\n", - "# data_type: TYPE_INT64\n", - "# dims: -1\n", - "# dims: 1\n", - "# }\n", - "# output {\n", - "# name: \"output\"\n", - "# data_type: TYPE_FP32\n", - "# dims: -1\n", - "# dims: 20\n", - "# }\n", - "# backend: \"python\"" - ] } ], "metadata": { From eae1088f16c85f7c509f36f3f386957156e363bf Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Tue, 9 May 2023 10:37:36 +0000 Subject: [PATCH 15/15] update --- ...el_for_benchmarking-inference-on-CPU.ipynb | 12592 ++++++++++++++++ ...nd_save_model_for_benchmarking_works.ipynb | 1492 ++ 2 files changed, 14084 insertions(+) create mode 100644 T4Rec_repro/train_and_save_model_for_benchmarking-inference-on-CPU.ipynb create mode 100644 T4Rec_repro/train_and_save_model_for_benchmarking_works.ipynb diff --git a/T4Rec_repro/train_and_save_model_for_benchmarking-inference-on-CPU.ipynb b/T4Rec_repro/train_and_save_model_for_benchmarking-inference-on-CPU.ipynb new file mode 100644 index 0000000000..68e207b4aa --- /dev/null +++ b/T4Rec_repro/train_and_save_model_for_benchmarking-inference-on-CPU.ipynb @@ -0,0 +1,12592 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "026bd245", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/core\n", + " 9d9b5c6a..c5c9bc25 release-23.04 -> origin/release-23.04\n", + " * [new branch] feature/merlin-array-dispatch -> origin/feature/merlin-array-dispatch\n", + " * [new branch] fix-repartition -> origin/fix-repartition\n", + " * [new branch] fix-with-properties -> origin/fix-with-properties\n", + " * [new branch] 
gh-pages -> origin/gh-pages\n", + " * [new branch] laiacano/docs-on-pr -> origin/laiacano/docs-on-pr\n", + " * [new branch] main -> origin/main\n", + " * [new branch] release-22.10 -> origin/release-22.10\n", + " * [new branch] release-22.11 -> origin/release-22.11\n", + " * [new branch] release-22.12 -> origin/release-22.12\n", + " * [new branch] release-23.02 -> origin/release-23.02\n", + " * [new branch] revert-163-refactor/dictarray-columns -> origin/revert-163-refactor/dictarray-columns\n", + " * [new branch] stable -> origin/stable\n", + " * [new branch] tags-intersection -> origin/tags-intersection\n", + " * [new branch] v0.2.0-docs -> origin/v0.2.0-docs\n", + " * [new tag] v0.10.0 -> v0.10.0\n", + " * [new tag] v0.8.0 -> v0.8.0\n", + " * [new tag] v0.9.0 -> v0.9.0\n", + " * [new tag] v23.02.01 -> v23.02.01\n", + " * [new tag] v0.1.0 -> v0.1.0\n", + " * [new tag] v0.1.1 -> v0.1.1\n", + " * [new tag] v0.2.0 -> v0.2.0\n", + " * [new tag] v0.3.0 -> v0.3.0\n", + " * [new tag] v0.4.0 -> v0.4.0\n", + " * [new tag] v0.5.0 -> v0.5.0\n", + " * [new tag] v0.6.0 -> v0.6.0\n", + " * [new tag] v0.7.0 -> v0.7.0\n", + " * [new tag] v23.02.00 -> v23.02.00\n", + " * [new tag] v23.05.dev0 -> v23.05.dev0\n", + "Switched to a new branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Branch 'main' set up to track remote branch 'main' from 'origin'.\n", + "Processing /core\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Building wheels for collected packages: merlin-core\n", + " Building wheel for merlin-core (PEP 517): started\n", + " Building wheel for merlin-core (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-core: filename=merlin_core-23.5.dev0+21.ga0bcd30f-py3-none-any.whl size=161483 sha256=ec8d33030b56d7a0b9df3f50950a4131456ba0916c4e44fa090f94e8f0cdd2af\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-x7t5590g/wheels/8f/da/8c/c779661788874afaa32fd10abeac6016635956e3bad9940584\n", + "Successfully built merlin-core\n", + "Installing collected packages: merlin-core\n", + " Attempting uninstall: merlin-core\n", + " Found existing installation: merlin-core 23.4.0\n", + " Uninstalling merlin-core-23.4.0:\n", + " Successfully uninstalled merlin-core-23.4.0\n", + "Successfully installed merlin-core-23.5.dev0+21.ga0bcd30f\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/dataloader\n", + " * [new branch] chore/comprehensive-shapes -> origin/chore/comprehensive-shapes\n", + " * [new branch] chore/packages-action -> origin/chore/packages-action\n", + " * [new branch] collabify_examples -> origin/collabify_examples\n", + " * [new branch] docs-add-seo -> origin/docs-add-seo\n", + " * [new branch] docs-calver-banner -> origin/docs-calver-banner\n", + " * [new branch] ds-api -> origin/ds-api\n", + " * [new branch] feature/embedding-tags -> origin/feature/embedding-tags\n", + " * [new branch] fix-sparse-logic -> origin/fix-sparse-logic\n", + " * [new branch] fix/tf-batch-size-warning -> origin/fix/tf-batch-size-warning\n", + " * [new branch] gh-pages -> origin/gh-pages\n", + " * [new branch] gha-test -> origin/gha-test\n", + " * [new branch] laiacano/docs-pr -> 
origin/laiacano/docs-pr\n", + " * [new branch] main -> origin/main\n", + " * [new branch] no_gpu -> origin/no_gpu\n", + " * [new branch] release-22.11 -> origin/release-22.11\n", + " * [new branch] release-22.12 -> origin/release-22.12\n", + " * [new branch] release-23.02 -> origin/release-23.02\n", + " * [new branch] stable -> origin/stable\n", + " * [new branch] update_github_actions -> origin/update_github_actions\n", + " * [new tag] v0.0.3 -> v0.0.3\n", + " * [new tag] v0.0.4 -> v0.0.4\n", + " * [new tag] v23.02.01 -> v23.02.01\n", + " * [new tag] v0.0.1 -> v0.0.1\n", + " * [new tag] v0.0.2 -> v0.0.2\n", + " * [new tag] v23.02.00 -> v23.02.00\n", + " * [new tag] v23.05.dev0 -> v23.05.dev0\n", + "Switched to a new branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Branch 'main' set up to track remote branch 'main' from 'origin'.\n", + "Processing /dataloader\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Building wheels for collected packages: merlin-dataloader\n", + " Building wheel for merlin-dataloader (PEP 517): started\n", + " Building wheel for merlin-dataloader (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-dataloader: filename=merlin_dataloader-23.5.dev0+8.gd9e97b4-py3-none-any.whl size=34916 sha256=4c5a734dc23827efb928b5c29de6eb394b7f6e92940e054702433ea07a229d68\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-gz8k5ff8/wheels/8c/19/5b/15dc04f5a977f6a7f73ed66c91996a687b1d9e3154a4765536\n", + "Successfully built merlin-dataloader\n", + "Installing collected packages: merlin-dataloader\n", + " Attempting uninstall: merlin-dataloader\n", + " Found existing installation: merlin-dataloader 23.4.0\n", + " Uninstalling merlin-dataloader-23.4.0:\n", + " Successfully uninstalled merlin-dataloader-23.4.0\n", + "Successfully installed merlin-dataloader-23.5.dev0+8.gd9e97b4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/NVTabular\n", + " f8f484e5..90489194 release-23.04 -> origin/release-23.04\n", + " * [new branch] 1077-implement -> origin/1077-implement\n", + " * [new branch] 21.09/column-tagging -> origin/21.09/column-tagging\n", + " * [new branch] 21.09/dataset-collection -> origin/21.09/dataset-collection\n", + " * [new branch] 21.09/operator-block -> origin/21.09/operator-block\n", + " * [new branch] 21.09/schema -> origin/21.09/schema\n", + " * [new branch] add_sum_to_supported_aggregations -> origin/add_sum_to_supported_aggregations\n", + " * [new branch] aiobotocore_v2 -> origin/aiobotocore_v2\n", + " * [new branch] alexanderronquillo-patch-1 -> origin/alexanderronquillo-patch-1\n", + " * [new branch] automate_pypi -> origin/automate_pypi\n", + " * [new branch] bench-pynvml-fix -> origin/bench-pynvml-fix\n", + " * [new branch] branch-0.6 -> origin/branch-0.6\n", + " * [new branch] bschifferer-remove_examples_1 -> origin/bschifferer-remove_examples_1\n", + " * [new branch] categorify-inference-int16 -> origin/categorify-inference-int16\n", + " * [new branch] columns_with_aggs_in_names -> origin/columns_with_aggs_in_names\n", + " * [new branch] conda-package-python-versions -> origin/conda-package-python-versions\n", + " * [new branch] 
conda_gh_action -> origin/conda_gh_action\n", + " * [new branch] dataloader-remove-sparse -> origin/dataloader-remove-sparse\n", + " * [new branch] dataloader_doc_fix -> origin/dataloader_doc_fix\n", + " * [new branch] disable-package-build-on-pull-requests -> origin/disable-package-build-on-pull-requests\n", + " * [new branch] dont_install_tests -> origin/dont_install_tests\n", + " * [new branch] drop_low_cardinality -> origin/drop_low_cardinality\n", + " * [new branch] fix-docs-tox-env -> origin/fix-docs-tox-env\n", + " * [new branch] fix-wf-file -> origin/fix-wf-file\n", + " * [new branch] fix/inference-deprecation -> origin/fix/inference-deprecation\n", + " * [new branch] fix_data_path -> origin/fix_data_path\n", + " * [new branch] fix_hugectr_nb -> origin/fix_hugectr_nb\n", + " * [new branch] fix_nbs -> origin/fix_nbs\n", + " * [new branch] gh-pages -> origin/gh-pages\n", + " * [new branch] groupby_without_groupby_col_in_col_selector -> origin/groupby_without_groupby_col_in_col_selector\n", + " * [new branch] hugectr-newapi -> origin/hugectr-newapi\n", + " * [new branch] laiacano/check-list-from-schema -> origin/laiacano/check-list-from-schema\n", + " * [new branch] laiacano/workflow-subgraph -> origin/laiacano/workflow-subgraph\n", + " * [new branch] main -> origin/main\n", + " * [new branch] na_sentinel -> origin/na_sentinel\n", + " * [new branch] notebooks-21.10 -> origin/notebooks-21.10\n", + " * [new branch] nvt-1195 -> origin/nvt-1195\n", + " * [new branch] nvtabular_examples -> origin/nvtabular_examples\n", + " * [new branch] packages-workflow-split -> origin/packages-workflow-split\n", + " * [new branch] readme_updates -> origin/readme_updates\n", + " * [new branch] refactor/fit-schema -> origin/refactor/fit-schema\n", + " * [new branch] refactor/input-column-selection -> origin/refactor/input-column-selection\n", + " * [new branch] refactor/postpone-schema-binding -> origin/refactor/postpone-schema-binding\n", + " * [new branch] release-22.10 -> origin/release-22.10\n", + " * [new branch] release-22.11 -> origin/release-22.11\n", + " * [new branch] release-22.12 -> origin/release-22.12\n", + " * [new branch] release-23.02 -> origin/release-23.02\n", + " * [new branch] remove_poetry -> origin/remove_poetry\n", + " * [new branch] remove_release_notes -> origin/remove_release_notes\n", + " * [new branch] repeat-ops -> origin/repeat-ops\n", + " * [new branch] rjzamora-simplify-criteo -> origin/rjzamora-simplify-criteo\n", + " * [new branch] rnyak-patch-1 -> origin/rnyak-patch-1\n", + " * [new branch] romeyn/input-api -> origin/romeyn/input-api\n", + " * [new branch] stable -> origin/stable\n", + " * [new branch] test-column-similarity-dataset-cpu-default-none -> origin/test-column-similarity-dataset-cpu-default-none\n", + " * [new branch] test-torch-dataloader-dataset-cpu-default-none -> origin/test-torch-dataloader-dataset-cpu-default-none\n", + " * [new branch] torch_catch -> origin/torch_catch\n", + " * [new branch] update-dask-reqs -> origin/update-dask-reqs\n", + " * [new branch] update_merlin_core -> origin/update_merlin_core\n", + " * [new branch] update_requirements -> origin/update_requirements\n", + " * [new branch] v0.10.0-docs -> origin/v0.10.0-docs\n", + " * [new branch] v0.11.0-docs -> origin/v0.11.0-docs\n", + " * [new branch] v0.7.1-docs -> origin/v0.7.1-docs\n", + " * [new branch] v0.8.0-docs -> origin/v0.8.0-docs\n", + " * [new branch] v0.9.0-docs -> origin/v0.9.0-docs\n", + " * [new branch] v1.0.0-docs -> origin/v1.0.0-docs\n", + " * [new tag] v0.6.1 -> 
v0.6.1\n", + " * [new tag] v1.6.0 -> v1.6.0\n", + " * [new tag] v1.7.0 -> v1.7.0\n", + " * [new tag] v1.8.1 -> v1.8.1\n", + " * [new tag] v23.02.00 -> v23.02.00\n", + " * [new tag] v0.1.0 -> v0.1.0\n", + " * [new tag] v0.1.1 -> v0.1.1\n", + " * [new tag] v0.10.0 -> v0.10.0\n", + " * [new tag] v0.11.0 -> v0.11.0\n", + " * [new tag] v0.2.0 -> v0.2.0\n", + " * [new tag] v0.3.0 -> v0.3.0\n", + " * [new tag] v0.4.0 -> v0.4.0\n", + " * [new tag] v0.5.0 -> v0.5.0\n", + " * [new tag] v0.5.1 -> v0.5.1\n", + " * [new tag] v0.5.2 -> v0.5.2\n", + " * [new tag] v0.5.3 -> v0.5.3\n", + " * [new tag] v0.6.0 -> v0.6.0\n", + " * [new tag] v0.7.0 -> v0.7.0\n", + " * [new tag] v0.7.1 -> v0.7.1\n", + " * [new tag] v0.8.0 -> v0.8.0\n", + " * [new tag] v0.9.0 -> v0.9.0\n", + " * [new tag] v1.0.0 -> v1.0.0\n", + " * [new tag] v1.1.0 -> v1.1.0\n", + " * [new tag] v1.1.1 -> v1.1.1\n", + " * [new tag] v1.2.0 -> v1.2.0\n", + " * [new tag] v1.2.1 -> v1.2.1\n", + " * [new tag] v1.2.2 -> v1.2.2\n", + " * [new tag] v1.3.0 -> v1.3.0\n", + " * [new tag] v1.3.1 -> v1.3.1\n", + " * [new tag] v1.3.2 -> v1.3.2\n", + " * [new tag] v1.3.3 -> v1.3.3\n", + " * [new tag] v1.4.0 -> v1.4.0\n", + " * [new tag] v1.5.0 -> v1.5.0\n", + " * [new tag] v1.8.0 -> v1.8.0\n", + " * [new tag] v23.05.dev0 -> v23.05.dev0\n", + "Switched to a new branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Branch 'main' set up to track remote branch 'main' from 'origin'.\n", + "Processing /nvtabular\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Building wheels for collected packages: nvtabular\n", + " Building wheel for nvtabular (PEP 517): started\n", + " Building wheel for nvtabular (PEP 517): finished with status 'done'\n", + " Created wheel for nvtabular: filename=nvtabular-23.5.dev0+7.g67136eba-cp38-cp38-linux_x86_64.whl size=259925 sha256=daaa86cb4ab2df4b9c6a04a6ddea5e6a4ac5b14b901740152ef71cb3b53171db\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-11i49cvh/wheels/df/bf/c2/9cc2a62fe6da42038c26a9c0c4e25f9767093528b102fa30a2\n", + "Successfully built nvtabular\n", + "Installing collected packages: nvtabular\n", + " Attempting uninstall: nvtabular\n", + " Found existing installation: nvtabular 23.4.0\n", + " Uninstalling nvtabular-23.4.0:\n", + " Successfully uninstalled nvtabular-23.4.0\n", + "Successfully installed nvtabular-23.5.dev0+7.g67136eba\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/Models\n", + " a44eced6..56c7d6a4 release-23.04 -> origin/release-23.04\n", + " * [new branch] Mai -> origin/Mai\n", + " * [new branch] add_category_encoding_test -> origin/add_category_encoding_test\n", + " * [new branch] add_lightfm_and_explicit_training_example -> origin/add_lightfm_and_explicit_training_example\n", + " * [new branch] add_logo_tracking_to_07 -> origin/add_logo_tracking_to_07\n", + " * [new branch] add_notebooks_test -> origin/add_notebooks_test\n", + " * [new branch] advanced_example -> origin/advanced_example\n", + " * [new branch] asvdb_metric_tracking -> origin/asvdb_metric_tracking\n", + " * [new branch] batched-dataset/schema -> origin/batched-dataset/schema\n", + " * [new branch] benchmark-session-based -> 
origin/benchmark-session-based\n", + " * [new branch] block-context -> origin/block-context\n", + " * [new branch] blossom_report_skipped -> origin/blossom_report_skipped\n", + " * [new branch] break_ties -> origin/break_ties\n", + " * [new branch] bs_unittest_examples_v2 -> origin/bs_unittest_examples_v2\n", + " * [new branch] bschifferer-patch-1 -> origin/bschifferer-patch-1\n", + " * [new branch] change_two_tower_api_test -> origin/change_two_tower_api_test\n", + " * [new branch] ci/backend-tests -> origin/ci/backend-tests\n", + " * [new branch] ci/example-linting -> origin/ci/example-linting\n", + " * [new branch] ci/horovod -> origin/ci/horovod\n", + " * [new branch] cicd -> origin/cicd\n", + " * [new branch] codespell_fix -> origin/codespell_fix\n", + " * [new branch] compare_ranking_models -> origin/compare_ranking_models\n", + " * [new branch] conda_recipe -> origin/conda_recipe\n", + " * [new branch] consolidate-abstractions -> origin/consolidate-abstractions\n", + " * [new branch] dataloader_tag_fix -> origin/dataloader_tag_fix\n", + " * [new branch] dcn_tests -> origin/dcn_tests\n", + " * [new branch] deps/merlin-core-commit -> origin/deps/merlin-core-commit\n", + " * [new branch] docs-strings -> origin/docs-strings\n", + " * [new branch] docs/interrogate-cfg -> origin/docs/interrogate-cfg\n", + " * [new branch] docs/interrogate-config -> origin/docs/interrogate-config\n", + " * [new branch] emb_export_fix -> origin/emb_export_fix\n", + " * [new branch] evaluate_fixes -> origin/evaluate_fixes\n", + " * [new branch] examples/unit-tests -> origin/examples/unit-tests\n", + " * [new branch] examples/update_link -> origin/examples/update_link\n", + " * [new branch] examples_fixes -> origin/examples_fixes\n", + " * [new branch] fea-sok-integration-wj -> origin/fea-sok-integration-wj\n", + " * [new branch] fea-sok-load-dump -> origin/fea-sok-load-dump\n", + " * [new branch] feature/multi-hot-columns -> origin/feature/multi-hot-columns\n", + " * [new branch] feature/retrieval-dnn -> origin/feature/retrieval-dnn\n", + " * [new branch] fix-contrastive-predictions -> origin/fix-contrastive-predictions\n", + " * [new branch] fix/aliccp_workflow -> origin/fix/aliccp_workflow\n", + " * [new branch] fix/batch_predict -> origin/fix/batch_predict\n", + " * [new branch] fix/example-tests -> origin/fix/example-tests\n", + " * [new branch] fix/python-version -> origin/fix/python-version\n", + " * [new branch] fix/shared_embeddings -> origin/fix/shared_embeddings\n", + " * [new branch] fix_aliccp_schema -> origin/fix_aliccp_schema\n", + " * [new branch] fix_cated_ohe -> origin/fix_cated_ohe\n", + " * [new branch] fix_datetime_issue_add_inference_on_TIS -> origin/fix_datetime_issue_add_inference_on_TIS\n", + " * [new branch] fix_lightfm_evaluate -> origin/fix_lightfm_evaluate\n", + " * [new branch] fix_masking -> origin/fix_masking\n", + " * [new branch] fix_mtl_metrics -> origin/fix_mtl_metrics\n", + " * [new branch] fix_notebooks -> origin/fix_notebooks\n", + " * [new branch] fix_regression -> origin/fix_regression\n", + " * [new branch] fix_retrieval -> origin/fix_retrieval\n", + " * [new branch] fix_retrieval_eval_loss -> origin/fix_retrieval_eval_loss\n", + " * [new branch] fix_sampled_softmax_evaluation -> origin/fix_sampled_softmax_evaluation\n", + " * [new branch] fix_test_07 -> origin/fix_test_07\n", + " * [new branch] getting_started_exp -> origin/getting_started_exp\n", + " * [new branch] gh-pages -> origin/gh-pages\n", + " * [new branch] hashed_cross_test -> origin/hashed_cross_test\n", + 
" * [new branch] implement_review_comments -> origin/implement_review_comments\n", + " * [new branch] in-bath-sampling-bug -> origin/in-bath-sampling-bug\n", + " * [new branch] infer_embeddings -> origin/infer_embeddings\n", + " * [new branch] inference_benchmarking_transformers -> origin/inference_benchmarking_transformers\n", + " * [new branch] laiacano/concurrency -> origin/laiacano/concurrency\n", + " * [new branch] laiacano/tox -> origin/laiacano/tox\n", + " * [new branch] layer_freezing_test -> origin/layer_freezing_test\n", + " * [new branch] load_retrieval_model -> origin/load_retrieval_model\n", + " * [new branch] logit_correction_nol2_temp -> origin/logit_correction_nol2_temp\n", + " * [new branch] losses -> origin/losses\n", + " * [new branch] main -> origin/main\n", + " * [new branch] masking_transforms -> origin/masking_transforms\n", + " * [new branch] merlin-standard-lib -> origin/merlin-standard-lib\n", + " * [new branch] metrics_opt -> origin/metrics_opt\n", + " * [new branch] metrics_opt2 -> origin/metrics_opt2\n", + " * [new branch] mikemckiernan-patch-1 -> origin/mikemckiernan-patch-1\n", + " * [new branch] mlm -> origin/mlm\n", + " * [new branch] mlm_alt -> origin/mlm_alt\n", + " * [new branch] mlp_selu -> origin/mlp_selu\n", + " * [new branch] mrr_fix -> origin/mrr_fix\n", + " * [new branch] mtl_example -> origin/mtl_example\n", + " * [new branch] mtl_loss -> origin/mtl_loss\n", + " * [new branch] mtl_models -> origin/mtl_models\n", + " * [new branch] mtl_regularization -> origin/mtl_regularization\n", + " * [new branch] multi_optimizer_example -> origin/multi_optimizer_example\n", + " * [new branch] neg_sampling -> origin/neg_sampling\n", + " * [new branch] poc -> origin/poc\n", + " * [new branch] pretrained_init -> origin/pretrained_init\n", + " * [new branch] radekosmulski-patch-2 -> origin/radekosmulski-patch-2\n", + " * [new branch] ragged_embeddings -> origin/ragged_embeddings\n", + " * [new branch] ranking_models_inputs -> origin/ranking_models_inputs\n", + " * [new branch] ranking_tests -> origin/ranking_tests\n", + " * [new branch] ranking_tests3 -> origin/ranking_tests3\n", + " * [new branch] readme_bash -> origin/readme_bash\n", + " * [new branch] refactor-docs-reqs -> origin/refactor-docs-reqs\n", + " * [new branch] refactor/docs-reqs -> origin/refactor/docs-reqs\n", + " * [new branch] refactor/embedding-layers -> origin/refactor/embedding-layers\n", + " * [new branch] refactor/youtube-retrieval -> origin/refactor/youtube-retrieval\n", + " * [new branch] release-22.10 -> origin/release-22.10\n", + " * [new branch] release-22.11 -> origin/release-22.11\n", + " * [new branch] release-22.12 -> origin/release-22.12\n", + " * [new branch] release-23.02 -> origin/release-23.02\n", + " * [new branch] remove/masking -> origin/remove/masking\n", + " * [new branch] reset-metrics -> origin/reset-metrics\n", + " * [new branch] retrieval-sample-weights -> origin/retrieval-sample-weights\n", + " * [new branch] retrieval_debug -> origin/retrieval_debug\n", + " * [new branch] retrieval_debug_no_l2norm -> origin/retrieval_debug_no_l2norm\n", + " * [new branch] retrieval_debug_scores_temp -> origin/retrieval_debug_scores_temp\n", + " * [new branch] retrieval_eval_fix -> origin/retrieval_eval_fix\n", + " * [new branch] retrieval_fixes -> origin/retrieval_fixes\n", + " * [new branch] retrieval_fixes_2 -> origin/retrieval_fixes_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " * [new branch] retrieval_integration_tests -> 
origin/retrieval_integration_tests\n", + " * [new branch] revert-813-laiacano/tox-and-tmpdir -> origin/revert-813-laiacano/tox-and-tmpdir\n", + " * [new branch] romeyn/block-api -> origin/romeyn/block-api\n", + " * [new branch] romeyn/block-cleanup -> origin/romeyn/block-cleanup\n", + " * [new branch] romeyn/inputs -> origin/romeyn/inputs\n", + " * [new branch] sampling -> origin/sampling\n", + " * [new branch] select-by-tag -> origin/select-by-tag\n", + " * [new branch] stable -> origin/stable\n", + " * [new branch] t4rec_use_case -> origin/t4rec_use_case\n", + " * [new branch] tf/add-bokeh-to-dev -> origin/tf/add-bokeh-to-dev\n", + " * [new branch] tf/base-model-test-graph-mode -> origin/tf/base-model-test-graph-mode\n", + " * [new branch] tf/batch_predict_fix -> origin/tf/batch_predict_fix\n", + " * [new branch] tf/categorical-prediction -> origin/tf/categorical-prediction\n", + " * [new branch] tf/categorical-prediction-2 -> origin/tf/categorical-prediction-2\n", + " * [new branch] tf/column_sampling_serialization_fix -> origin/tf/column_sampling_serialization_fix\n", + " * [new branch] tf/combinators-base -> origin/tf/combinators-base\n", + " * [new branch] tf/cond -> origin/tf/cond\n", + " * [new branch] tf/context-tensor -> origin/tf/context-tensor\n", + " * [new branch] tf/continuous_seq_feats_fix -> origin/tf/continuous_seq_feats_fix\n", + " * [new branch] tf/contrastive-prediction -> origin/tf/contrastive-prediction\n", + " * [new branch] tf/core -> origin/tf/core\n", + " * [new branch] tf/dataloader_changes -> origin/tf/dataloader_changes\n", + " * [new branch] tf/dep-prediction-tasks -> origin/tf/dep-prediction-tasks\n", + " * [new branch] tf/dlrm_dropout_fix -> origin/tf/dlrm_dropout_fix\n", + " * [new branch] tf/dynamic-memory-growth -> origin/tf/dynamic-memory-growth\n", + " * [new branch] tf/embedding-tables -> origin/tf/embedding-tables\n", + " * [new branch] tf/embeddings_regularization -> origin/tf/embeddings_regularization\n", + " * [new branch] tf/evaluate_retrieval -> origin/tf/evaluate_retrieval\n", + " * [new branch] tf/fix_broadcast_to_sequence -> origin/tf/fix_broadcast_to_sequence\n", + " * [new branch] tf/fix_logq_correction -> origin/tf/fix_logq_correction\n", + " * [new branch] tf/fix_mlm_test -> origin/tf/fix_mlm_test\n", + " * [new branch] tf/fix_tag_item_id -> origin/tf/fix_tag_item_id\n", + " * [new branch] tf/fix_tests_shared_state -> origin/tf/fix_tests_shared_state\n", + " * [new branch] tf/fix_training_smaller_accuracy -> origin/tf/fix_training_smaller_accuracy\n", + " * [new branch] tf/input-block -> origin/tf/input-block\n", + " * [new branch] tf/input-block-filter -> origin/tf/input-block-filter\n", + " * [new branch] tf/inputs-concat -> origin/tf/inputs-concat\n", + " * [new branch] tf/keras-embedding -> origin/tf/keras-embedding\n", + " * [new branch] tf/logit_correction -> origin/tf/logit_correction\n", + " * [new branch] tf/loglossmetric_callbacks -> origin/tf/loglossmetric_callbacks\n", + " * [new branch] tf/logq_correction -> origin/tf/logq_correction\n", + " * [new branch] tf/loss_batch_metric -> origin/tf/loss_batch_metric\n", + " * [new branch] tf/map-values -> origin/tf/map-values\n", + " * [new branch] tf/masking_block -> origin/tf/masking_block\n", + " * [new branch] tf/mf-retrieval-model -> origin/tf/mf-retrieval-model\n", + " * [new branch] tf/mlm-schema -> origin/tf/mlm-schema\n", + " * [new branch] tf/model-tests -> origin/tf/model-tests\n", + " * [new branch] tf/model/sequential -> origin/tf/model/sequential\n", + " * [new branch] 
tf/move-core -> origin/tf/move-core\n", + " * [new branch] tf/mtl_example_updates_v2 -> origin/tf/mtl_example_updates_v2\n", + " * [new branch] tf/multi_task_improv -> origin/tf/multi_task_improv\n", + " * [new branch] tf/ncf_model -> origin/tf/ncf_model\n", + " * [new branch] tf/output-block -> origin/tf/output-block\n", + " * [new branch] tf/pop_metrics -> origin/tf/pop_metrics\n", + " * [new branch] tf/prediction -> origin/tf/prediction\n", + " * [new branch] tf/prediction-block -> origin/tf/prediction-block\n", + " * [new branch] tf/pretrained_emb -> origin/tf/pretrained_emb\n", + " * [new branch] tf/process_list_to_prepare_features -> origin/tf/process_list_to_prepare_features\n", + " * [new branch] tf/pruning-parallel-block -> origin/tf/pruning-parallel-block\n", + " * [new branch] tf/quick_start_ranking -> origin/tf/quick_start_ranking\n", + " * [new branch] tf/ragged-tensors -> origin/tf/ragged-tensors\n", + " * [new branch] tf/ranking_metrics_sort -> origin/tf/ranking_metrics_sort\n", + " * [new branch] tf/refactor -> origin/tf/refactor\n", + " * [new branch] tf/retireval_eval -> origin/tf/retireval_eval\n", + " * [new branch] tf/retrieval-eval -> origin/tf/retrieval-eval\n", + " * [new branch] tf/retrieval-model-v2 -> origin/tf/retrieval-model-v2\n", + " * [new branch] tf/retrieval-models -> origin/tf/retrieval-models\n", + " * [new branch] tf/sampling/items -> origin/tf/sampling/items\n", + " * [new branch] tf/save-regularizer -> origin/tf/save-regularizer\n", + " * [new branch] tf/target-propagation -> origin/tf/target-propagation\n", + " * [new branch] tf/targets -> origin/tf/targets\n", + " * [new branch] tf/tf-cont-list -> origin/tf/tf-cont-list\n", + " * [new branch] tf/topk_recommender -> origin/tf/topk_recommender\n", + " * [new branch] tf/tower-save -> origin/tf/tower-save\n", + " * [new branch] tf/train_metrics_steps_fix -> origin/tf/train_metrics_steps_fix\n", + " * [new branch] tf/transformer-api -> origin/tf/transformer-api\n", + " * [new branch] tf/transformer-block -> origin/tf/transformer-block\n", + " * [new branch] tf/transformer_block -> origin/tf/transformer_block\n", + " * [new branch] tf/wide_and_deep -> origin/tf/wide_and_deep\n", + " * [new branch] tf/wrap-as-model -> origin/tf/wrap-as-model\n", + " * [new branch] tf/xlnet-bug -> origin/tf/xlnet-bug\n", + " * [new branch] torch/clean-up -> origin/torch/clean-up\n", + " * [new branch] torch/dev -> origin/torch/dev\n", + " * [new branch] torch/masking -> origin/torch/masking\n", + " * [new branch] torch/prototype -> origin/torch/prototype\n", + " * [new branch] torch/remove-t4r-code -> origin/torch/remove-t4r-code\n", + " * [new branch] tox_github_actions_fix -> origin/tox_github_actions_fix\n", + " * [new branch] transformer-api -> origin/transformer-api\n", + " * [new branch] two_tower_fixes -> origin/two_tower_fixes\n", + " * [new branch] update_07 -> origin/update_07\n", + " * [new branch] update_advanced_notebook -> origin/update_advanced_notebook\n", + " * [new branch] update_example_01 -> origin/update_example_01\n", + " * [new branch] update_examples_with_tracking_logo -> origin/update_examples_with_tracking_logo\n", + " * [new branch] v0.2.0-docs -> origin/v0.2.0-docs\n", + " * [new branch] v0.3.0-docs -> origin/v0.3.0-docs\n", + " * [new branch] validation_data_fix -> origin/validation_data_fix\n", + " * [new branch] validation_data_fix2 -> origin/validation_data_fix2\n", + " * [new branch] wide_deep_example_test -> origin/wide_deep_example_test\n", + " * [new branch] wideanddeep_example -> 
origin/wideanddeep_example\n", + " * [new branch] xgboost/predict-without-target -> origin/xgboost/predict-without-target\n", + " * [new branch] youtube_dnn_retrieval -> origin/youtube_dnn_retrieval\n", + " * [new branch] youtubednn_improv -> origin/youtubednn_improv\n", + " * [new branch] youtubednn_logq -> origin/youtubednn_logq\n", + " * [new tag] v0.10.0 -> v0.10.0\n", + " * [new tag] v0.11.0 -> v0.11.0\n", + " * [new tag] v0.9.0 -> v0.9.0\n", + " * [new tag] v23.02.00 -> v23.02.00\n", + " * [new tag] v0.1.0 -> v0.1.0\n", + " * [new tag] v0.2.0 -> v0.2.0\n", + " * [new tag] v0.3.0 -> v0.3.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " * [new tag] v0.4.0 -> v0.4.0\n", + " * [new tag] v0.5.0 -> v0.5.0\n", + " * [new tag] v0.6.0 -> v0.6.0\n", + " * [new tag] v0.7.0 -> v0.7.0\n", + " * [new tag] v0.8.0 -> v0.8.0\n", + " * [new tag] v23.05.dev0 -> v23.05.dev0\n", + "Switched to a new branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Branch 'main' set up to track remote branch 'main' from 'origin'.\n", + "Processing /models\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Building wheels for collected packages: merlin-models\n", + " Building wheel for merlin-models (PEP 517): started\n", + " Building wheel for merlin-models (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-models: filename=merlin_models-23.5.dev0+12.gd8133b8f-py3-none-any.whl size=343289 sha256=09ff3fba62daf076895e75820e936c221b3bbfa0d3342f10ef3f5eb9c572a01b\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-ipciscb_/wheels/4d/e8/98/0493db55fff90dc9af123f55a9455b96f7f8166c912a02c8a6\n", + "Successfully built merlin-models\n", + "Installing collected packages: merlin-models\n", + " Attempting uninstall: merlin-models\n", + " Found existing installation: merlin-models 23.4.0\n", + " Uninstalling merlin-models-23.4.0:\n", + " Successfully uninstalled merlin-models-23.4.0\n", + "Successfully installed merlin-models-23.5.dev0+12.gd8133b8f\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/systems\n", + " fce949f..2516efb release-23.04 -> origin/release-23.04\n", + " * [new branch] add_xgboost_serving_example -> origin/add_xgboost_serving_example\n", + " * [new branch] bschifferer-patch-1 -> origin/bschifferer-patch-1\n", + " * [new branch] bschifferer-patch-2 -> origin/bschifferer-patch-2\n", + " * [new branch] ci/cpu-action -> origin/ci/cpu-action\n", + " * [new branch] dataset-cpu-default-None -> origin/dataset-cpu-default-None\n", + " * [new branch] docs-nightly-build -> origin/docs-nightly-build\n", + " * [new branch] docs-remove-deps -> origin/docs-remove-deps\n", + " * [new branch] docs-tox -> origin/docs-tox\n", + " * [new branch] docs/contributing -> origin/docs/contributing\n", + " * [new branch] docs/coverage-threshold -> origin/docs/coverage-threshold\n", + " * [new branch] docs/docstring-coverage -> origin/docs/docstring-coverage\n", + " * [new branch] docs/interrogate-cfg -> origin/docs/interrogate-cfg\n", + " * [new branch] docs/interrogate-config -> origin/docs/interrogate-config\n", + " * [new branch] docs/issue-templates -> 
origin/docs/issue-templates\n", + " * [new branch] docs/readme -> origin/docs/readme\n", + " * [new branch] feast-errors -> origin/feast-errors\n", + " * [new branch] feature/pytorch -> origin/feature/pytorch\n", + " * [new branch] feature/t4r-serving -> origin/feature/t4r-serving\n", + " * [new branch] feature/torchscript -> origin/feature/torchscript\n", + " * [new branch] fix/dask-dist-deps -> origin/fix/dask-dist-deps\n", + " * [new branch] fix/faiss-types -> origin/fix/faiss-types\n", + " * [new branch] fix/multi-hot-dtypes -> origin/fix/multi-hot-dtypes\n", + " * [new branch] fix/multihot-schemas -> origin/fix/multihot-schemas\n", + " * [new branch] fix/pkg-build-lib -> origin/fix/pkg-build-lib\n", + " * [new branch] fix/pytest-feast -> origin/fix/pytest-feast\n", + " * [new branch] fix/skipped-tests -> origin/fix/skipped-tests\n", + " * [new branch] fix/tf-input-shapes -> origin/fix/tf-input-shapes\n", + " * [new branch] fix/torch-importorskip -> origin/fix/torch-importorskip\n", + " * [new branch] fix_model_outputnames -> origin/fix_model_outputnames\n", + " * [new branch] fix_nb -> origin/fix_nb\n", + " * [new branch] gh-pages -> origin/gh-pages\n", + " * [new branch] laiacano/slack-notify -> origin/laiacano/slack-notify\n", + " * [new branch] laiacano/transformer-import -> origin/laiacano/transformer-import\n", + " * [new branch] laiacano/upgrade-feast -> origin/laiacano/upgrade-feast\n", + " * [new branch] main -> origin/main\n", + " * [new branch] merlin_models_xgboost -> origin/merlin_models_xgboost\n", + " * [new branch] migration/from-nvt -> origin/migration/from-nvt\n", + " * [new branch] polish/remove-dtype-matching -> origin/polish/remove-dtype-matching\n", + " * [new branch] radekosmulski-patch-1 -> origin/radekosmulski-patch-1\n", + " * [new branch] radekosmulski-patch-1-1 -> origin/radekosmulski-patch-1-1\n", + " * [new branch] refactor/dtypes -> origin/refactor/dtypes\n", + " * [new branch] refactor/organize-tests -> origin/refactor/organize-tests\n", + " * [new branch] refactor/schema-validation-hook -> origin/refactor/schema-validation-hook\n", + " * [new branch] refactor/virtual-dataframe -> origin/refactor/virtual-dataframe\n", + " * [new branch] release-22.10 -> origin/release-22.10\n", + " * [new branch] release-22.11 -> origin/release-22.11\n", + " * [new branch] release-22.12 -> origin/release-22.12\n", + " * [new branch] release-23.02 -> origin/release-23.02\n", + " * [new branch] run_triton_utils -> origin/run_triton_utils\n", + " * [new branch] stable -> origin/stable\n", + " * [new branch] update-reqs -> origin/update-reqs\n", + " * [new branch] update/precommit-hooks -> origin/update/precommit-hooks\n", + " * [new branch] use_dataloader -> origin/use_dataloader\n", + " * [new branch] v0.0.1-docs -> origin/v0.0.1-docs\n", + " * [new branch] v0.1.0-docs -> origin/v0.1.0-docs\n", + " * [new tag] v0.7.0 -> v0.7.0\n", + " * [new tag] v0.8.0 -> v0.8.0\n", + " * [new tag] v0.9.0 -> v0.9.0\n", + " * [new tag] v23.02.00 -> v23.02.00\n", + " * [new tag] v0.0.1 -> v0.0.1\n", + " * [new tag] v0.1.0 -> v0.1.0\n", + " * [new tag] v0.2.0 -> v0.2.0\n", + " * [new tag] v0.3.0 -> v0.3.0\n", + " * [new tag] v0.4.0 -> v0.4.0\n", + " * [new tag] v0.5.0 -> v0.5.0\n", + " * [new tag] v0.6.0 -> v0.6.0\n", + " * [new tag] v23.05.dev0 -> v23.05.dev0\n", + "Switched to a new branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Branch 'main' set up to track remote branch 'main' from 'origin'.\n", + "Processing /systems\n", + " Installing build 
dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Building wheels for collected packages: merlin-systems\n", + " Building wheel for merlin-systems (PEP 517): started\n", + " Building wheel for merlin-systems (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-systems: filename=merlin_systems-23.5.dev0+8.g2b1b90b-py3-none-any.whl size=83188 sha256=4152a863cc43b7f51bf5ab1bd406890a32a4b33ad1b6ab5cebc4f00db54fb144\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-x197uh_u/wheels/1f/e9/71/1b0c6295aa7f4b37cb70292d96d87d9f38204674e6531bdda6\n", + "Successfully built merlin-systems\n", + "Installing collected packages: merlin-systems\n", + " Attempting uninstall: merlin-systems\n", + " Found existing installation: merlin-systems 23.4.0\n", + " Uninstalling merlin-systems-23.4.0:\n", + " Successfully uninstalled merlin-systems-23.4.0\n", + "Successfully installed merlin-systems-23.5.dev0+8.g2b1b90b\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/Transformers4Rec\n", + " 4a9e7373..911355f4 release-23.04 -> origin/release-23.04\n", + " * [new branch] DDP_fix -> origin/DDP_fix\n", + " * [new branch] HF-update -> origin/HF-update\n", + " * [new branch] add_benchmarking_scripts -> origin/add_benchmarking_scripts\n", + " * [new branch] add_topk_layer -> origin/add_topk_layer\n", + " * [new branch] albert17-check -> origin/albert17-check\n", + " * [new branch] batches -> origin/batches\n", + " * [new branch] benfred/datasetschema -> origin/benfred/datasetschema\n", + " * [new branch] clean_rnn_block -> origin/clean_rnn_block\n", + " * [new branch] core-schema/deprecation-warning -> origin/core-schema/deprecation-warning\n", + " * [new branch] core-schema/tabular-features -> origin/core-schema/tabular-features\n", + " * [new branch] core-schema/trainer -> origin/core-schema/trainer\n", + " * [new branch] dataloader -> origin/dataloader\n", + " * [new branch] dataparallel_fix -> origin/dataparallel_fix\n", + " * [new branch] doc/supported_transformers -> origin/doc/supported_transformers\n", + " * [new branch] doc_fix -> origin/doc_fix\n", + " * [new branch] docs -> origin/docs\n", + " * [new branch] etl-nvt -> origin/etl-nvt\n", + " * [new branch] examples -> origin/examples\n", + " * [new branch] fix-data-repartition -> origin/fix-data-repartition\n", + " * [new branch] fix-failing-ci -> origin/fix-failing-ci\n", + " * [new branch] fix-inference -> origin/fix-inference\n", + " * [new branch] fix/transformers_config -> origin/fix/transformers_config\n", + " * [new branch] fix_gettingstarted_nb -> origin/fix_gettingstarted_nb\n", + " * [new branch] fix_inference -> origin/fix_inference\n", + " * [new branch] fix_nbs -> origin/fix_nbs\n", + " * [new branch] fix_oom_tests -> origin/fix_oom_tests\n", + " * [new branch] fix_req_paper_repro -> origin/fix_req_paper_repro\n", + " * [new branch] fix_stochastic -> origin/fix_stochastic\n", + " * [new branch] fix_unit_test -> origin/fix_unit_test\n", + " * [new branch] gh-pages -> origin/gh-pages\n", + " * [new branch] github-templates -> origin/github-templates\n", + " * [new branch] ignore-masking -> origin/ignore-masking\n", + " * [new branch] laiacano/merlin-core-schema -> 
origin/laiacano/merlin-core-schema\n", + " * [new branch] laiacano/skip-ci-on-closed-pr -> origin/laiacano/skip-ci-on-closed-pr\n", + " * [new branch] license -> origin/license\n", + " * [new branch] main -> origin/main\n", + " * [new branch] masking_quick_fix -> origin/masking_quick_fix\n", + " * [new branch] metric-names-prefix -> origin/metric-names-prefix\n", + " * [new branch] model_save_load -> origin/model_save_load\n", + " * [new branch] multi_gpu_doc -> origin/multi_gpu_doc\n", + " * [new branch] multi_gpu_doc_fix -> origin/multi_gpu_doc_fix\n", + " * [new branch] post_fusion_context -> origin/post_fusion_context\n", + " * [new branch] pretrained_embeddings_init -> origin/pretrained_embeddings_init\n", + " * [new branch] pretrained_module -> origin/pretrained_module\n", + " * [new branch] pyt_serving -> origin/pyt_serving\n", + " * [new branch] pytorch/item-id-aggregator -> origin/pytorch/item-id-aggregator\n", + " * [new branch] pytorch/label_smoothing -> origin/pytorch/label_smoothing\n", + " * [new branch] pytorch/model-and-heads -> origin/pytorch/model-and-heads\n", + " * [new branch] pytorch/model-updates -> origin/pytorch/model-updates\n", + " * [new branch] read_schema_from_core -> origin/read_schema_from_core\n", + " * [new branch] recsys22 -> origin/recsys22\n", + " * [new branch] refactor-prediction-task -> origin/refactor-prediction-task\n", + " * [new branch] refactor_part1 -> origin/refactor_part1\n", + " * [new branch] refactor_part2 -> origin/refactor_part2\n", + " * [new branch] release-22.10 -> origin/release-22.10\n", + " * [new branch] release-22.11 -> origin/release-22.11\n", + " * [new branch] release-22.12 -> origin/release-22.12\n", + " * [new branch] release-23.02 -> origin/release-23.02\n", + " * [new branch] release-jperez999 -> origin/release-jperez999\n", + " * [new branch] remove_paper_assets -> origin/remove_paper_assets\n", + " * [new branch] romeyn/dev -> origin/romeyn/dev\n", + " * [new branch] romeyn/transformer-configs -> origin/romeyn/transformer-configs\n", + " * [new branch] save-schema-for-t4rec-model -> origin/save-schema-for-t4rec-model\n", + " * [new branch] schema-pbtxt-bug -> origin/schema-pbtxt-bug\n", + " * [new branch] schema-shape-fix -> origin/schema-shape-fix\n", + " * [new branch] seq_binary_classification -> origin/seq_binary_classification\n", + " * [new branch] serve_nvt_and__model -> origin/serve_nvt_and__model\n", + " * [new branch] session_features -> origin/session_features\n", + " * [new branch] slim_doc_deps -> origin/slim_doc_deps\n", + " * [new branch] soft_embeddings -> origin/soft_embeddings\n", + " * [new branch] ssn_seed -> origin/ssn_seed\n", + " * [new branch] stable -> origin/stable\n", + " * [new branch] stochastic_noise -> origin/stochastic_noise\n", + " * [new branch] stochastic_noise2 -> origin/stochastic_noise2\n", + " * [new branch] synthetic-data -> origin/synthetic-data\n", + " * [new branch] t4rec-MM-repro -> origin/t4rec-MM-repro\n", + " * [new branch] t4rec_paper_repro2 -> origin/t4rec_paper_repro2\n", + " * [new branch] t4rec_refactor -> origin/t4rec_refactor\n", + " * [new branch] tensorflow -> origin/tensorflow\n", + " * [new branch] test-data -> origin/test-data\n", + " * [new branch] test/text_module -> origin/test/text_module\n", + " * [new branch] testing/updates -> origin/testing/updates\n", + " * [new branch] tf/example_notebook -> origin/tf/example_notebook\n", + " * [new branch] tf/fix_compute_loss -> origin/tf/fix_compute_loss\n", + " * [new branch] tf/fix_graph_mode -> 
origin/tf/fix_graph_mode\n", + " * [new branch] tf/model_saving_and_loading -> origin/tf/model_saving_and_loading\n", + " * [new branch] tf/refactor_item_prediction_task -> origin/tf/refactor_item_prediction_task\n", + " * [new branch] tf/refactor_masking -> origin/tf/refactor_masking\n", + " * [new branch] tf/refactor_ranking_metric -> origin/tf/refactor_ranking_metric\n", + " * [new branch] tf/refactor_transformer_block -> origin/tf/refactor_transformer_block\n", + " * [new branch] tf/save_load_model -> origin/tf/save_load_model\n", + " * [new branch] tf/test-utils -> origin/tf/test-utils\n", + " * [new branch] tf/to_tf_model -> origin/tf/to_tf_model\n", + " * [new branch] torch/demo_utils -> origin/torch/demo_utils\n", + " * [new branch] torch/fit_eval -> origin/torch/fit_eval\n", + " * [new branch] torch/fix_evaluation -> origin/torch/fix_evaluation\n", + " * [new branch] torch/fix_examples_utils -> origin/torch/fix_examples_utils\n", + " * [new branch] torch/fix_wipe_memory -> origin/torch/fix_wipe_memory\n", + " * [new branch] torch/label_smoothing_loss -> origin/torch/label_smoothing_loss\n", + " * [new branch] torch/next_item_prediction -> origin/torch/next_item_prediction\n", + " * [new branch] torch/stochastic_swap_noise -> origin/torch/stochastic_swap_noise\n", + " * [new branch] trainer_predict_step -> origin/trainer_predict_step\n", + " * [new branch] tutorial -> origin/tutorial\n", + " * [new branch] unittest_endtoend_multi -> origin/unittest_endtoend_multi\n", + " * [new branch] update/torchmetrics -> origin/update/torchmetrics\n", + " * [new branch] utils -> origin/utils\n", + " * [new branch] v0.1.2-docs -> origin/v0.1.2-docs\n", + " * [new branch] v0.1.3-docs -> origin/v0.1.3-docs\n", + " * [new branch] v0.1.4-docs -> origin/v0.1.4-docs\n", + " * [new branch] v0.1.5-docs -> origin/v0.1.5-docs\n", + " * [new branch] v0.1.6-docs -> origin/v0.1.6-docs\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " * [new branch] v0.1.7-docs -> origin/v0.1.7-docs\n", + " * [new tag] v0.1.14 -> v0.1.14\n", + " * [new tag] v0.1.15 -> v0.1.15\n", + " * [new tag] v0.1.16 -> v0.1.16\n", + " * [new tag] v23.02.00 -> v23.02.00\n", + " * [new tag] v23.05.dev0 -> v23.05.dev0\n", + " * [new tag] custom_dataloader -> custom_dataloader\n", + " * [new tag] v0.1.0 -> v0.1.0\n", + " * [new tag] v0.1.1 -> v0.1.1\n", + " * [new tag] v0.1.10 -> v0.1.10\n", + " * [new tag] v0.1.11 -> v0.1.11\n", + " * [new tag] v0.1.12 -> v0.1.12\n", + " * [new tag] v0.1.13 -> v0.1.13\n", + " * [new tag] v0.1.2 -> v0.1.2\n", + " * [new tag] v0.1.3 -> v0.1.3\n", + " * [new tag] v0.1.4 -> v0.1.4\n", + " * [new tag] v0.1.5 -> v0.1.5\n", + " * [new tag] v0.1.6 -> v0.1.6\n", + " * [new tag] v0.1.7 -> v0.1.7\n", + " * [new tag] v0.1.8 -> v0.1.8\n", + " * [new tag] v0.1.9 -> v0.1.9\n", + "Switched to a new branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Branch 'main' set up to track remote branch 'main' from 'origin'.\n", + "Processing /transformers4rec\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Building wheels for collected packages: transformers4rec\n", + " Building wheel for transformers4rec (PEP 517): started\n", + " Building wheel for transformers4rec 
(PEP 517): finished with status 'done'\n", + " Created wheel for transformers4rec: filename=transformers4rec-23.5.dev0+11.ga070e77f-py3-none-any.whl size=481639 sha256=c87755b3edaa042660591a34046a487bd7a0921748342baa46371ae2568e7a1b\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-bk3gt81g/wheels/24/44/e3/c29f7de8e7315585705f880ad32ffeae66fcaeb79003405ef6\n", + "Successfully built transformers4rec\n", + "Installing collected packages: transformers4rec\n", + " Attempting uninstall: transformers4rec\n", + " Found existing installation: transformers4rec 23.4.0\n", + " Uninstalling transformers4rec-23.4.0:\n", + " Successfully uninstalled transformers4rec-23.4.0\n", + "Successfully installed transformers4rec-23.5.dev0+11.ga070e77f\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Note: switching to 'origin/release-23.04'.\n", + "\n", + "You are in 'detached HEAD' state. You can look around, make experimental\n", + "changes and commit them, and you can discard any commits you make in this\n", + "state without impacting any branches by switching back to a branch.\n", + "\n", + "If you want to create a new branch to retain commits you create, you may\n", + "do so (now or later) by using -c with the switch command. Example:\n", + "\n", + " git switch -c \n", + "\n", + "Or undo this operation with:\n", + "\n", + " git switch -\n", + "\n", + "Turn off this advice by setting config variable advice.detachedHead to false\n", + "\n", + "HEAD is now at 2516efb Return version 23.04.00 from versions\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing /systems\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Building wheels for collected packages: merlin-systems\n", + " Building wheel for merlin-systems (PEP 517): started\n", + " Building wheel for merlin-systems (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-systems: filename=merlin_systems-23.4.0-py3-none-any.whl size=82535 sha256=d8530f8b224d1be1c9d2e7f8e2de798a601e790365c4b098aa639c90dc6df383\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-zg_mvjzk/wheels/1f/e9/71/1b0c6295aa7f4b37cb70292d96d87d9f38204674e6531bdda6\n", + "Successfully built merlin-systems\n", + "Installing collected packages: merlin-systems\n", + " Attempting uninstall: merlin-systems\n", + " Found existing installation: merlin-systems 23.5.dev0+8.g2b1b90b\n", + " Uninstalling merlin-systems-23.5.dev0+8.g2b1b90b:\n", + " Successfully uninstalled merlin-systems-23.5.dev0+8.g2b1b90b\n", + "Successfully installed merlin-systems-23.4.0\n" + ] + } + ], + "source": [ + "%%bash\n", + "cd /core\n", + "git config remote.origin.fetch \"+refs/heads/*:refs/remotes/origin/*\" && git fetch && git checkout main\n", + "pip install . --no-deps\n", + "\n", + "cd /dataloader\n", + "git config remote.origin.fetch \"+refs/heads/*:refs/remotes/origin/*\" && git fetch && git checkout main\n", + "pip install . --no-deps\n", + "\n", + "cd /nvtabular\n", + "git config remote.origin.fetch \"+refs/heads/*:refs/remotes/origin/*\" && git fetch && git checkout main\n", + "pip install . 
--no-deps\n", + "\n", + "cd /models\n", + "git config remote.origin.fetch \"+refs/heads/*:refs/remotes/origin/*\" && git fetch && git checkout main\n", + "pip install . --no-deps\n", + "\n", + "cd /systems\n", + "git config remote.origin.fetch \"+refs/heads/*:refs/remotes/origin/*\" && git fetch && git checkout main\n", + "pip install . --no-deps\n", + "\n", + "cd /transformers4rec\n", + "git config remote.origin.fetch \"+refs/heads/*:refs/remotes/origin/*\" && git fetch && git checkout main\n", + "pip install . --no-deps\n", + "\n", + "cd /systems\n", + "git checkout origin/release-23.04\n", + "pip install . --no-deps" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e9929dc8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: gdown in /usr/local/lib/python3.8/dist-packages (4.7.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from gdown) (3.12.0)\n", + "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.8/dist-packages (from gdown) (4.12.2)\n", + "Requirement already satisfied: requests[socks] in /usr/local/lib/python3.8/dist-packages (from gdown) (2.29.0)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from gdown) (4.65.0)\n", + "Requirement already satisfied: six in /usr/lib/python3/dist-packages (from gdown) (1.14.0)\n", + "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.8/dist-packages (from beautifulsoup4->gdown) (2.4.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (3.1.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2.8)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2019.11.28)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (1.25.8)\n", + "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6; extra == \"socks\" in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.7.1)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Downloading...\n", + "From (uriginal): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", + "From (redirected): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV&confirm=t&uuid=6953ace2-159c-421a-85be-411cfe20d627\n", + "To: /workspace/T4Rec_repro/rees46_ecom_dataset_small_for_ci.zip\n", + "100%|██████████| 43.4M/43.4M [00:00<00:00, 158MB/s] \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Get:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 InRelease [1581 B]\n", + "Get:2 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]\n", + "Get:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 Packages [1009 kB]\n", + "Get:4 http://archive.ubuntu.com/ubuntu focal InRelease [265 kB]\n", + "Get:5 http://security.ubuntu.com/ubuntu focal-security/universe amd64 Packages [1045 kB]\n", + "Get:6 http://security.ubuntu.com/ubuntu focal-security/main amd64 Packages [2674 kB]\n", + "Get:7 http://security.ubuntu.com/ubuntu focal-security/multiverse amd64 Packages [28.5 kB]\n", + "Get:8 http://security.ubuntu.com/ubuntu focal-security/restricted amd64 Packages [2203 kB]\n", + 
"Get:9 http://archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]\n", + "Get:10 http://archive.ubuntu.com/ubuntu focal-backports InRelease [108 kB]\n", + "Get:11 http://archive.ubuntu.com/ubuntu focal/multiverse amd64 Packages [177 kB]\n", + "Get:12 http://archive.ubuntu.com/ubuntu focal/universe amd64 Packages [11.3 MB]\n", + "Get:13 http://archive.ubuntu.com/ubuntu focal/restricted amd64 Packages [33.4 kB]\n", + "Get:14 http://archive.ubuntu.com/ubuntu focal/main amd64 Packages [1275 kB]\n", + "Get:15 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 Packages [3157 kB]\n", + "Get:16 http://archive.ubuntu.com/ubuntu focal-updates/restricted amd64 Packages [2341 kB]\n", + "Get:17 http://archive.ubuntu.com/ubuntu focal-updates/universe amd64 Packages [1341 kB]\n", + "Get:18 http://archive.ubuntu.com/ubuntu focal-updates/multiverse amd64 Packages [31.2 kB]\n", + "Get:19 http://archive.ubuntu.com/ubuntu focal-backports/universe amd64 Packages [28.6 kB]\n", + "Get:20 http://archive.ubuntu.com/ubuntu focal-backports/main amd64 Packages [55.2 kB]\n", + "Fetched 27.3 MB in 4s (7788 kB/s)\n", + "Reading package lists...\n", + "Reading package lists...\n", + "Building dependency tree...\n", + "Reading state information...\n", + "unzip is already the newest version (6.0-25ubuntu1.1).\n", + "0 upgraded, 0 newly installed, 0 to remove and 60 not upgraded.\n", + "Archive: rees46_ecom_dataset_small_for_ci.zip\n", + " creating: ecom_dataset/0001/\n", + " inflating: ecom_dataset/0001/valid.parquet \n", + " extracting: ecom_dataset/0001/.zip \n", + " inflating: ecom_dataset/0001/train.parquet \n", + " inflating: ecom_dataset/0001/test.parquet \n", + " creating: ecom_dataset/0002/\n", + " inflating: ecom_dataset/0002/valid.parquet \n", + " inflating: ecom_dataset/0002/train.parquet \n", + " inflating: ecom_dataset/0002/test.parquet \n" + ] + } + ], + "source": [ + "%%bash\n", + "\n", + "rm -rf ecom_dataset\n", + "mkdir -p ecom_dataset\n", + "\n", + "pip install gdown\n", + "# gdown https://drive.google.com/uc?id=1BvCHc4eXComuNK93bKhRM6cbg9y5p350 # <-- full dataset\n", + "gdown https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", + "apt-get update -y\n", + "apt-get install unzip -y\n", + "unzip -d ecom_dataset \"rees46_ecom_dataset_small_for_ci.zip\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "fd80de2a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-05-09 09:22:21.081264: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. 
The old module will be deleted in version 2.11.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", + "2023-05-09 09:22:27.110014: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:66] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /lib:/usr/local/lib/python3.8/dist-packages/tensorflow:/usr/local/cuda/compat/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda-11/lib64:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/lib:/repos/dist/lib:/usr/lib/jvm/default-java/lib:/usr/lib/jvm/default-java/lib/server:/opt/tritonserver/lib:/usr/local/hugectr/lib\n", + "2023-05-09 09:22:27.110043: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-05-09 09:22:27.110065: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (49ae924556c8): /proc/driver/nvidia/version does not exist\n", + "2023-05-09 09:22:27.665462: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n",
+    "import gc\n",
+    "import numpy as np\n",
+    "\n",
+    "import tensorflow as tf\n",
+    "\n",
+    "from merlin.schema.tags import Tags\n",
+    "from merlin.io.dataset import Dataset\n",
+    "\n",
+    "import merlin.models.tf as mm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "8a07ad6f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "0471f48d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# keep only the item id sequence column in the benchmark parquet files\n",
+    "for fn in ['ecom_dataset/0001/train.parquet', 'ecom_dataset/0002/test.parquet']:\n",
+    "    t = pd.read_parquet(fn)\n",
+    "    t[['sess_pid_seq']].to_parquet(fn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "11647dd3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:264: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n",
+      "  warnings.warn(\n",
+      "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:264: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "train = Dataset(\"ecom_dataset/0001/train.parquet\")\n",
+    "valid = Dataset(\"ecom_dataset/0002/test.parquet\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "4ab4e0fb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "target = 'sess_pid_seq'\n",
+    "seq_name = target"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "8d9903e6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# a couple of starter hyperparams\n",
+    "\n",
+    "d_model = 192\n",
+    "n_layer = 3\n",
+    "n_head = 16\n",
+    "batch_size = 128\n",
+    "learning_rate = 0.0006667377132554976\n",
+    "n_epoch = 1\n",
+    "item_embedding_dim = 448\n",
+    "item_id_embeddings_init_std = 3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "410ea223",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# seq_name = 'seq'\n",
+    "# target = seq_name"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "4328f03a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from nvtabular.inference.triton import export_tensorflow_ensemble\n",
+    "from nvtabular import Workflow\n",
+    "from nvtabular.ops import Categorify, Rename"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "d5a9dd50",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:264: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n",
+      "  warnings.warn(\n",
+      "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:264: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n",
+      "  warnings.warn(\n",
+      "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:264: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Categorify encodes the raw item ids into contiguous integer ids\n",
+    "ops = ['sess_pid_seq'] >> Categorify(dtype=np.int32) #>> Rename(name=seq_name)\n",
+    "\n",
+    "wf = Workflow(ops)\n",
+    "\n",
+    "train = wf.fit_transform(train)\n",
+    "valid = wf.transform(valid)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "a6ade14a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n",
+    "\n",
+    "def get_model():\n",
+    "    mlp_block = mm.MLPBlock(\n",
+    "        [d_model],\n",
+    "        activation='relu',\n",
+    "        no_activation_last_layer=True,\n",
+    "    )\n",
+    "\n",
+    "    schema = TensorflowMetadata.from_proto_text_file(\n",
+    "        './',\n",
+    "        file_name='rees46_schema_modified.pbtxt'\n",
+    "    ).to_merlin_schema()\n",
+    "\n",
+    "    train.schema = schema\n",
+    "\n",
+    "    schema_model = schema.select_by_tag(Tags.ITEM_ID)\n",
+    "    input_block = mm.InputBlockV2(\n",
+    "        schema_model,\n",
+    "        categorical=mm.Embeddings(\n",
+    "            schema_model.select_by_tag(Tags.CATEGORICAL),\n",
+    "            dim=item_embedding_dim,\n",
+    "            sequence_combiner=None,\n",
+    "        )\n",
+    "    )\n",
+    "\n",
+    "    train.schema = train.schema.select_by_name(seq_name)\n",
+    "\n",
+    "    xlnet_block = mm.XLNetBlock(d_model=d_model, n_head=n_head, n_layer=n_layer)\n",
+    "\n",
+    "    dense_block = mm.SequentialBlock(\n",
+    "        input_block,\n",
+    "        mlp_block,\n",
+    "        xlnet_block\n",
+    "    )\n",
+    "\n",
+    "    mlp_block2 = mm.MLPBlock(\n",
+    "        [item_embedding_dim],\n",
+    "        activation='relu',\n",
+    "        no_activation_last_layer=True,\n",
+    "    )\n",
+    "\n",
+    "    # reuse the item embedding table for the output layer (weight tying)\n",
+    "    prediction_task = mm.CategoricalOutput(\n",
+    "        to_call=input_block[\"categorical\"][target],\n",
+    "    )\n",
+    "\n",
+    "    model_transformer = mm.Model(dense_block, mlp_block2, prediction_task)\n",
+    "\n",
+    "    optimizer = tf.keras.optimizers.Adam(\n",
+    "        learning_rate=learning_rate,\n",
+    "    )\n",
+    "\n",
+    "    model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n",
+    "                              metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[20])\n",
+    "    )\n",
+    "    return model_transformer, xlnet_block"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "7baec64f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_transformer, xlnet_block = get_model()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "2b09261c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). 
Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "Model(\n", + " (_should_compute_train_metrics_for_batch): \n", + " (blocks): _TupleWrapper((SequentialBlock(\n", + " (layers): List(\n", + " (0): ParallelBlock(\n", + " (_aggregation): ConcatFeatures(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (parallel_layers): Dict(\n", + " (categorical): ParallelBlock(\n", + " (parallel_layers): Dict(\n", + " (sess_pid_seq): EmbeddingTable(\n", + " (features): Dict(\n", + " (sess_pid_seq): ColumnSchema(name='sess_pid_seq', tags={, , , , }, properties={'domain': {'min': 1, 'max': 390000, 'name': 'sess_pid_seq'}, 'value_count': {'min': 2, 'max': None}}, dtype=DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=2, max=None)))), is_list=True, is_ragged=True)\n", + " )\n", + " (table): Embedding(\n", + " (embeddings): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): SequentialBlock(\n", + " (layers): List(\n", + " (0): _Dense(\n", + " (dense): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (2): XLNetBlock(\n", + " (transformer): TFXLNetMainLayer(\n", + " (word_embedding): TFSharedEmbeddings(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer): List(\n", + " (0): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): 
Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (2): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_kwargs): Dict(\n", + " (name): 'transformer'\n", + " (trainable): True\n", + " (dtype): 'float32'\n", + " )\n", + " (mask_emb): \n", + " 
(_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (transformer_pre): PrepareTransformerInputs(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (transformer_post): LastHiddenState(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_masking_post): SequentialBlock(\n", + " (layers): List(\n", + " (0): TransformerOutputToRagged(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): TransformerInferenceHiddenState(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_masking_pre): SequentialBlock(\n", + " (layers): List(\n", + " (0): SequenceCausalLastInference(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): ExtractMaskFromTargets(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " ), SequentialBlock(\n", + " (layers): List(\n", + " (0): _Dense(\n", + " (dense): Dense(\n", + " 448, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " ), CategoricalOutput(\n", + " (to_call): EmbeddingTablePrediction(\n", + " (table): EmbeddingTable(\n", + " (features): Dict(\n", + " (sess_pid_seq): ColumnSchema(name='sess_pid_seq', tags={, , , , }, properties={'domain': {'min': 1, 'max': 390000, 'name': 'sess_pid_seq'}, 'value_count': {'min': 2, 'max': None}}, dtype=DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=2, max=None)))), is_list=True, is_ragged=True)\n", + " )\n", + " (table): Embedding(\n", + " (embeddings): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (output_layer_bias): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )))\n", + " (context): ModelContext(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_prepare_features): PrepareFeatures(\n", + " (prepare_lists): PrepareListFeatures()\n", + " )\n", + " (output_names): List(\n", + " (0): 'sess_pid_seq/categorical_output'\n", + " )\n", + " (optimizer): Adam()\n", + " (loss): Dict(\n", + " (sess_pid_seq/categorical_output): CategoricalCrossEntropy()\n", + " )\n", + " (train_pre): SequencePredictNext(\n", + " (_pre): SequentialBlock(\n", + " (layers): List(\n", + " (0): PrepareFeatures(\n", + " (prepare_lists): PrepareListFeatures()\n", + " )\n", + " (1): PrepareFeatures(\n", + " (prepare_lists): PrepareListFeatures()\n", + " )\n", + " )\n", + " (prepare_lists): PrepareListFeatures()\n", + " )\n", + " (transformer): XLNetBlock(\n", + " (transformer): TFXLNetMainLayer(\n", + " (word_embedding): TFSharedEmbeddings(\n", + " (_feature_shapes): Dict()\n", + " 
(_feature_dtypes): Dict()\n", + " )\n", + " (layer): List(\n", + " (0): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (2): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " 
(_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_kwargs): Dict(\n", + " (name): 'transformer'\n", + " (trainable): True\n", + " (dtype): 'float32'\n", + " )\n", + " (mask_emb): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (transformer_pre): PrepareTransformerInputs(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (transformer_post): LastHiddenState(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_masking_post): SequentialBlock(\n", + " (layers): List(\n", + " (0): TransformerOutputToRagged(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): TransformerInferenceHiddenState(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_masking_pre): SequentialBlock(\n", + " (layers): List(\n", + " (0): SequenceCausalLastInference(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): ExtractMaskFromTargets(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (test_pre): SequencePredictLast(\n", + " (_pre): SequentialBlock(\n", + " (layers): List(\n", + " (0): PrepareFeatures(\n", + " (prepare_lists): PrepareListFeatures()\n", + " )\n", + " (1): PrepareFeatures(\n", + " (prepare_lists): PrepareListFeatures()\n", + " )\n", + " )\n", + " (prepare_lists): PrepareListFeatures()\n", + " )\n", + " (transformer): XLNetBlock(\n", + " (transformer): TFXLNetMainLayer(\n", + " (word_embedding): TFSharedEmbeddings(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer): List(\n", + " (0): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " 
(_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (2): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " 
)\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_kwargs): Dict(\n", + " (name): 'transformer'\n", + " (trainable): True\n", + " (dtype): 'float32'\n", + " )\n", + " (mask_emb): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (transformer_pre): PrepareTransformerInputs(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (transformer_post): LastHiddenState(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_masking_post): SequentialBlock(\n", + " (layers): List(\n", + " (0): TransformerOutputToRagged(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): TransformerInferenceHiddenState(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_masking_pre): SequentialBlock(\n", + " (layers): List(\n", + " (0): SequenceCausalLastInference(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): ExtractMaskFromTargets(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (signatures): _SignatureMap({'serving_default': })\n", + ")" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer.load('t4rec_model')" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "4c62973a", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.loader.tensorflow import Loader" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "e5db703a", + "metadata": {}, + "outputs": [], + "source": [ + "loader = Loader(valid, batch_size=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "e11f107c", + "metadata": {}, + "outputs": [], + "source": [ + "it = iter(loader)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "c216e7fb", + "metadata": {}, + "outputs": [], + "source": [ + "while True:\n", + " b = next(it)\n", + " if b[0]['sess_pid_seq__offsets'].numpy()[1] == 20:\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "ea436b46", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + 
"({'sess_pid_seq__values': ,\n", + " 'sess_pid_seq__offsets': },\n", + " None)" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "b" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "f2c36d75", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "618 ms ± 3.14 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "\n", + "model_transformer(b[0])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/T4Rec_repro/train_and_save_model_for_benchmarking_works.ipynb b/T4Rec_repro/train_and_save_model_for_benchmarking_works.ipynb new file mode 100644 index 0000000000..1db5d4a103 --- /dev/null +++ b/T4Rec_repro/train_and_save_model_for_benchmarking_works.ipynb @@ -0,0 +1,1492 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "026bd245", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/core\n", + " 9d9b5c6a..c5c9bc25 release-23.04 -> origin/release-23.04\n", + " * [new branch] feature/merlin-array-dispatch -> origin/feature/merlin-array-dispatch\n", + " * [new branch] fix-repartition -> origin/fix-repartition\n", + " * [new branch] fix-with-properties -> origin/fix-with-properties\n", + " * [new branch] gh-pages -> origin/gh-pages\n", + " * [new branch] laiacano/docs-on-pr -> origin/laiacano/docs-on-pr\n", + " * [new branch] main -> origin/main\n", + " * [new branch] release-22.10 -> origin/release-22.10\n", + " * [new branch] release-22.11 -> origin/release-22.11\n", + " * [new branch] release-22.12 -> origin/release-22.12\n", + " * [new branch] release-23.02 -> origin/release-23.02\n", + " * [new branch] revert-163-refactor/dictarray-columns -> origin/revert-163-refactor/dictarray-columns\n", + " * [new branch] stable -> origin/stable\n", + " * [new branch] tags-intersection -> origin/tags-intersection\n", + " * [new branch] v0.2.0-docs -> origin/v0.2.0-docs\n", + " * [new tag] v0.10.0 -> v0.10.0\n", + " * [new tag] v0.8.0 -> v0.8.0\n", + " * [new tag] v0.9.0 -> v0.9.0\n", + " * [new tag] v23.02.01 -> v23.02.01\n", + " * [new tag] v0.1.0 -> v0.1.0\n", + " * [new tag] v0.1.1 -> v0.1.1\n", + " * [new tag] v0.2.0 -> v0.2.0\n", + " * [new tag] v0.3.0 -> v0.3.0\n", + " * [new tag] v0.4.0 -> v0.4.0\n", + " * [new tag] v0.5.0 -> v0.5.0\n", + " * [new tag] v0.6.0 -> v0.6.0\n", + " * [new tag] v0.7.0 -> v0.7.0\n", + " * [new tag] v23.02.00 -> v23.02.00\n", + " * [new tag] v23.05.dev0 -> v23.05.dev0\n", + "Switched to a new branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Branch 'main' set up to track remote branch 'main' from 'origin'.\n", + "Processing /core\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with 
status 'done'\n", + "Building wheels for collected packages: merlin-core\n", + " Building wheel for merlin-core (PEP 517): started\n", + " Building wheel for merlin-core (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-core: filename=merlin_core-23.5.dev0+21.ga0bcd30f-py3-none-any.whl size=161483 sha256=f76af8b2b454279185f67a9fcbb363f76377403bbbd578cfab87cad51461502e\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-zpysgkae/wheels/8f/da/8c/c779661788874afaa32fd10abeac6016635956e3bad9940584\n", + "Successfully built merlin-core\n", + "Installing collected packages: merlin-core\n", + " Attempting uninstall: merlin-core\n", + " Found existing installation: merlin-core 23.4.0\n", + " Uninstalling merlin-core-23.4.0:\n", + " Successfully uninstalled merlin-core-23.4.0\n", + "Successfully installed merlin-core-23.5.dev0+21.ga0bcd30f\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/dataloader\n", + " * [new branch] chore/comprehensive-shapes -> origin/chore/comprehensive-shapes\n", + " * [new branch] chore/packages-action -> origin/chore/packages-action\n", + " * [new branch] collabify_examples -> origin/collabify_examples\n", + " * [new branch] docs-add-seo -> origin/docs-add-seo\n", + " * [new branch] docs-calver-banner -> origin/docs-calver-banner\n", + " * [new branch] ds-api -> origin/ds-api\n", + " * [new branch] feature/embedding-tags -> origin/feature/embedding-tags\n", + " * [new branch] fix-sparse-logic -> origin/fix-sparse-logic\n", + " * [new branch] fix/tf-batch-size-warning -> origin/fix/tf-batch-size-warning\n", + " * [new branch] gh-pages -> origin/gh-pages\n", + " * [new branch] gha-test -> origin/gha-test\n", + " * [new branch] laiacano/docs-pr -> origin/laiacano/docs-pr\n", + " * [new branch] main -> origin/main\n", + " * [new branch] no_gpu -> origin/no_gpu\n", + " * [new branch] release-22.11 -> origin/release-22.11\n", + " * [new branch] release-22.12 -> origin/release-22.12\n", + " * [new branch] release-23.02 -> origin/release-23.02\n", + " * [new branch] stable -> origin/stable\n", + " * [new branch] update_github_actions -> origin/update_github_actions\n", + " * [new tag] v0.0.3 -> v0.0.3\n", + " * [new tag] v0.0.4 -> v0.0.4\n", + " * [new tag] v23.02.01 -> v23.02.01\n", + " * [new tag] v0.0.1 -> v0.0.1\n", + " * [new tag] v0.0.2 -> v0.0.2\n", + " * [new tag] v23.02.00 -> v23.02.00\n", + " * [new tag] v23.05.dev0 -> v23.05.dev0\n", + "Switched to a new branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Branch 'main' set up to track remote branch 'main' from 'origin'.\n", + "Processing /dataloader\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Building wheels for collected packages: merlin-dataloader\n", + " Building wheel for merlin-dataloader (PEP 517): started\n", + " Building wheel for merlin-dataloader (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-dataloader: filename=merlin_dataloader-23.5.dev0+8.gd9e97b4-py3-none-any.whl size=34916 sha256=a53d8e72c09517b5035a17039957847870bda00e432060ad7c5049d7b7ec5d29\n", + " Stored in directory: 
/tmp/pip-ephem-wheel-cache-un9trbg4/wheels/8c/19/5b/15dc04f5a977f6a7f73ed66c91996a687b1d9e3154a4765536\n", + "Successfully built merlin-dataloader\n", + "Installing collected packages: merlin-dataloader\n", + " Attempting uninstall: merlin-dataloader\n", + " Found existing installation: merlin-dataloader 23.4.0\n", + " Uninstalling merlin-dataloader-23.4.0:\n", + " Successfully uninstalled merlin-dataloader-23.4.0\n", + "Successfully installed merlin-dataloader-23.5.dev0+8.gd9e97b4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/NVTabular\n", + " f8f484e5..90489194 release-23.04 -> origin/release-23.04\n", + " * [new branch] 1077-implement -> origin/1077-implement\n", + " * [new branch] 21.09/column-tagging -> origin/21.09/column-tagging\n", + " * [new branch] 21.09/dataset-collection -> origin/21.09/dataset-collection\n", + " * [new branch] 21.09/operator-block -> origin/21.09/operator-block\n", + " * [new branch] 21.09/schema -> origin/21.09/schema\n", + " * [new branch] add_sum_to_supported_aggregations -> origin/add_sum_to_supported_aggregations\n", + " * [new branch] aiobotocore_v2 -> origin/aiobotocore_v2\n", + " * [new branch] alexanderronquillo-patch-1 -> origin/alexanderronquillo-patch-1\n", + " * [new branch] automate_pypi -> origin/automate_pypi\n", + " * [new branch] bench-pynvml-fix -> origin/bench-pynvml-fix\n", + " * [new branch] branch-0.6 -> origin/branch-0.6\n", + " * [new branch] bschifferer-remove_examples_1 -> origin/bschifferer-remove_examples_1\n", + " * [new branch] categorify-inference-int16 -> origin/categorify-inference-int16\n", + " * [new branch] columns_with_aggs_in_names -> origin/columns_with_aggs_in_names\n", + " * [new branch] conda-package-python-versions -> origin/conda-package-python-versions\n", + " * [new branch] conda_gh_action -> origin/conda_gh_action\n", + " * [new branch] dataloader-remove-sparse -> origin/dataloader-remove-sparse\n", + " * [new branch] dataloader_doc_fix -> origin/dataloader_doc_fix\n", + " * [new branch] disable-package-build-on-pull-requests -> origin/disable-package-build-on-pull-requests\n", + " * [new branch] dont_install_tests -> origin/dont_install_tests\n", + " * [new branch] drop_low_cardinality -> origin/drop_low_cardinality\n", + " * [new branch] fix-docs-tox-env -> origin/fix-docs-tox-env\n", + " * [new branch] fix-wf-file -> origin/fix-wf-file\n", + " * [new branch] fix/inference-deprecation -> origin/fix/inference-deprecation\n", + " * [new branch] fix_data_path -> origin/fix_data_path\n", + " * [new branch] fix_hugectr_nb -> origin/fix_hugectr_nb\n", + " * [new branch] fix_nbs -> origin/fix_nbs\n", + " * [new branch] gh-pages -> origin/gh-pages\n", + " * [new branch] groupby_without_groupby_col_in_col_selector -> origin/groupby_without_groupby_col_in_col_selector\n", + " * [new branch] hugectr-newapi -> origin/hugectr-newapi\n", + " * [new branch] laiacano/check-list-from-schema -> origin/laiacano/check-list-from-schema\n", + " * [new branch] laiacano/workflow-subgraph -> origin/laiacano/workflow-subgraph\n", + " * [new branch] main -> origin/main\n", + " * [new branch] na_sentinel -> origin/na_sentinel\n", + " * [new branch] notebooks-21.10 -> origin/notebooks-21.10\n", + " * [new branch] nvt-1195 -> origin/nvt-1195\n", + " * [new branch] nvtabular_examples -> origin/nvtabular_examples\n", + " * [new branch] packages-workflow-split -> origin/packages-workflow-split\n", + " * [new branch] readme_updates -> origin/readme_updates\n", + " * [new 
branch] refactor/fit-schema -> origin/refactor/fit-schema\n", + " * [new branch] refactor/input-column-selection -> origin/refactor/input-column-selection\n", + " * [new branch] refactor/postpone-schema-binding -> origin/refactor/postpone-schema-binding\n", + " * [new branch] release-22.10 -> origin/release-22.10\n", + " * [new branch] release-22.11 -> origin/release-22.11\n", + " * [new branch] release-22.12 -> origin/release-22.12\n", + " * [new branch] release-23.02 -> origin/release-23.02\n", + " * [new branch] remove_poetry -> origin/remove_poetry\n", + " * [new branch] remove_release_notes -> origin/remove_release_notes\n", + " * [new branch] repeat-ops -> origin/repeat-ops\n", + " * [new branch] rjzamora-simplify-criteo -> origin/rjzamora-simplify-criteo\n", + " * [new branch] rnyak-patch-1 -> origin/rnyak-patch-1\n", + " * [new branch] romeyn/input-api -> origin/romeyn/input-api\n", + " * [new branch] stable -> origin/stable\n", + " * [new branch] test-column-similarity-dataset-cpu-default-none -> origin/test-column-similarity-dataset-cpu-default-none\n", + " * [new branch] test-torch-dataloader-dataset-cpu-default-none -> origin/test-torch-dataloader-dataset-cpu-default-none\n", + " * [new branch] torch_catch -> origin/torch_catch\n", + " * [new branch] update-dask-reqs -> origin/update-dask-reqs\n", + " * [new branch] update_merlin_core -> origin/update_merlin_core\n", + " * [new branch] update_requirements -> origin/update_requirements\n", + " * [new branch] v0.10.0-docs -> origin/v0.10.0-docs\n", + " * [new branch] v0.11.0-docs -> origin/v0.11.0-docs\n", + " * [new branch] v0.7.1-docs -> origin/v0.7.1-docs\n", + " * [new branch] v0.8.0-docs -> origin/v0.8.0-docs\n", + " * [new branch] v0.9.0-docs -> origin/v0.9.0-docs\n", + " * [new branch] v1.0.0-docs -> origin/v1.0.0-docs\n", + " * [new tag] v0.6.1 -> v0.6.1\n", + " * [new tag] v1.6.0 -> v1.6.0\n", + " * [new tag] v1.7.0 -> v1.7.0\n", + " * [new tag] v1.8.1 -> v1.8.1\n", + " * [new tag] v23.02.00 -> v23.02.00\n", + " * [new tag] v0.1.0 -> v0.1.0\n", + " * [new tag] v0.1.1 -> v0.1.1\n", + " * [new tag] v0.10.0 -> v0.10.0\n", + " * [new tag] v0.11.0 -> v0.11.0\n", + " * [new tag] v0.2.0 -> v0.2.0\n", + " * [new tag] v0.3.0 -> v0.3.0\n", + " * [new tag] v0.4.0 -> v0.4.0\n", + " * [new tag] v0.5.0 -> v0.5.0\n", + " * [new tag] v0.5.1 -> v0.5.1\n", + " * [new tag] v0.5.2 -> v0.5.2\n", + " * [new tag] v0.5.3 -> v0.5.3\n", + " * [new tag] v0.6.0 -> v0.6.0\n", + " * [new tag] v0.7.0 -> v0.7.0\n", + " * [new tag] v0.7.1 -> v0.7.1\n", + " * [new tag] v0.8.0 -> v0.8.0\n", + " * [new tag] v0.9.0 -> v0.9.0\n", + " * [new tag] v1.0.0 -> v1.0.0\n", + " * [new tag] v1.1.0 -> v1.1.0\n", + " * [new tag] v1.1.1 -> v1.1.1\n", + " * [new tag] v1.2.0 -> v1.2.0\n", + " * [new tag] v1.2.1 -> v1.2.1\n", + " * [new tag] v1.2.2 -> v1.2.2\n", + " * [new tag] v1.3.0 -> v1.3.0\n", + " * [new tag] v1.3.1 -> v1.3.1\n", + " * [new tag] v1.3.2 -> v1.3.2\n", + " * [new tag] v1.3.3 -> v1.3.3\n", + " * [new tag] v1.4.0 -> v1.4.0\n", + " * [new tag] v1.5.0 -> v1.5.0\n", + " * [new tag] v1.8.0 -> v1.8.0\n", + " * [new tag] v23.05.dev0 -> v23.05.dev0\n", + "Switched to a new branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Branch 'main' set up to track remote branch 'main' from 'origin'.\n", + "Processing /nvtabular\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build 
wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Building wheels for collected packages: nvtabular\n", + " Building wheel for nvtabular (PEP 517): started\n", + " Building wheel for nvtabular (PEP 517): finished with status 'done'\n", + " Created wheel for nvtabular: filename=nvtabular-23.5.dev0+7.g67136eba-cp38-cp38-linux_x86_64.whl size=259925 sha256=197d7ba28258dad52e99289d9cb6f1821a54930776ae97c8812b316108857063\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-mpy75mx7/wheels/df/bf/c2/9cc2a62fe6da42038c26a9c0c4e25f9767093528b102fa30a2\n", + "Successfully built nvtabular\n", + "Installing collected packages: nvtabular\n", + " Attempting uninstall: nvtabular\n", + " Found existing installation: nvtabular 23.4.0\n", + " Uninstalling nvtabular-23.4.0:\n", + " Successfully uninstalled nvtabular-23.4.0\n", + "Successfully installed nvtabular-23.5.dev0+7.g67136eba\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/Models\n", + " a44eced6..56c7d6a4 release-23.04 -> origin/release-23.04\n", + " * [new branch] Mai -> origin/Mai\n", + " * [new branch] add_category_encoding_test -> origin/add_category_encoding_test\n", + " * [new branch] add_lightfm_and_explicit_training_example -> origin/add_lightfm_and_explicit_training_example\n", + " * [new branch] add_logo_tracking_to_07 -> origin/add_logo_tracking_to_07\n", + " * [new branch] add_notebooks_test -> origin/add_notebooks_test\n", + " * [new branch] advanced_example -> origin/advanced_example\n", + " * [new branch] asvdb_metric_tracking -> origin/asvdb_metric_tracking\n", + " * [new branch] batched-dataset/schema -> origin/batched-dataset/schema\n", + " * [new branch] benchmark-session-based -> origin/benchmark-session-based\n", + " * [new branch] block-context -> origin/block-context\n", + " * [new branch] blossom_report_skipped -> origin/blossom_report_skipped\n", + " * [new branch] break_ties -> origin/break_ties\n", + " * [new branch] bs_unittest_examples_v2 -> origin/bs_unittest_examples_v2\n", + " * [new branch] bschifferer-patch-1 -> origin/bschifferer-patch-1\n", + " * [new branch] change_two_tower_api_test -> origin/change_two_tower_api_test\n", + " * [new branch] ci/backend-tests -> origin/ci/backend-tests\n", + " * [new branch] ci/example-linting -> origin/ci/example-linting\n", + " * [new branch] ci/horovod -> origin/ci/horovod\n", + " * [new branch] cicd -> origin/cicd\n", + " * [new branch] codespell_fix -> origin/codespell_fix\n", + " * [new branch] compare_ranking_models -> origin/compare_ranking_models\n", + " * [new branch] conda_recipe -> origin/conda_recipe\n", + " * [new branch] consolidate-abstractions -> origin/consolidate-abstractions\n", + " * [new branch] dataloader_tag_fix -> origin/dataloader_tag_fix\n", + " * [new branch] dcn_tests -> origin/dcn_tests\n", + " * [new branch] deps/merlin-core-commit -> origin/deps/merlin-core-commit\n", + " * [new branch] docs-strings -> origin/docs-strings\n", + " * [new branch] docs/interrogate-cfg -> origin/docs/interrogate-cfg\n", + " * [new branch] docs/interrogate-config -> origin/docs/interrogate-config\n", + " * [new branch] emb_export_fix -> origin/emb_export_fix\n", + " * [new branch] evaluate_fixes -> origin/evaluate_fixes\n", + " * [new branch] examples/unit-tests -> origin/examples/unit-tests\n", + " * [new branch] examples/update_link -> origin/examples/update_link\n", + " * [new branch] examples_fixes -> 
origin/examples_fixes\n", + " * [new branch] fea-sok-integration-wj -> origin/fea-sok-integration-wj\n", + " * [new branch] fea-sok-load-dump -> origin/fea-sok-load-dump\n", + " * [new branch] feature/multi-hot-columns -> origin/feature/multi-hot-columns\n", + " * [new branch] feature/retrieval-dnn -> origin/feature/retrieval-dnn\n", + " * [new branch] fix-contrastive-predictions -> origin/fix-contrastive-predictions\n", + " * [new branch] fix/aliccp_workflow -> origin/fix/aliccp_workflow\n", + " * [new branch] fix/batch_predict -> origin/fix/batch_predict\n", + " * [new branch] fix/example-tests -> origin/fix/example-tests\n", + " * [new branch] fix/python-version -> origin/fix/python-version\n", + " * [new branch] fix/shared_embeddings -> origin/fix/shared_embeddings\n", + " * [new branch] fix_aliccp_schema -> origin/fix_aliccp_schema\n", + " * [new branch] fix_cated_ohe -> origin/fix_cated_ohe\n", + " * [new branch] fix_datetime_issue_add_inference_on_TIS -> origin/fix_datetime_issue_add_inference_on_TIS\n", + " * [new branch] fix_lightfm_evaluate -> origin/fix_lightfm_evaluate\n", + " * [new branch] fix_masking -> origin/fix_masking\n", + " * [new branch] fix_mtl_metrics -> origin/fix_mtl_metrics\n", + " * [new branch] fix_notebooks -> origin/fix_notebooks\n", + " * [new branch] fix_regression -> origin/fix_regression\n", + " * [new branch] fix_retrieval -> origin/fix_retrieval\n", + " * [new branch] fix_retrieval_eval_loss -> origin/fix_retrieval_eval_loss\n", + " * [new branch] fix_sampled_softmax_evaluation -> origin/fix_sampled_softmax_evaluation\n", + " * [new branch] fix_test_07 -> origin/fix_test_07\n", + " * [new branch] getting_started_exp -> origin/getting_started_exp\n", + " * [new branch] gh-pages -> origin/gh-pages\n", + " * [new branch] hashed_cross_test -> origin/hashed_cross_test\n", + " * [new branch] implement_review_comments -> origin/implement_review_comments\n", + " * [new branch] in-bath-sampling-bug -> origin/in-bath-sampling-bug\n", + " * [new branch] infer_embeddings -> origin/infer_embeddings\n", + " * [new branch] inference_benchmarking_transformers -> origin/inference_benchmarking_transformers\n", + " * [new branch] laiacano/concurrency -> origin/laiacano/concurrency\n", + " * [new branch] laiacano/tox -> origin/laiacano/tox\n", + " * [new branch] layer_freezing_test -> origin/layer_freezing_test\n", + " * [new branch] load_retrieval_model -> origin/load_retrieval_model\n", + " * [new branch] logit_correction_nol2_temp -> origin/logit_correction_nol2_temp\n", + " * [new branch] losses -> origin/losses\n", + " * [new branch] main -> origin/main\n", + " * [new branch] masking_transforms -> origin/masking_transforms\n", + " * [new branch] merlin-standard-lib -> origin/merlin-standard-lib\n", + " * [new branch] metrics_opt -> origin/metrics_opt\n", + " * [new branch] metrics_opt2 -> origin/metrics_opt2\n", + " * [new branch] mikemckiernan-patch-1 -> origin/mikemckiernan-patch-1\n", + " * [new branch] mlm -> origin/mlm\n", + " * [new branch] mlm_alt -> origin/mlm_alt\n", + " * [new branch] mlp_selu -> origin/mlp_selu\n", + " * [new branch] mrr_fix -> origin/mrr_fix\n", + " * [new branch] mtl_example -> origin/mtl_example\n", + " * [new branch] mtl_loss -> origin/mtl_loss\n", + " * [new branch] mtl_models -> origin/mtl_models\n", + " * [new branch] mtl_regularization -> origin/mtl_regularization\n", + " * [new branch] multi_optimizer_example -> origin/multi_optimizer_example\n", + " * [new branch] neg_sampling -> origin/neg_sampling\n", + " * [new branch] poc -> 
origin/poc\n", + " * [new branch] pretrained_init -> origin/pretrained_init\n", + " * [new branch] radekosmulski-patch-2 -> origin/radekosmulski-patch-2\n", + " * [new branch] ragged_embeddings -> origin/ragged_embeddings\n", + " * [new branch] ranking_models_inputs -> origin/ranking_models_inputs\n", + " * [new branch] ranking_tests -> origin/ranking_tests\n", + " * [new branch] ranking_tests3 -> origin/ranking_tests3\n", + " * [new branch] readme_bash -> origin/readme_bash\n", + " * [new branch] refactor-docs-reqs -> origin/refactor-docs-reqs\n", + " * [new branch] refactor/docs-reqs -> origin/refactor/docs-reqs\n", + " * [new branch] refactor/embedding-layers -> origin/refactor/embedding-layers\n", + " * [new branch] refactor/youtube-retrieval -> origin/refactor/youtube-retrieval\n", + " * [new branch] release-22.10 -> origin/release-22.10\n", + " * [new branch] release-22.11 -> origin/release-22.11\n", + " * [new branch] release-22.12 -> origin/release-22.12\n", + " * [new branch] release-23.02 -> origin/release-23.02\n", + " * [new branch] remove/masking -> origin/remove/masking\n", + " * [new branch] reset-metrics -> origin/reset-metrics\n", + " * [new branch] retrieval-sample-weights -> origin/retrieval-sample-weights\n", + " * [new branch] retrieval_debug -> origin/retrieval_debug\n", + " * [new branch] retrieval_debug_no_l2norm -> origin/retrieval_debug_no_l2norm\n", + " * [new branch] retrieval_debug_scores_temp -> origin/retrieval_debug_scores_temp\n", + " * [new branch] retrieval_eval_fix -> origin/retrieval_eval_fix\n", + " * [new branch] retrieval_fixes -> origin/retrieval_fixes\n", + " * [new branch] retrieval_fixes_2 -> origin/retrieval_fixes_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " * [new branch] retrieval_integration_tests -> origin/retrieval_integration_tests\n", + " * [new branch] revert-813-laiacano/tox-and-tmpdir -> origin/revert-813-laiacano/tox-and-tmpdir\n", + " * [new branch] romeyn/block-api -> origin/romeyn/block-api\n", + " * [new branch] romeyn/block-cleanup -> origin/romeyn/block-cleanup\n", + " * [new branch] romeyn/inputs -> origin/romeyn/inputs\n", + " * [new branch] sampling -> origin/sampling\n", + " * [new branch] select-by-tag -> origin/select-by-tag\n", + " * [new branch] stable -> origin/stable\n", + " * [new branch] t4rec_use_case -> origin/t4rec_use_case\n", + " * [new branch] tf/add-bokeh-to-dev -> origin/tf/add-bokeh-to-dev\n", + " * [new branch] tf/base-model-test-graph-mode -> origin/tf/base-model-test-graph-mode\n", + " * [new branch] tf/batch_predict_fix -> origin/tf/batch_predict_fix\n", + " * [new branch] tf/categorical-prediction -> origin/tf/categorical-prediction\n", + " * [new branch] tf/categorical-prediction-2 -> origin/tf/categorical-prediction-2\n", + " * [new branch] tf/column_sampling_serialization_fix -> origin/tf/column_sampling_serialization_fix\n", + " * [new branch] tf/combinators-base -> origin/tf/combinators-base\n", + " * [new branch] tf/cond -> origin/tf/cond\n", + " * [new branch] tf/context-tensor -> origin/tf/context-tensor\n", + " * [new branch] tf/continuous_seq_feats_fix -> origin/tf/continuous_seq_feats_fix\n", + " * [new branch] tf/contrastive-prediction -> origin/tf/contrastive-prediction\n", + " * [new branch] tf/core -> origin/tf/core\n", + " * [new branch] tf/dataloader_changes -> origin/tf/dataloader_changes\n", + " * [new branch] tf/dep-prediction-tasks -> origin/tf/dep-prediction-tasks\n", + " * [new branch] tf/dlrm_dropout_fix -> origin/tf/dlrm_dropout_fix\n", + " * 
[new branch] tf/dynamic-memory-growth -> origin/tf/dynamic-memory-growth\n", + " * [new branch] tf/embedding-tables -> origin/tf/embedding-tables\n", + " * [new branch] tf/embeddings_regularization -> origin/tf/embeddings_regularization\n", + " * [new branch] tf/evaluate_retrieval -> origin/tf/evaluate_retrieval\n", + " * [new branch] tf/fix_broadcast_to_sequence -> origin/tf/fix_broadcast_to_sequence\n", + " * [new branch] tf/fix_logq_correction -> origin/tf/fix_logq_correction\n", + " * [new branch] tf/fix_mlm_test -> origin/tf/fix_mlm_test\n", + " * [new branch] tf/fix_tag_item_id -> origin/tf/fix_tag_item_id\n", + " * [new branch] tf/fix_tests_shared_state -> origin/tf/fix_tests_shared_state\n", + " * [new branch] tf/fix_training_smaller_accuracy -> origin/tf/fix_training_smaller_accuracy\n", + " * [new branch] tf/input-block -> origin/tf/input-block\n", + " * [new branch] tf/input-block-filter -> origin/tf/input-block-filter\n", + " * [new branch] tf/inputs-concat -> origin/tf/inputs-concat\n", + " * [new branch] tf/keras-embedding -> origin/tf/keras-embedding\n", + " * [new branch] tf/logit_correction -> origin/tf/logit_correction\n", + " * [new branch] tf/loglossmetric_callbacks -> origin/tf/loglossmetric_callbacks\n", + " * [new branch] tf/logq_correction -> origin/tf/logq_correction\n", + " * [new branch] tf/loss_batch_metric -> origin/tf/loss_batch_metric\n", + " * [new branch] tf/map-values -> origin/tf/map-values\n", + " * [new branch] tf/masking_block -> origin/tf/masking_block\n", + " * [new branch] tf/mf-retrieval-model -> origin/tf/mf-retrieval-model\n", + " * [new branch] tf/mlm-schema -> origin/tf/mlm-schema\n", + " * [new branch] tf/model-tests -> origin/tf/model-tests\n", + " * [new branch] tf/model/sequential -> origin/tf/model/sequential\n", + " * [new branch] tf/move-core -> origin/tf/move-core\n", + " * [new branch] tf/mtl_example_updates_v2 -> origin/tf/mtl_example_updates_v2\n", + " * [new branch] tf/multi_task_improv -> origin/tf/multi_task_improv\n", + " * [new branch] tf/ncf_model -> origin/tf/ncf_model\n", + " * [new branch] tf/output-block -> origin/tf/output-block\n", + " * [new branch] tf/pop_metrics -> origin/tf/pop_metrics\n", + " * [new branch] tf/prediction -> origin/tf/prediction\n", + " * [new branch] tf/prediction-block -> origin/tf/prediction-block\n", + " * [new branch] tf/pretrained_emb -> origin/tf/pretrained_emb\n", + " * [new branch] tf/process_list_to_prepare_features -> origin/tf/process_list_to_prepare_features\n", + " * [new branch] tf/pruning-parallel-block -> origin/tf/pruning-parallel-block\n", + " * [new branch] tf/quick_start_ranking -> origin/tf/quick_start_ranking\n", + " * [new branch] tf/ragged-tensors -> origin/tf/ragged-tensors\n", + " * [new branch] tf/ranking_metrics_sort -> origin/tf/ranking_metrics_sort\n", + " * [new branch] tf/refactor -> origin/tf/refactor\n", + " * [new branch] tf/retireval_eval -> origin/tf/retireval_eval\n", + " * [new branch] tf/retrieval-eval -> origin/tf/retrieval-eval\n", + " * [new branch] tf/retrieval-model-v2 -> origin/tf/retrieval-model-v2\n", + " * [new branch] tf/retrieval-models -> origin/tf/retrieval-models\n", + " * [new branch] tf/sampling/items -> origin/tf/sampling/items\n", + " * [new branch] tf/save-regularizer -> origin/tf/save-regularizer\n", + " * [new branch] tf/target-propagation -> origin/tf/target-propagation\n", + " * [new branch] tf/targets -> origin/tf/targets\n", + " * [new branch] tf/tf-cont-list -> origin/tf/tf-cont-list\n", + " * [new branch] tf/topk_recommender -> 
origin/tf/topk_recommender\n", + " * [new branch] tf/tower-save -> origin/tf/tower-save\n", + " * [new branch] tf/train_metrics_steps_fix -> origin/tf/train_metrics_steps_fix\n", + " * [new branch] tf/transformer-api -> origin/tf/transformer-api\n", + " * [new branch] tf/transformer-block -> origin/tf/transformer-block\n", + " * [new branch] tf/transformer_block -> origin/tf/transformer_block\n", + " * [new branch] tf/wide_and_deep -> origin/tf/wide_and_deep\n", + " * [new branch] tf/wrap-as-model -> origin/tf/wrap-as-model\n", + " * [new branch] tf/xlnet-bug -> origin/tf/xlnet-bug\n", + " * [new branch] torch/clean-up -> origin/torch/clean-up\n", + " * [new branch] torch/dev -> origin/torch/dev\n", + " * [new branch] torch/masking -> origin/torch/masking\n", + " * [new branch] torch/prototype -> origin/torch/prototype\n", + " * [new branch] torch/remove-t4r-code -> origin/torch/remove-t4r-code\n", + " * [new branch] tox_github_actions_fix -> origin/tox_github_actions_fix\n", + " * [new branch] transformer-api -> origin/transformer-api\n", + " * [new branch] two_tower_fixes -> origin/two_tower_fixes\n", + " * [new branch] update_07 -> origin/update_07\n", + " * [new branch] update_advanced_notebook -> origin/update_advanced_notebook\n", + " * [new branch] update_example_01 -> origin/update_example_01\n", + " * [new branch] update_examples_with_tracking_logo -> origin/update_examples_with_tracking_logo\n", + " * [new branch] v0.2.0-docs -> origin/v0.2.0-docs\n", + " * [new branch] v0.3.0-docs -> origin/v0.3.0-docs\n", + " * [new branch] validation_data_fix -> origin/validation_data_fix\n", + " * [new branch] validation_data_fix2 -> origin/validation_data_fix2\n", + " * [new branch] wide_deep_example_test -> origin/wide_deep_example_test\n", + " * [new branch] wideanddeep_example -> origin/wideanddeep_example\n", + " * [new branch] xgboost/predict-without-target -> origin/xgboost/predict-without-target\n", + " * [new branch] youtube_dnn_retrieval -> origin/youtube_dnn_retrieval\n", + " * [new branch] youtubednn_improv -> origin/youtubednn_improv\n", + " * [new branch] youtubednn_logq -> origin/youtubednn_logq\n", + " * [new tag] v0.10.0 -> v0.10.0\n", + " * [new tag] v0.11.0 -> v0.11.0\n", + " * [new tag] v0.9.0 -> v0.9.0\n", + " * [new tag] v23.02.00 -> v23.02.00\n", + " * [new tag] v0.1.0 -> v0.1.0\n", + " * [new tag] v0.2.0 -> v0.2.0\n", + " * [new tag] v0.3.0 -> v0.3.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " * [new tag] v0.4.0 -> v0.4.0\n", + " * [new tag] v0.5.0 -> v0.5.0\n", + " * [new tag] v0.6.0 -> v0.6.0\n", + " * [new tag] v0.7.0 -> v0.7.0\n", + " * [new tag] v0.8.0 -> v0.8.0\n", + " * [new tag] v23.05.dev0 -> v23.05.dev0\n", + "Switched to a new branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Branch 'main' set up to track remote branch 'main' from 'origin'.\n", + "Processing /models\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Building wheels for collected packages: merlin-models\n", + " Building wheel for merlin-models (PEP 517): started\n", + " Building wheel for merlin-models (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-models: 
filename=merlin_models-23.5.dev0+12.gd8133b8f-py3-none-any.whl size=343289 sha256=ea5d89a929291c07105d8d9cfbc0bb5cb7302c590c89144d6f28bf2d1bcf3941\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-hz3xmyr2/wheels/4d/e8/98/0493db55fff90dc9af123f55a9455b96f7f8166c912a02c8a6\n", + "Successfully built merlin-models\n", + "Installing collected packages: merlin-models\n", + " Attempting uninstall: merlin-models\n", + " Found existing installation: merlin-models 23.4.0\n", + " Uninstalling merlin-models-23.4.0:\n", + " Successfully uninstalled merlin-models-23.4.0\n", + "Successfully installed merlin-models-23.5.dev0+12.gd8133b8f\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/systems\n", + " fce949f..2516efb release-23.04 -> origin/release-23.04\n", + " * [new branch] add_xgboost_serving_example -> origin/add_xgboost_serving_example\n", + " * [new branch] bschifferer-patch-1 -> origin/bschifferer-patch-1\n", + " * [new branch] bschifferer-patch-2 -> origin/bschifferer-patch-2\n", + " * [new branch] ci/cpu-action -> origin/ci/cpu-action\n", + " * [new branch] dataset-cpu-default-None -> origin/dataset-cpu-default-None\n", + " * [new branch] docs-nightly-build -> origin/docs-nightly-build\n", + " * [new branch] docs-remove-deps -> origin/docs-remove-deps\n", + " * [new branch] docs-tox -> origin/docs-tox\n", + " * [new branch] docs/contributing -> origin/docs/contributing\n", + " * [new branch] docs/coverage-threshold -> origin/docs/coverage-threshold\n", + " * [new branch] docs/docstring-coverage -> origin/docs/docstring-coverage\n", + " * [new branch] docs/interrogate-cfg -> origin/docs/interrogate-cfg\n", + " * [new branch] docs/interrogate-config -> origin/docs/interrogate-config\n", + " * [new branch] docs/issue-templates -> origin/docs/issue-templates\n", + " * [new branch] docs/readme -> origin/docs/readme\n", + " * [new branch] feast-errors -> origin/feast-errors\n", + " * [new branch] feature/pytorch -> origin/feature/pytorch\n", + " * [new branch] feature/t4r-serving -> origin/feature/t4r-serving\n", + " * [new branch] feature/torchscript -> origin/feature/torchscript\n", + " * [new branch] fix/dask-dist-deps -> origin/fix/dask-dist-deps\n", + " * [new branch] fix/faiss-types -> origin/fix/faiss-types\n", + " * [new branch] fix/multi-hot-dtypes -> origin/fix/multi-hot-dtypes\n", + " * [new branch] fix/multihot-schemas -> origin/fix/multihot-schemas\n", + " * [new branch] fix/pkg-build-lib -> origin/fix/pkg-build-lib\n", + " * [new branch] fix/pytest-feast -> origin/fix/pytest-feast\n", + " * [new branch] fix/skipped-tests -> origin/fix/skipped-tests\n", + " * [new branch] fix/tf-input-shapes -> origin/fix/tf-input-shapes\n", + " * [new branch] fix/torch-importorskip -> origin/fix/torch-importorskip\n", + " * [new branch] fix_model_outputnames -> origin/fix_model_outputnames\n", + " * [new branch] fix_nb -> origin/fix_nb\n", + " * [new branch] gh-pages -> origin/gh-pages\n", + " * [new branch] laiacano/slack-notify -> origin/laiacano/slack-notify\n", + " * [new branch] laiacano/transformer-import -> origin/laiacano/transformer-import\n", + " * [new branch] laiacano/upgrade-feast -> origin/laiacano/upgrade-feast\n", + " * [new branch] main -> origin/main\n", + " * [new branch] merlin_models_xgboost -> origin/merlin_models_xgboost\n", + " * [new branch] migration/from-nvt -> origin/migration/from-nvt\n", + " * [new branch] polish/remove-dtype-matching -> origin/polish/remove-dtype-matching\n", + " * [new branch] 
radekosmulski-patch-1 -> origin/radekosmulski-patch-1\n", + " * [new branch] radekosmulski-patch-1-1 -> origin/radekosmulski-patch-1-1\n", + " * [new branch] refactor/dtypes -> origin/refactor/dtypes\n", + " * [new branch] refactor/organize-tests -> origin/refactor/organize-tests\n", + " * [new branch] refactor/schema-validation-hook -> origin/refactor/schema-validation-hook\n", + " * [new branch] refactor/virtual-dataframe -> origin/refactor/virtual-dataframe\n", + " * [new branch] release-22.10 -> origin/release-22.10\n", + " * [new branch] release-22.11 -> origin/release-22.11\n", + " * [new branch] release-22.12 -> origin/release-22.12\n", + " * [new branch] release-23.02 -> origin/release-23.02\n", + " * [new branch] run_triton_utils -> origin/run_triton_utils\n", + " * [new branch] stable -> origin/stable\n", + " * [new branch] update-reqs -> origin/update-reqs\n", + " * [new branch] update/precommit-hooks -> origin/update/precommit-hooks\n", + " * [new branch] use_dataloader -> origin/use_dataloader\n", + " * [new branch] v0.0.1-docs -> origin/v0.0.1-docs\n", + " * [new branch] v0.1.0-docs -> origin/v0.1.0-docs\n", + " * [new tag] v0.7.0 -> v0.7.0\n", + " * [new tag] v0.8.0 -> v0.8.0\n", + " * [new tag] v0.9.0 -> v0.9.0\n", + " * [new tag] v23.02.00 -> v23.02.00\n", + " * [new tag] v0.0.1 -> v0.0.1\n", + " * [new tag] v0.1.0 -> v0.1.0\n", + " * [new tag] v0.2.0 -> v0.2.0\n", + " * [new tag] v0.3.0 -> v0.3.0\n", + " * [new tag] v0.4.0 -> v0.4.0\n", + " * [new tag] v0.5.0 -> v0.5.0\n", + " * [new tag] v0.6.0 -> v0.6.0\n", + " * [new tag] v23.05.dev0 -> v23.05.dev0\n", + "Switched to a new branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Branch 'main' set up to track remote branch 'main' from 'origin'.\n", + "Processing /systems\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Building wheels for collected packages: merlin-systems\n", + " Building wheel for merlin-systems (PEP 517): started\n", + " Building wheel for merlin-systems (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-systems: filename=merlin_systems-23.5.dev0+8.g2b1b90b-py3-none-any.whl size=83188 sha256=1375160a02bdf3385338c75db0eb830ac273a7d382b02115998720eabfb856df\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-_1pwgzp6/wheels/1f/e9/71/1b0c6295aa7f4b37cb70292d96d87d9f38204674e6531bdda6\n", + "Successfully built merlin-systems\n", + "Installing collected packages: merlin-systems\n", + " Attempting uninstall: merlin-systems\n", + " Found existing installation: merlin-systems 23.4.0\n", + " Uninstalling merlin-systems-23.4.0:\n", + " Successfully uninstalled merlin-systems-23.4.0\n", + "Successfully installed merlin-systems-23.5.dev0+8.g2b1b90b\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/Transformers4Rec\n", + " 4a9e7373..911355f4 release-23.04 -> origin/release-23.04\n", + " * [new branch] DDP_fix -> origin/DDP_fix\n", + " * [new branch] HF-update -> origin/HF-update\n", + " * [new branch] add_benchmarking_scripts -> origin/add_benchmarking_scripts\n", + " * [new branch] add_topk_layer -> origin/add_topk_layer\n", + " * [new branch] albert17-check -> origin/albert17-check\n", + " * 
[new branch] batches -> origin/batches\n", + " * [new branch] benfred/datasetschema -> origin/benfred/datasetschema\n", + " * [new branch] clean_rnn_block -> origin/clean_rnn_block\n", + " * [new branch] core-schema/deprecation-warning -> origin/core-schema/deprecation-warning\n", + " * [new branch] core-schema/tabular-features -> origin/core-schema/tabular-features\n", + " * [new branch] core-schema/trainer -> origin/core-schema/trainer\n", + " * [new branch] dataloader -> origin/dataloader\n", + " * [new branch] dataparallel_fix -> origin/dataparallel_fix\n", + " * [new branch] doc/supported_transformers -> origin/doc/supported_transformers\n", + " * [new branch] doc_fix -> origin/doc_fix\n", + " * [new branch] docs -> origin/docs\n", + " * [new branch] etl-nvt -> origin/etl-nvt\n", + " * [new branch] examples -> origin/examples\n", + " * [new branch] fix-data-repartition -> origin/fix-data-repartition\n", + " * [new branch] fix-failing-ci -> origin/fix-failing-ci\n", + " * [new branch] fix-inference -> origin/fix-inference\n", + " * [new branch] fix/transformers_config -> origin/fix/transformers_config\n", + " * [new branch] fix_gettingstarted_nb -> origin/fix_gettingstarted_nb\n", + " * [new branch] fix_inference -> origin/fix_inference\n", + " * [new branch] fix_nbs -> origin/fix_nbs\n", + " * [new branch] fix_oom_tests -> origin/fix_oom_tests\n", + " * [new branch] fix_req_paper_repro -> origin/fix_req_paper_repro\n", + " * [new branch] fix_stochastic -> origin/fix_stochastic\n", + " * [new branch] fix_unit_test -> origin/fix_unit_test\n", + " * [new branch] gh-pages -> origin/gh-pages\n", + " * [new branch] github-templates -> origin/github-templates\n", + " * [new branch] ignore-masking -> origin/ignore-masking\n", + " * [new branch] laiacano/merlin-core-schema -> origin/laiacano/merlin-core-schema\n", + " * [new branch] laiacano/skip-ci-on-closed-pr -> origin/laiacano/skip-ci-on-closed-pr\n", + " * [new branch] license -> origin/license\n", + " * [new branch] main -> origin/main\n", + " * [new branch] masking_quick_fix -> origin/masking_quick_fix\n", + " * [new branch] metric-names-prefix -> origin/metric-names-prefix\n", + " * [new branch] model_save_load -> origin/model_save_load\n", + " * [new branch] multi_gpu_doc -> origin/multi_gpu_doc\n", + " * [new branch] multi_gpu_doc_fix -> origin/multi_gpu_doc_fix\n", + " * [new branch] post_fusion_context -> origin/post_fusion_context\n", + " * [new branch] pretrained_embeddings_init -> origin/pretrained_embeddings_init\n", + " * [new branch] pretrained_module -> origin/pretrained_module\n", + " * [new branch] pyt_serving -> origin/pyt_serving\n", + " * [new branch] pytorch/item-id-aggregator -> origin/pytorch/item-id-aggregator\n", + " * [new branch] pytorch/label_smoothing -> origin/pytorch/label_smoothing\n", + " * [new branch] pytorch/model-and-heads -> origin/pytorch/model-and-heads\n", + " * [new branch] pytorch/model-updates -> origin/pytorch/model-updates\n", + " * [new branch] read_schema_from_core -> origin/read_schema_from_core\n", + " * [new branch] recsys22 -> origin/recsys22\n", + " * [new branch] refactor-prediction-task -> origin/refactor-prediction-task\n", + " * [new branch] refactor_part1 -> origin/refactor_part1\n", + " * [new branch] refactor_part2 -> origin/refactor_part2\n", + " * [new branch] release-22.10 -> origin/release-22.10\n", + " * [new branch] release-22.11 -> origin/release-22.11\n", + " * [new branch] release-22.12 -> origin/release-22.12\n", + " * [new branch] release-23.02 -> 
origin/release-23.02\n", + " * [new branch] release-jperez999 -> origin/release-jperez999\n", + " * [new branch] remove_paper_assets -> origin/remove_paper_assets\n", + " * [new branch] romeyn/dev -> origin/romeyn/dev\n", + " * [new branch] romeyn/transformer-configs -> origin/romeyn/transformer-configs\n", + " * [new branch] save-schema-for-t4rec-model -> origin/save-schema-for-t4rec-model\n", + " * [new branch] schema-pbtxt-bug -> origin/schema-pbtxt-bug\n", + " * [new branch] schema-shape-fix -> origin/schema-shape-fix\n", + " * [new branch] seq_binary_classification -> origin/seq_binary_classification\n", + " * [new branch] serve_nvt_and__model -> origin/serve_nvt_and__model\n", + " * [new branch] session_features -> origin/session_features\n", + " * [new branch] slim_doc_deps -> origin/slim_doc_deps\n", + " * [new branch] soft_embeddings -> origin/soft_embeddings\n", + " * [new branch] ssn_seed -> origin/ssn_seed\n", + " * [new branch] stable -> origin/stable\n", + " * [new branch] stochastic_noise -> origin/stochastic_noise\n", + " * [new branch] stochastic_noise2 -> origin/stochastic_noise2\n", + " * [new branch] synthetic-data -> origin/synthetic-data\n", + " * [new branch] t4rec-MM-repro -> origin/t4rec-MM-repro\n", + " * [new branch] t4rec_paper_repro2 -> origin/t4rec_paper_repro2\n", + " * [new branch] t4rec_refactor -> origin/t4rec_refactor\n", + " * [new branch] tensorflow -> origin/tensorflow\n", + " * [new branch] test-data -> origin/test-data\n", + " * [new branch] test/text_module -> origin/test/text_module\n", + " * [new branch] testing/updates -> origin/testing/updates\n", + " * [new branch] tf/example_notebook -> origin/tf/example_notebook\n", + " * [new branch] tf/fix_compute_loss -> origin/tf/fix_compute_loss\n", + " * [new branch] tf/fix_graph_mode -> origin/tf/fix_graph_mode\n", + " * [new branch] tf/model_saving_and_loading -> origin/tf/model_saving_and_loading\n", + " * [new branch] tf/refactor_item_prediction_task -> origin/tf/refactor_item_prediction_task\n", + " * [new branch] tf/refactor_masking -> origin/tf/refactor_masking\n", + " * [new branch] tf/refactor_ranking_metric -> origin/tf/refactor_ranking_metric\n", + " * [new branch] tf/refactor_transformer_block -> origin/tf/refactor_transformer_block\n", + " * [new branch] tf/save_load_model -> origin/tf/save_load_model\n", + " * [new branch] tf/test-utils -> origin/tf/test-utils\n", + " * [new branch] tf/to_tf_model -> origin/tf/to_tf_model\n", + " * [new branch] torch/demo_utils -> origin/torch/demo_utils\n", + " * [new branch] torch/fit_eval -> origin/torch/fit_eval\n", + " * [new branch] torch/fix_evaluation -> origin/torch/fix_evaluation\n", + " * [new branch] torch/fix_examples_utils -> origin/torch/fix_examples_utils\n", + " * [new branch] torch/fix_wipe_memory -> origin/torch/fix_wipe_memory\n", + " * [new branch] torch/label_smoothing_loss -> origin/torch/label_smoothing_loss\n", + " * [new branch] torch/next_item_prediction -> origin/torch/next_item_prediction\n", + " * [new branch] torch/stochastic_swap_noise -> origin/torch/stochastic_swap_noise\n", + " * [new branch] trainer_predict_step -> origin/trainer_predict_step\n", + " * [new branch] tutorial -> origin/tutorial\n", + " * [new branch] unittest_endtoend_multi -> origin/unittest_endtoend_multi\n", + " * [new branch] update/torchmetrics -> origin/update/torchmetrics\n", + " * [new branch] utils -> origin/utils\n", + " * [new branch] v0.1.2-docs -> origin/v0.1.2-docs\n", + " * [new branch] v0.1.3-docs -> origin/v0.1.3-docs\n", + " * [new 
branch] v0.1.4-docs -> origin/v0.1.4-docs\n", + " * [new branch] v0.1.5-docs -> origin/v0.1.5-docs\n", + " * [new branch] v0.1.6-docs -> origin/v0.1.6-docs\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " * [new branch] v0.1.7-docs -> origin/v0.1.7-docs\n", + " * [new tag] v0.1.14 -> v0.1.14\n", + " * [new tag] v0.1.15 -> v0.1.15\n", + " * [new tag] v0.1.16 -> v0.1.16\n", + " * [new tag] v23.02.00 -> v23.02.00\n", + " * [new tag] v23.05.dev0 -> v23.05.dev0\n", + " * [new tag] custom_dataloader -> custom_dataloader\n", + " * [new tag] v0.1.0 -> v0.1.0\n", + " * [new tag] v0.1.1 -> v0.1.1\n", + " * [new tag] v0.1.10 -> v0.1.10\n", + " * [new tag] v0.1.11 -> v0.1.11\n", + " * [new tag] v0.1.12 -> v0.1.12\n", + " * [new tag] v0.1.13 -> v0.1.13\n", + " * [new tag] v0.1.2 -> v0.1.2\n", + " * [new tag] v0.1.3 -> v0.1.3\n", + " * [new tag] v0.1.4 -> v0.1.4\n", + " * [new tag] v0.1.5 -> v0.1.5\n", + " * [new tag] v0.1.6 -> v0.1.6\n", + " * [new tag] v0.1.7 -> v0.1.7\n", + " * [new tag] v0.1.8 -> v0.1.8\n", + " * [new tag] v0.1.9 -> v0.1.9\n", + "Switched to a new branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Branch 'main' set up to track remote branch 'main' from 'origin'.\n", + "Processing /transformers4rec\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Building wheels for collected packages: transformers4rec\n", + " Building wheel for transformers4rec (PEP 517): started\n", + " Building wheel for transformers4rec (PEP 517): finished with status 'done'\n", + " Created wheel for transformers4rec: filename=transformers4rec-23.5.dev0+11.ga070e77f-py3-none-any.whl size=481639 sha256=6bae592418f42250e0c86ccdf6a1e47ee1ef98c15b1152a57933c162c5329b52\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-2hedypqg/wheels/24/44/e3/c29f7de8e7315585705f880ad32ffeae66fcaeb79003405ef6\n", + "Successfully built transformers4rec\n", + "Installing collected packages: transformers4rec\n", + " Attempting uninstall: transformers4rec\n", + " Found existing installation: transformers4rec 23.4.0\n", + " Uninstalling transformers4rec-23.4.0:\n", + " Successfully uninstalled transformers4rec-23.4.0\n", + "Successfully installed transformers4rec-23.5.dev0+11.ga070e77f\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Note: switching to 'origin/release-23.04'.\n", + "\n", + "You are in 'detached HEAD' state. You can look around, make experimental\n", + "changes and commit them, and you can discard any commits you make in this\n", + "state without impacting any branches by switching back to a branch.\n", + "\n", + "If you want to create a new branch to retain commits you create, you may\n", + "do so (now or later) by using -c with the switch command. 
Example:\n", + "\n", + " git switch -c \n", + "\n", + "Or undo this operation with:\n", + "\n", + " git switch -\n", + "\n", + "Turn off this advice by setting config variable advice.detachedHead to false\n", + "\n", + "HEAD is now at 2516efb Return version 23.04.00 from versions\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing /systems\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Building wheels for collected packages: merlin-systems\n", + " Building wheel for merlin-systems (PEP 517): started\n", + " Building wheel for merlin-systems (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-systems: filename=merlin_systems-23.4.0-py3-none-any.whl size=82535 sha256=01c306d63bfbe3cb3fa02b48fa87945e541bc564c62f51525e420d0add2127e5\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-s5zg7ehl/wheels/1f/e9/71/1b0c6295aa7f4b37cb70292d96d87d9f38204674e6531bdda6\n", + "Successfully built merlin-systems\n", + "Installing collected packages: merlin-systems\n", + " Attempting uninstall: merlin-systems\n", + " Found existing installation: merlin-systems 23.5.dev0+8.g2b1b90b\n", + " Uninstalling merlin-systems-23.5.dev0+8.g2b1b90b:\n", + " Successfully uninstalled merlin-systems-23.5.dev0+8.g2b1b90b\n", + "Successfully installed merlin-systems-23.4.0\n" + ] + } + ], + "source": [ + "%%bash\n", + "cd /core\n", + "git config remote.origin.fetch \"+refs/heads/*:refs/remotes/origin/*\" && git fetch && git checkout main\n", + "pip install . --no-deps\n", + "\n", + "cd /dataloader\n", + "git config remote.origin.fetch \"+refs/heads/*:refs/remotes/origin/*\" && git fetch && git checkout main\n", + "pip install . --no-deps\n", + "\n", + "cd /nvtabular\n", + "git config remote.origin.fetch \"+refs/heads/*:refs/remotes/origin/*\" && git fetch && git checkout main\n", + "pip install . --no-deps\n", + "\n", + "cd /models\n", + "git config remote.origin.fetch \"+refs/heads/*:refs/remotes/origin/*\" && git fetch && git checkout main\n", + "pip install . --no-deps\n", + "\n", + "cd /systems\n", + "git config remote.origin.fetch \"+refs/heads/*:refs/remotes/origin/*\" && git fetch && git checkout main\n", + "pip install . --no-deps\n", + "\n", + "cd /transformers4rec\n", + "git config remote.origin.fetch \"+refs/heads/*:refs/remotes/origin/*\" && git fetch && git checkout main\n", + "pip install . --no-deps\n", + "\n", + "cd /systems\n", + "git checkout origin/release-23.04\n", + "pip install . 
--no-deps" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e9929dc8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting gdown\n", + " Downloading gdown-4.7.1-py3-none-any.whl (15 kB)\n", + "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.8/dist-packages (from gdown) (4.12.2)\n", + "Requirement already satisfied: six in /usr/lib/python3/dist-packages (from gdown) (1.14.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from gdown) (3.12.0)\n", + "Requirement already satisfied: requests[socks] in /usr/local/lib/python3.8/dist-packages (from gdown) (2.29.0)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from gdown) (4.65.0)\n", + "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.8/dist-packages (from beautifulsoup4->gdown) (2.4.1)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (1.25.8)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2.8)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2019.11.28)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (3.1.0)\n", + "Collecting PySocks!=1.5.7,>=1.5.6; extra == \"socks\"\n", + " Downloading PySocks-1.7.1-py3-none-any.whl (16 kB)\n", + "Installing collected packages: gdown, PySocks\n", + "Successfully installed PySocks-1.7.1 gdown-4.7.1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Downloading...\n", + "From (uriginal): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", + "From (redirected): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV&confirm=t&uuid=8c599e09-56a6-4c3f-a6f7-21a594214531\n", + "To: /workspace/T4Rec_repro/rees46_ecom_dataset_small_for_ci.zip\n", + "100%|██████████| 43.4M/43.4M [00:00<00:00, 189MB/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Get:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 InRelease [1581 B]\n", + "Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 Packages [1009 kB]\n", + "Get:3 http://archive.ubuntu.com/ubuntu focal InRelease [265 kB]\n", + "Get:4 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]\n", + "Get:5 http://security.ubuntu.com/ubuntu focal-security/multiverse amd64 Packages [28.5 kB]\n", + "Get:6 http://archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]\n", + "Get:7 http://security.ubuntu.com/ubuntu focal-security/universe amd64 Packages [1045 kB]\n", + "Get:8 http://archive.ubuntu.com/ubuntu focal-backports InRelease [108 kB]\n", + "Get:9 http://archive.ubuntu.com/ubuntu focal/restricted amd64 Packages [33.4 kB]\n", + "Get:10 http://archive.ubuntu.com/ubuntu focal/multiverse amd64 Packages [177 kB]\n", + "Get:11 http://archive.ubuntu.com/ubuntu focal/main amd64 Packages [1275 kB]\n", + "Get:12 http://security.ubuntu.com/ubuntu focal-security/main amd64 Packages [2674 kB]\n", + "Get:13 http://archive.ubuntu.com/ubuntu focal/universe amd64 Packages [11.3 MB]\n", + "Get:14 http://security.ubuntu.com/ubuntu focal-security/restricted amd64 Packages [2203 kB]\n", + "Get:15 http://archive.ubuntu.com/ubuntu 
focal-updates/universe amd64 Packages [1341 kB]\n", + "Get:16 http://archive.ubuntu.com/ubuntu focal-updates/multiverse amd64 Packages [31.2 kB]\n", + "Get:17 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 Packages [3157 kB]\n", + "Get:18 http://archive.ubuntu.com/ubuntu focal-updates/restricted amd64 Packages [2341 kB]\n", + "Get:19 http://archive.ubuntu.com/ubuntu focal-backports/main amd64 Packages [55.2 kB]\n", + "Get:20 http://archive.ubuntu.com/ubuntu focal-backports/universe amd64 Packages [28.6 kB]\n", + "Fetched 27.3 MB in 3s (8434 kB/s)\n", + "Reading package lists...\n", + "Reading package lists...\n", + "Building dependency tree...\n", + "Reading state information...\n", + "unzip is already the newest version (6.0-25ubuntu1.1).\n", + "0 upgraded, 0 newly installed, 0 to remove and 60 not upgraded.\n", + "Archive: rees46_ecom_dataset_small_for_ci.zip\n", + " creating: ecom_dataset/0001/\n", + " inflating: ecom_dataset/0001/valid.parquet \n", + " extracting: ecom_dataset/0001/.zip \n", + " inflating: ecom_dataset/0001/train.parquet \n", + " inflating: ecom_dataset/0001/test.parquet \n", + " creating: ecom_dataset/0002/\n", + " inflating: ecom_dataset/0002/valid.parquet \n", + " inflating: ecom_dataset/0002/train.parquet \n", + " inflating: ecom_dataset/0002/test.parquet \n" + ] + } + ], + "source": [ + "%%bash\n", + "\n", + "rm -rf ecom_dataset\n", + "mkdir -p ecom_dataset\n", + "\n", + "pip install gdown\n", + "# gdown https://drive.google.com/uc?id=1BvCHc4eXComuNK93bKhRM6cbg9y5p350 # <-- full dataset\n", + "gdown https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", + "apt-get update -y\n", + "apt-get install unzip -y\n", + "unzip -d ecom_dataset \"rees46_ecom_dataset_small_for_ci.zip\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "fd80de2a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-05-09 08:36:30.091603: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. 
The old module will be deleted in version 2.11.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", + "2023-05-09 08:36:32.676489: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:66] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /lib:/usr/local/lib/python3.8/dist-packages/tensorflow:/usr/local/cuda/compat/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda-11/lib64:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/lib:/repos/dist/lib:/usr/lib/jvm/default-java/lib:/usr/lib/jvm/default-java/lib/server:/opt/tritonserver/lib:/usr/local/hugectr/lib\n", + "2023-05-09 08:36:32.676527: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-05-09 08:36:32.676550: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:163] no NVIDIA GPU device is present: /dev/nvidia0 does not exist\n", + "2023-05-09 08:36:32.985205: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import os\n", + "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", + "import gc\n", + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "\n", + "from merlin.schema.tags import Tags\n", + "from merlin.io.dataset import Dataset\n", + "\n", + "import merlin.models.tf as mm" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "7f84cdd1", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "d8f5825b", + "metadata": {}, + "outputs": [], + "source": [ + "for fn in ['ecom_dataset/0001/train.parquet', 'ecom_dataset/0002/test.parquet']:\n", + " t = pd.read_parquet(fn)\n", + " t[['sess_pid_seq']].to_parquet(fn)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "11647dd3", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:264: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/io/dataset.py:264: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "train = Dataset(\"ecom_dataset/0001/train.parquet\")\n", + "valid = Dataset(\"ecom_dataset/0002/test.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "4ab4e0fb", + "metadata": {}, + "outputs": [], + "source": [ + "target = 'sess_pid_seq'\n", + "seq_name = target" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "8d9903e6", + "metadata": {}, + "outputs": [], + "source": [ + "# a couple of starter hyperparams\n", + "\n", + "d_model = 192\n", + "n_layer = 3\n", + "n_head = 16\n", + "batch_size = 128\n", + "learning_rate = 0.0006667377132554976\n", + "n_epoch = 1\n", + "item_embedding_dim = 448 \n", + "item_id_embeddings_init_std = 3" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "410ea223", + "metadata": {}, + "outputs": [], + "source": [ + "# seq_name = 'seq'\n", + "# target = seq_name" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "4328f03a", + "metadata": {}, + "outputs": [], + "source": [ + "from nvtabular.inference.triton import export_tensorflow_ensemble\n", + "from nvtabular import Workflow\n", + "from nvtabular.ops import Categorify, Rename" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "d5a9dd50", + "metadata": {}, + "outputs": [], + "source": [ + "ops = ['sess_pid_seq'] >> Categorify(dtype=np.int32) #>> Rename(name=seq_name)\n", + "\n", + "wf = Workflow(ops)\n", + "\n", + "train = wf.fit_transform(train)\n", + "valid = wf.transform(valid)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "a6ade14a", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n", + "\n", + "def get_model():\n", + " mlp_block = mm.MLPBlock(\n", + " [d_model],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )\n", + "\n", + " schema = TensorflowMetadata.from_proto_text_file(\n", + " './',\n", + " file_name='rees46_schema_modified.pbtxt'\n", + " ).to_merlin_schema()\n", + "\n", + 
" train.schema = schema\n", + " \n", + " schema_model = schema.select_by_tag(Tags.ITEM_ID)\n", + " input_block = mm.InputBlockV2(\n", + " schema_model,\n", + " categorical=mm.Embeddings(\n", + " schema_model.select_by_tag(Tags.CATEGORICAL),\n", + " dim=item_embedding_dim,\n", + " sequence_combiner=None,\n", + " )\n", + " )\n", + "\n", + " train.schema = train.schema.select_by_name(seq_name)\n", + "\n", + " xlnet_block = mm.XLNetBlock(d_model=d_model, n_head=n_head, n_layer=n_layer)\n", + "\n", + " dense_block = mm.SequentialBlock(\n", + " input_block,\n", + " mlp_block,\n", + " xlnet_block\n", + " )\n", + "\n", + " mlp_block2 = mm.MLPBlock(\n", + " [item_embedding_dim],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )\n", + "\n", + " prediction_task = mm.CategoricalOutput(\n", + " to_call=input_block[\"categorical\"][target],\n", + " )\n", + "\n", + " model_transformer = mm.Model(dense_block, mlp_block2, prediction_task)\n", + "\n", + " optimizer = tf.keras.optimizers.Adam(\n", + " learning_rate=learning_rate,\n", + " )\n", + "\n", + " model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", + " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[20])\n", + " )\n", + " return model_transformer, xlnet_block" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "7baec64f", + "metadata": {}, + "outputs": [], + "source": [ + "model_transformer, xlnet_block = get_model()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "523fe2ac", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n", + "2023-05-09 07:46:57.519563: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:648] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.\n", + "2023-05-09 07:46:58.350122: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8700\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:450: UserWarning: Converting sparse IndexedSlices to a dense Tensor with 174720448 elements. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-05-09 07:47:12.201780: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0xdd325f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n", + "2023-05-09 07:47:12.201824: I tensorflow/compiler/xla/service/service.cc:181] StreamExecutor device (0): NVIDIA A10G, Compute Capability 8.6\n", + "2023-05-09 07:47:12.206483: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n", + "2023-05-09 07:47:12.324526: I tensorflow/compiler/jit/xla_compilation_cache.cc:480] Compiled cluster using XLA! 
This line is logged at most once for the lifetime of the process.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "677/677 [==============================] - 100s 121ms/step - loss: 7.3234 - recall_at_20: 0.1412 - mrr_at_20: 0.0795 - ndcg_at_20: 0.0932 - map_at_20: 0.0795 - precision_at_20: 0.0071 - regularization_loss: 0.0000e+00 - loss_batch: 7.3219\n", + "84/84 [==============================] - 6s 30ms/step - loss: 8.5802 - recall_at_20: 0.2295 - mrr_at_20: 0.0788 - ndcg_at_20: 0.1121 - map_at_20: 0.0788 - precision_at_20: 0.0115 - regularization_loss: 0.0000e+00 - loss_batch: 8.6138\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.580246925354004,\n", + " 'recall_at_20': 0.2330261468887329,\n", + " 'mrr_at_20': 0.07755612581968307,\n", + " 'ndcg_at_20': 0.11172891408205032,\n", + " 'map_at_20': 0.07755612581968307,\n", + " 'precision_at_20': 0.01165130827575922,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 10.065570831298828}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "569113e1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n", + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n", + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n", + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_predict_next_layer_call_fn, sequence_predict_next_layer_call_and_return_conditional_losses, sequence_predict_last_layer_call_fn while saving (showing 5 of 114). 
These functions will not be directly callable after loading.\n", + "2023-05-09 07:48:59.937050: W tensorflow/tsl/framework/cpu_allocator_impl.cc:82] Allocation of 698881792 exceeds 10% of free system memory.\n", + "2023-05-09 07:49:00.242621: W tensorflow/tsl/framework/cpu_allocator_impl.cc:82] Allocation of 698881792 exceeds 10% of free system memory.\n", + "2023-05-09 07:49:00.480362: W tensorflow/tsl/framework/cpu_allocator_impl.cc:82] Allocation of 698881792 exceeds 10% of free system memory.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: t4rec_model/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: t4rec_model/assets\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:101: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " config[i] = tf.keras.utils.serialize_keras_object(layer)\n", + "/usr/local/lib/python3.8/dist-packages/keras/saving/legacy/saved_model/layer_serialization.py:134: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " return serialization.serialize_keras_object(obj)\n", + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). 
Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n", + "2023-05-09 07:49:25.123588: W tensorflow/tsl/framework/cpu_allocator_impl.cc:82] Allocation of 698881792 exceeds 10% of free system memory.\n", + "2023-05-09 07:49:25.123649: W tensorflow/tsl/framework/cpu_allocator_impl.cc:82] Allocation of 698881792 exceeds 10% of free system memory.\n" + ] + } + ], + "source": [ + "model_transformer.save('t4rec_model')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "5bd66ba8", + "metadata": {}, + "outputs": [], + "source": [ + "# from merlin.systems.dag.ops.workflow import TransformWorkflow\n", + "# from merlin.systems.dag.ops.tensorflow import PredictTensorflow\n", + "\n", + "# serving_operators = [seq_name] >> TransformWorkflow(wf) >> PredictTensorflow(model_transformer)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "e2a7b6ee", + "metadata": {}, + "outputs": [], + "source": [ + "# %%bash\n", + "\n", + "# rm -rf /workspace/models_for_benchmarking\n", + "# mkdir -p /workspace/models_for_benchmarking" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "55ad012c", + "metadata": {}, + "outputs": [], + "source": [ + "# train.schema.select_by_name('sess_pid_seq')" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "1a39b4f8", + "metadata": {}, + "outputs": [], + "source": [ + "# from merlin.systems.dag.ensemble import Ensemble\n", + "\n", + "# ensemble = Ensemble(serving_operators, wf.input_schema)\n", + "# ens_conf, node_confs = ensemble.export(\"/workspace/models_for_benchmarking\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "d7cdc6cc", + "metadata": {}, + "outputs": [], + "source": [ + "# import nvtabular.inference.triton as nvt_triton\n", + "# import tritonclient.grpc as grpcclient\n", + "# import subprocess\n", + "\n", + "# subprocess.Popen(['tritonserver', '--model-repository=/workspace/models_for_benchmarking/'])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}