diff --git a/HugeCTR/include/common.hpp b/HugeCTR/include/common.hpp index 00bc55e692..bc284524ef 100644 --- a/HugeCTR/include/common.hpp +++ b/HugeCTR/include/common.hpp @@ -58,8 +58,8 @@ namespace HugeCTR { -#define HUGECTR_VERSION_MAJOR 23 -#define HUGECTR_VERSION_MINOR 12 +#define HUGECTR_VERSION_MAJOR 24 +#define HUGECTR_VERSION_MINOR 4 #define HUGECTR_VERSION_PATCH 0 #define WARP_SIZE 32 diff --git a/README.md b/README.md index d8fe7b1ade..f9172fa235 100755 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ If you'd like to quickly train a model using the Python interface, do the follow 1. Start a NGC container with your local host directory (/your/host/dir mounted) by running the following command: ``` - docker run --gpus=all --rm -it --cap-add SYS_NICE -v /your/host/dir:/your/container/dir -w /your/container/dir -it -u $(id -u):$(id -g) nvcr.io/nvidia/merlin/merlin-hugectr:23.12 + docker run --gpus=all --rm -it --cap-add SYS_NICE -v /your/host/dir:/your/container/dir -w /your/container/dir -it -u $(id -u):$(id -g) nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` **NOTE**: The **/your/host/dir** directory is just as visible as the **/your/container/dir** directory. The **/your/host/dir** directory is also your starting directory. diff --git a/docs/source/hierarchical_parameter_server/hps_torch_user_guide.md b/docs/source/hierarchical_parameter_server/hps_torch_user_guide.md index 1898aaeb79..148d8d97a5 100644 --- a/docs/source/hierarchical_parameter_server/hps_torch_user_guide.md +++ b/docs/source/hierarchical_parameter_server/hps_torch_user_guide.md @@ -33,12 +33,12 @@ HPS is available within the Merlin Docker containers, which can be accessed thro To utilize these Docker containers, you will need to install the [NVIDIA Container Toolkit](https://github.com/NVIDIA/nvidia-docker) to provide GPU support for Docker. 
-The following sample commands pull and start the Merlin PyTorch container: +The following sample commands pull and start the Merlin HugeCTR container: -Merlin PyTorch +Merlin HugeCTR ```shell # Run the container in interactive mode -$ docker run --gpus=all --rm -it --cap-add SYS_NICE nvcr.io/nvidia/merlin/merlin-pytorch:23.12 +$ docker run --gpus=all --rm -it --cap-add SYS_NICE nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` You can check the existence of the HPS plugin for Torch after launching the container by running the following Python statements: diff --git a/docs/source/hierarchical_parameter_server/profiling_hps.md b/docs/source/hierarchical_parameter_server/profiling_hps.md index ae0fe9918f..9c80c42474 100644 --- a/docs/source/hierarchical_parameter_server/profiling_hps.md +++ b/docs/source/hierarchical_parameter_server/profiling_hps.md @@ -67,13 +67,13 @@ To build HPS profiler from source, do the following: Pull the container using the following command: ```shell -docker pull nvcr.io/nvidia/merlin/merlin-hugectr:23.12 +docker pull nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` Launch the container in interactive mode (mount the HugeCTR root directory into the container for your convenience) by running this command: ```shell - docker run --gpus all --rm -it --cap-add SYS_NICE --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 -u root -v $(pwd):/HugeCTR -w /HugeCTR -p 8888:8888 nvcr.io/nvidia/merlin/merlin-hugectr:23.12 + docker run --gpus all --rm -it --cap-add SYS_NICE --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 -u root -v $(pwd):/HugeCTR -w /HugeCTR -p 8888:8888 nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` 3. 
Here is an example of how you can build HPS Profiler using the build options: diff --git a/docs/source/hugectr_user_guide.md b/docs/source/hugectr_user_guide.md index b80a83d1de..a2d983449f 100644 --- a/docs/source/hugectr_user_guide.md +++ b/docs/source/hugectr_user_guide.md @@ -83,7 +83,7 @@ The following sample command pulls and starts the Merlin Training container: ```shell # Run the container in interactive mode -$ docker run --gpus=all --rm -it --cap-add SYS_NICE nvcr.io/nvidia/merlin/merlin-hugectr:23.12 +$ docker run --gpus=all --rm -it --cap-add SYS_NICE nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` ### Building HugeCTR from Scratch diff --git a/hps_tf/hps_cc/config.hpp b/hps_tf/hps_cc/config.hpp index 889cdb3e6c..69e67e2dd6 100644 --- a/hps_tf/hps_cc/config.hpp +++ b/hps_tf/hps_cc/config.hpp @@ -15,7 +15,7 @@ */ #pragma once -// TODO: The configurations are not needed anymore in merlin-base:23.12 +// TODO: The configurations are not needed anymore in merlin-base:24.04 // #include // #undef ABSL_OPTION_USE_STD_STRING_VIEW // #define ABSL_OPTION_USE_STD_STRING_VIEW 0 diff --git a/hps_tf/notebooks/hierarchical_parameter_server_demo.ipynb b/hps_tf/notebooks/hierarchical_parameter_server_demo.ipynb index d5111083a9..8e43862a44 100755 --- a/hps_tf/notebooks/hierarchical_parameter_server_demo.ipynb +++ b/hps_tf/notebooks/hierarchical_parameter_server_demo.ipynb @@ -58,7 +58,7 @@ "\n", "### Get HPS from NGC\n", "\n", - "The HPS Python module is preinstalled in the 23.12 and later [Merlin TensorFlow Container](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow): `nvcr.io/nvidia/merlin/merlin-tensorflow:23.12`.\n", + "The HPS Python module is preinstalled in the 24.04 and later [Merlin HugeCTR Container](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr): `nvcr.io/nvidia/merlin/merlin-hugectr:24.04`.\n", "\n", "You can check the existence of the required libraries by running the following Python 
code after launching this container.\n", "\n", diff --git a/hps_tf/notebooks/hps_multi_table_sparse_input_demo.ipynb b/hps_tf/notebooks/hps_multi_table_sparse_input_demo.ipynb index df62b86348..5848480e61 100755 --- a/hps_tf/notebooks/hps_multi_table_sparse_input_demo.ipynb +++ b/hps_tf/notebooks/hps_multi_table_sparse_input_demo.ipynb @@ -58,7 +58,7 @@ "\n", "### Get HPS from NGC\n", "\n", - "The HPS Python module is preinstalled in the 23.12 and later [Merlin TensorFlow Container](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow): `nvcr.io/nvidia/merlin/merlin-tensorflow:23.12`.\n", + "The HPS Python module is preinstalled in the 24.04 and later [Merlin HugeCTR Container](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr): `nvcr.io/nvidia/merlin/merlin-hugectr:24.04`.\n", "\n", "You can check the existence of the required libraries by running the following Python code after launching this container.\n", "\n", diff --git a/hps_tf/notebooks/hps_pretrained_model_training_demo.ipynb b/hps_tf/notebooks/hps_pretrained_model_training_demo.ipynb index 5398650038..e416eb8505 100755 --- a/hps_tf/notebooks/hps_pretrained_model_training_demo.ipynb +++ b/hps_tf/notebooks/hps_pretrained_model_training_demo.ipynb @@ -58,7 +58,7 @@ "\n", "### Get HPS from NGC\n", "\n", - "The HPS Python module is preinstalled in the 23.12 and later [Merlin TensorFlow Container](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr): `nvcr.io/nvidia/merlin/merlin-tensorflow:23.12`.\n", + "The HPS Python module is preinstalled in the 24.04 and later [Merlin HugeCTR Container](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr): `nvcr.io/nvidia/merlin/merlin-hugectr:24.04`.\n", "\n", "You can check the existence of the required libraries by running the following Python code after launching this container.\n", "\n", diff --git a/hps_tf/notebooks/hps_table_fusion_demo.ipynb 
b/hps_tf/notebooks/hps_table_fusion_demo.ipynb index 7f1a8d0280..17f1f53ef5 100644 --- a/hps_tf/notebooks/hps_table_fusion_demo.ipynb +++ b/hps_tf/notebooks/hps_table_fusion_demo.ipynb @@ -57,7 +57,7 @@ "\n", "### Get HPS from NGC\n", "\n", - "The HPS Python module is preinstalled in the 23.12 and later [Merlin TensorFlow Container](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow): `nvcr.io/nvidia/merlin/merlin-tensorflow:23.12`.\n", + "The HPS Python module is preinstalled in the 24.04 and later [Merlin HugeCTR Container](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr): `nvcr.io/nvidia/merlin/merlin-hugectr:24.04`.\n", "\n", "You can check the existence of the required libraries by running the following Python code after launching this container.\n", "\n", diff --git a/hps_tf/notebooks/hps_tensorflow_triton_deployment_demo.ipynb b/hps_tf/notebooks/hps_tensorflow_triton_deployment_demo.ipynb index d7d79437bb..d8f141a27f 100755 --- a/hps_tf/notebooks/hps_tensorflow_triton_deployment_demo.ipynb +++ b/hps_tf/notebooks/hps_tensorflow_triton_deployment_demo.ipynb @@ -58,7 +58,7 @@ "\n", "### Get HPS from NGC\n", "\n", - "The HPS Python module is preinstalled in the 23.12 and later [Merlin TensorFlow Container](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow): `nvcr.io/nvidia/merlin/merlin-tensorflow:23.12`.\n", + "The HPS Python module is preinstalled in the 24.04 and later [Merlin HugeCTR Container](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr): `nvcr.io/nvidia/merlin/merlin-hugectr:24.04`.\n", "\n", "You can check the existence of the required libraries by running the following Python code after launching this container.\n", "\n", @@ -854,9 +854,9 @@ "INFO:tensorflow:Automatic mixed precision has been deactivated.\n", "2022-11-23 01:37:23.028482: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count 
>= 8, compute capability >= 0.0): 1\n", "2022-11-23 01:37:23.028568: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session\n", - "2022-11-23 01:37:23.121909: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 30991 MB memory: -> device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:06:00.0, compute capability: 7.0\n", - "2022-11-23 01:37:23.128593: W tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc:198] Calibration with FP32 or FP16 is not implemented. Falling back to use_calibration = False.Note that the default value of use_calibration is True.\n", - "2022-11-23 01:37:23.129761: W tensorflow/compiler/tf2tensorrt/segment/segment.cc:952] \n", + "2022-11-23 01:37:24.041909: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 30991 MB memory: -> device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:06:00.0, compute capability: 7.0\n", + "2022-11-23 01:37:24.048593: W tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc:198] Calibration with FP32 or FP16 is not implemented. 
Falling back to use_calibration = False.Note that the default value of use_calibration is True.\n", + "2022-11-23 01:37:24.049761: W tensorflow/compiler/tf2tensorrt/segment/segment.cc:952] \n", "\n", "################################################################################\n", "TensorRT unsupported/non-converted OP Report:\n", @@ -872,9 +872,9 @@ "For more information see https://docs.nvidia.com/deeplearning/frameworks/tf-trt-user-guide/index.html#supported-ops.\n", "################################################################################\n", "\n", - "2022-11-23 01:37:23.129860: W tensorflow/compiler/tf2tensorrt/segment/segment.cc:1280] The environment variable TF_TRT_MAX_ALLOWED_ENGINES=20 has no effect since there are only 1 TRT Engines with at least minimum_segment_size=3 nodes.\n", - "2022-11-23 01:37:23.129893: I tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc:799] Number of TensorRT candidate segments: 1\n", - "2022-11-23 01:37:23.120667: I tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc:916] Replaced segment 0 consisting of 9 nodes by TRTEngineOp_000_000.\n" + "2022-11-23 01:37:24.049860: W tensorflow/compiler/tf2tensorrt/segment/segment.cc:1280] The environment variable TF_TRT_MAX_ALLOWED_ENGINES=20 has no effect since there are only 1 TRT Engines with at least minimum_segment_size=3 nodes.\n", + "2022-11-23 01:37:24.049893: I tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc:799] Number of TensorRT candidate segments: 1\n", + "2022-11-23 01:37:24.040667: I tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc:916] Replaced segment 0 consisting of 9 nodes by TRTEngineOp_000_000.\n" ] }, { diff --git a/hps_tf/notebooks/sok_to_hps_dlrm_demo.ipynb b/hps_tf/notebooks/sok_to_hps_dlrm_demo.ipynb index 33e4f3af82..71271deb55 100755 --- a/hps_tf/notebooks/sok_to_hps_dlrm_demo.ipynb +++ b/hps_tf/notebooks/sok_to_hps_dlrm_demo.ipynb @@ -58,7 +58,7 @@ "\n", "### Get SOK from NGC\n", "\n", - "Both SOK and HPS Python modules 
are preinstalled in the 23.12 and later [Merlin TensorFlow Container](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow): `nvcr.io/nvidia/merlin/merlin-tensorflow:23.12`.\n", + "Both SOK and HPS Python modules are preinstalled in the 24.04 and later [Merlin HugeCTR Container](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr): `nvcr.io/nvidia/merlin/merlin-hugectr:24.04`.\n", "\n", "You can check the existence of the required libraries by running the following Python code after launching this container.\n", "\n", diff --git a/hps_torch/notebooks/README.md b/hps_torch/notebooks/README.md index 9e26e60b0d..4f1e4b5fcb 100644 --- a/hps_torch/notebooks/README.md +++ b/hps_torch/notebooks/README.md @@ -13,7 +13,7 @@ If you prefer to build the HugeCTR Docker image on your own, refer to [Set Up th Pull the container using the following command: ```shell -docker pull nvcr.io/nvidia/merlin/merlin-pytorch:23.12 +docker pull nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` ### Clone the HugeCTR Repository @@ -28,7 +28,7 @@ git clone https://github.com/NVIDIA/HugeCTR 1. Launch the container in interactive mode (mount the HugeCTR root directory into the container for your convenience) by running this command: ```shell - docker run --runtime=nvidia --rm -it --cap-add SYS_NICE -u $(id -u):$(id -g) -v $(pwd):/hugectr -w /hugectr -p 8888:8888 nvcr.io/nvidia/merlin/merlin-pytorch:23.12 + docker run --runtime=nvidia --rm -it --cap-add SYS_NICE -u $(id -u):$(id -g) -v $(pwd):/hugectr -w /hugectr -p 8888:8888 nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` 2. Start Jupyter using these commands: @@ -55,4 +55,4 @@ The specifications of the system on which each notebook can run successfully are | Notebook | CPU | GPU | #GPUs | Author | | -------- | --- | --- | ----- | ------ | -| [hps_torch_demo.ipynb](hps_torch_demo.ipynb) | Intel(R) Xeon(R) CPU E5-2698 v4 @ 2.20GHz
512 GB Memory | Tesla V100-SXM2-32GB
32 GB Memory | 1 | Kingsley Liu | \ No newline at end of file +| [hps_torch_demo.ipynb](hps_torch_demo.ipynb) | Intel(R) Xeon(R) CPU E5-2698 v4 @ 2.20GHz
512 GB Memory | Tesla V100-SXM2-32GB
32 GB Memory | 1 | Kingsley Liu | diff --git a/hps_torch/notebooks/hps_torch_demo.ipynb b/hps_torch/notebooks/hps_torch_demo.ipynb index 1ca8a936f2..b48b89d9b1 100644 --- a/hps_torch/notebooks/hps_torch_demo.ipynb +++ b/hps_torch/notebooks/hps_torch_demo.ipynb @@ -60,7 +60,7 @@ "\n", "### Get HPS from NGC\n", "\n", - "The HPS Python module is preinstalled in the 23.12 and later [Merlin PyTorch Container](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-pytorch): `nvcr.io/nvidia/merlin/merlin-pytorch:23.12`.\n", + "The HPS Python module is preinstalled in the 24.04 and later [Merlin HugeCTR Container](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr): `nvcr.io/nvidia/merlin/merlin-hugectr:24.04`.\n", "\n", "You can check the existence of the required libraries by running the following Python code after launching this container.\n", "\n", diff --git a/hps_trt/notebooks/benchmark_tf_trained_large_model.ipynb b/hps_trt/notebooks/benchmark_tf_trained_large_model.ipynb index 29be456dd6..56b6d580a9 100755 --- a/hps_trt/notebooks/benchmark_tf_trained_large_model.ipynb +++ b/hps_trt/notebooks/benchmark_tf_trained_large_model.ipynb @@ -1279,8 +1279,8 @@ " ```shell\n", " git clone https://github.com/NVIDIA-Merlin/Merlin.git\n", " cd Merlin/docker\n", - " docker build -t nvcr.io/nvstaging/merlin/merlin-base:23.12 -f dockerfile.merlin .\n", - " docker build -t nvcr.io/nvstaging/merlin/merlin-tensorflow:23.12 -f dockerfile.tf .\n", + " docker build -t nvcr.io/nvstaging/merlin/merlin-base:24.04 -f dockerfile.merlin.ctr .\n", + " docker build -t nvcr.io/nvstaging/merlin/merlin-hugectr:24.04 -f dockerfile.ctr .\n", " cd ../..\n", " ```\n", "- **Option B (G+H optimized HugeCTR)**:\n", @@ -1288,8 +1288,8 @@ " git clone https://github.com/NVIDIA-Merlin/Merlin.git\n", " cd Merlin/docker\n", " sed -i -e 's/\" -DENABLE_INFERENCE=ON/\" -DUSE_HUGE_PAGES=ON -DENABLE_INFERENCE=ON/g' dockerfile.merlin\n", - " docker build -t 
nvcr.io/nvstaging/merlin/merlin-base:23.12 -f dockerfile.merlin .\n", - " docker build -t nvcr.io/nvstaging/merlin/merlin-tensorflow:23.12 -f dockerfile.tf .\n", + " docker build -t nvcr.io/nvstaging/merlin/merlin-base:24.04 -f dockerfile.merlin.ctr .\n", + " docker build -t nvcr.io/nvstaging/merlin/merlin-hugectr:24.04 -f dockerfile.ctr .\n", " cd ../..\n", " ````" ] @@ -1325,7 +1325,7 @@ "\n", "Your filesystem or system environment might impose constraints. The following command just serves as an example. It assumes HugeCTR was downloaded from GitHub into the current working directory (`git clone https://github.com/NVIDIA-Merlin/HugeCTR.git`). To allow writing files, we first give root user (inside the docker image you are root) to access to the notebook folder (this folder), and then startup a suitable Jupyter server.\n", "```shell\n", - "export HCTR_SRC=\"${PWD}/HugeCTR\" && chmod -R 777 \"${HCTR_SRC}/hps_trt/notebooks\" && docker run -it --rm --gpus all --network=host -v ${HCTR_SRC}:/hugectr nvcr.io/nvstaging/merlin/merlin-tensorflow:23.12 jupyter-lab --allow-root --ip 0.0.0.0 --port 8888 --no-browser --notebook-dir=/hugectr/hps_trt/notebooks\n", + "export HCTR_SRC=\"${PWD}/HugeCTR\" && chmod -R 777 \"${HCTR_SRC}/hps_trt/notebooks\" && docker run -it --rm --gpus all --network=host -v ${HCTR_SRC}:/hugectr nvcr.io/nvstaging/merlin/merlin-hugectr:24.04 jupyter-lab --allow-root --ip 0.0.0.0 --port 8888 --no-browser --notebook-dir=/hugectr/hps_trt/notebooks\n", "``` " ] }, diff --git a/hps_trt/notebooks/demo_for_hugectr_trained_model.ipynb b/hps_trt/notebooks/demo_for_hugectr_trained_model.ipynb index 612ddcf318..e2e3998332 100755 --- a/hps_trt/notebooks/demo_for_hugectr_trained_model.ipynb +++ b/hps_trt/notebooks/demo_for_hugectr_trained_model.ipynb @@ -31,7 +31,7 @@ "\n", "### Use NGC\n", "\n", - "The HPS TensorRT plugin is preinstalled in the 23.12 and later [Merlin HugeCTR 
Container](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr): `nvcr.io/nvidia/merlin/merlin-hugectr:23.12`.\n", + "The HPS TensorRT plugin is preinstalled in the 24.04 and later [Merlin HugeCTR Container](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr): `nvcr.io/nvidia/merlin/merlin-hugectr:24.04`.\n", "\n", "You can check the existence of the required libraries by running the following Python code after launching this container." ] diff --git a/hps_trt/notebooks/demo_for_pytorch_trained_model.ipynb b/hps_trt/notebooks/demo_for_pytorch_trained_model.ipynb index 73c053c928..0dbe0d5200 100644 --- a/hps_trt/notebooks/demo_for_pytorch_trained_model.ipynb +++ b/hps_trt/notebooks/demo_for_pytorch_trained_model.ipynb @@ -31,7 +31,7 @@ "\n", "### Use NGC\n", "\n", - "The HPS TensorRT plugin is preinstalled in the 23.12 and later [Merlin PyTorch Container](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-pytorch): `nvcr.io/nvidia/merlin/merlin-pytorch:23.12`.\n", + "The HPS TensorRT plugin is preinstalled in the 24.04 and later [Merlin HugeCTR Container](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr): `nvcr.io/nvidia/merlin/merlin-hugectr:24.04`.\n", "\n", "You can check the existence of the required libraries by running the following Python code after launching this container." 
] diff --git a/hps_trt/notebooks/demo_for_tf_trained_model.ipynb b/hps_trt/notebooks/demo_for_tf_trained_model.ipynb index 183014ec5d..aa21c5a146 100755 --- a/hps_trt/notebooks/demo_for_tf_trained_model.ipynb +++ b/hps_trt/notebooks/demo_for_tf_trained_model.ipynb @@ -31,7 +31,7 @@ "\n", "### Use NGC\n", "\n", - "The HPS TensorRT plugin is preinstalled in the 23.12 and later [Merlin TensorFlow Container](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow): `nvcr.io/nvidia/merlin/merlin-tensorflow:23.12`.\n", + "The HPS TensorRT plugin is preinstalled in the 24.04 and later [Merlin HugeCTR Container](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr): `nvcr.io/nvidia/merlin/merlin-hugectr:24.04`.\n", "\n", "You can check the existence of the required libraries by running the following Python code after launching this container." ] diff --git a/notebooks/README.md b/notebooks/README.md index a31a03e75a..90d76c1d2a 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -19,16 +19,16 @@ git clone https://github.com/NVIDIA/HugeCTR Pull the container using the following command: ```shell -docker pull nvcr.io/nvidia/merlin/merlin-hugectr:23.12 +docker pull nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` Launch the container in interactive mode (mount the HugeCTR root directory into the container for your convenience) by running this command: ```shell - docker run --gpus all --rm -it --cap-add SYS_NICE --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 -u root -v $(pwd):/HugeCTR -w /HugeCTR --network=host --runtime=nvidia nvcr.io/nvidia/merlin/merlin-hugectr:23.12 + docker run --gpus all --rm -it --cap-add SYS_NICE --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 -u root -v $(pwd):/HugeCTR -w /HugeCTR --network=host --runtime=nvidia nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` - > To run the Sparse Operation Kit notebooks, specify the `nvcr.io/nvidia/merlin/merlin-tensorflow:23.12` 
container. + > To run the Sparse Operation Kit notebooks, specify the `nvcr.io/nvidia/merlin/merlin-tensorflow:24.04` container. ## 3. Customized Building (Optional) diff --git a/release_notes.md b/release_notes.md index ac3cc4cd93..3664a9c906 100755 --- a/release_notes.md +++ b/release_notes.md @@ -252,7 +252,7 @@ In this release, we have fixed issues and enhanced the code. ```{important} In January 2023, the HugeCTR team plans to deprecate semantic versioning, such as `v4.3`. - Afterward, the library will use calendar versioning only, such as `v23.12`. + Afterward, the library will use calendar versioning only, such as `v23.01`. ``` + **Support for BERT and Variants**: @@ -334,7 +334,7 @@ The [HugeCTR Training and Inference with Remote File System Example](https://nvi ```{important} In January 2023, the HugeCTR team plans to deprecate semantic versioning, such as `v4.2`. - Afterward, the library will use calendar versioning only, such as `v23.12`. + Afterward, the library will use calendar versioning only, such as `v23.01`. ``` + **Change to HPS with Redis or Kafka**: diff --git a/samples/criteo/README.md b/samples/criteo/README.md index 798fd262e1..d3f1afda61 100644 --- a/samples/criteo/README.md +++ b/samples/criteo/README.md @@ -11,11 +11,11 @@ HugeCTR is available as buildable source code, but the easiest way to install an 1. Pull the HugeCTR NGC Docker by running the following command: ```bash - $ docker pull nvcr.io/nvidia/merlin/merlin-hugectr:23.12 + $ docker pull nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` 2. 
Launch the container in interactive mode with the HugeCTR root directory mounted into the container by running the following command: ```bash - $ docker run --gpus=all --rm -it --cap-add SYS_NICE -u $(id -u):$(id -g) -v $(pwd):/hugectr -w /hugectr nvcr.io/nvidia/merlin/merlin-hugectr:23.12 + $ docker run --gpus=all --rm -it --cap-add SYS_NICE -u $(id -u):$(id -g) -v $(pwd):/hugectr -w /hugectr nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` ### Build the HugeCTR Docker Container on Your Own ### diff --git a/samples/dcn/README.md b/samples/dcn/README.md index 741851e192..c5e41d1192 100644 --- a/samples/dcn/README.md +++ b/samples/dcn/README.md @@ -11,11 +11,11 @@ HugeCTR is available as buildable source code, but the easiest way to install an 1. Pull the HugeCTR NGC Docker by running the following command: ```bash - $ docker pull nvcr.io/nvidia/merlin/merlin-hugectr:23.12 + $ docker pull nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` 2. Launch the container in interactive mode with the HugeCTR root directory mounted into the container by running the following command: ```bash - $ docker run --gpus=all --rm -it --cap-add SYS_NICE -u $(id -u):$(id -g) -v $(pwd):/hugectr -w /hugectr nvcr.io/nvidia/merlin/merlin-hugectr:23.12 + $ docker run --gpus=all --rm -it --cap-add SYS_NICE -u $(id -u):$(id -g) -v $(pwd):/hugectr -w /hugectr nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` ### Build the HugeCTR Docker Container on Your Own ### diff --git a/samples/deepfm/README.md b/samples/deepfm/README.md index 038ddfc073..a3e4abf515 100644 --- a/samples/deepfm/README.md +++ b/samples/deepfm/README.md @@ -11,11 +11,11 @@ HugeCTR is available as buildable source code, but the easiest way to install an 1. Pull the HugeCTR NGC Docker by running the following command: ```bash - $ docker pull nvcr.io/nvidia/merlin/merlin-hugectr:23.12 + $ docker pull nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` 2. 
Launch the container in interactive mode with the HugeCTR root directory mounted into the container by running the following command: ```bash - $ docker run --gpus=all --rm -it --cap-add SYS_NICE -u $(id -u):$(id -g) -v $(pwd):/hugectr -w /hugectr nvcr.io/nvidia/merlin/merlin-hugectr:23.12 + $ docker run --gpus=all --rm -it --cap-add SYS_NICE -u $(id -u):$(id -g) -v $(pwd):/hugectr -w /hugectr nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` ### Build the HugeCTR Docker Container on Your Own ### diff --git a/samples/din/README.md b/samples/din/README.md index 94cf7897db..f965a0c412 100644 --- a/samples/din/README.md +++ b/samples/din/README.md @@ -11,11 +11,11 @@ HugeCTR is available as buildable source code, but the easiest way to install an 1. Pull the HugeCTR NGC Docker by running the following command: ```bash - $ docker pull nvcr.io/nvidia/merlin/merlin-hugectr:23.12 + $ docker pull nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` 2. Launch the container in interactive mode with the HugeCTR root directory mounted into the container by running the following command: ```bash - $ docker run --gpus=all --rm -it --cap-add SYS_NICE -u $(id -u):$(id -g) -v $(pwd):/hugectr -w /hugectr nvcr.io/nvidia/merlin/merlin-hugectr:23.12 + $ docker run --gpus=all --rm -it --cap-add SYS_NICE -u $(id -u):$(id -g) -v $(pwd):/hugectr -w /hugectr nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` ### Build the HugeCTR Docker Container on Your Own ### diff --git a/samples/dlrm/README.md b/samples/dlrm/README.md index deba5b181e..38c1ed6a9b 100644 --- a/samples/dlrm/README.md +++ b/samples/dlrm/README.md @@ -18,11 +18,11 @@ HugeCTR is available as buildable source code, but the easiest way to install an 1. Pull the HugeCTR NGC Docker by running the following command: ```bash - $ docker pull nvcr.io/nvidia/merlin/merlin-hugectr:23.12 + $ docker pull nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` 2. 
Launch the container in interactive mode with the HugeCTR root directory mounted into the container by running the following command: ```bash - $ docker run --gpus=all --rm -it --cap-add SYS_NICE -u $(id -u):$(id -g) -v $(pwd):/hugectr -w /hugectr nvcr.io/nvidia/merlin/merlin-hugectr:23.12 + $ docker run --gpus=all --rm -it --cap-add SYS_NICE -u $(id -u):$(id -g) -v $(pwd):/hugectr -w /hugectr nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` ### Build the HugeCTR Docker Container on Your Own ### @@ -86,7 +86,7 @@ As a result, `day_*_sparse_multi_hot.npz` files will be created (24 files in tot Because HugeCTR uses, among others, [raw format](https://nvidia-merlin.github.io/HugeCTR/main/api/python_interface.html#raw) for input data, we need to convert NumPy files created in the preceding steps to this format. To this end, use `preprocessing/convert_to_raw.py` script that comes with the container created in section [Build the container and push to a docker registry](#build-the-container-and-push-to-a-docker-registry) below. ``` -docker run -it --rm --network=host --ipc=host -v /data:/data nvcr.io/nvidia/merlin/merlin-hugectr:23.12 +docker run -it --rm --network=host --ipc=host -v /data:/data nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` In that container, run: ``` diff --git a/samples/ftrl/README.md b/samples/ftrl/README.md index 36f568b18e..b0c7c0e5d1 100644 --- a/samples/ftrl/README.md +++ b/samples/ftrl/README.md @@ -15,11 +15,11 @@ HugeCTR is available as buildable source code, but the easiest way to install an 1. Pull the HugeCTR NGC Docker by running the following command: ```bash - $ docker pull nvcr.io/nvidia/merlin/merlin-hugectr:23.12 + $ docker pull nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` 2. 
Launch the container in interactive mode with the HugeCTR root directory mounted into the container by running the following command: ```bash - $ docker run --gpus=all --rm -it --cap-add SYS_NICE -u $(id -u):$(id -g) -v $(pwd):/hugectr -w /hugectr nvcr.io/nvidia/merlin/merlin-hugectr:23.12 + $ docker run --gpus=all --rm -it --cap-add SYS_NICE -u $(id -u):$(id -g) -v $(pwd):/hugectr -w /hugectr nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` ### Build the HugeCTR Docker Container on Your Own ### diff --git a/samples/mmoe/README.md b/samples/mmoe/README.md index 2c482508ab..f7ffd98d46 100644 --- a/samples/mmoe/README.md +++ b/samples/mmoe/README.md @@ -11,11 +11,11 @@ HugeCTR is available as buildable source code, but the easiest way to install an 1. Pull the HugeCTR NGC Docker by running the following command: ```bash - $ docker pull nvcr.io/nvidia/merlin/merlin-hugectr:23.12 + $ docker pull nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` 2. Launch the container in interactive mode with the HugeCTR root directory mounted into the container by running the following command: ```bash - $ docker run --gpus=all --rm -it --cap-add SYS_NICE -u $(id -u):$(id -g) -v $(pwd):/hugectr -w /hugectr nvcr.io/nvidia/merlin/merlin-hugectr:23.12 + $ docker run --gpus=all --rm -it --cap-add SYS_NICE -u $(id -u):$(id -g) -v $(pwd):/hugectr -w /hugectr nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` ### Build the HugeCTR Docker Container on Your Own ### diff --git a/samples/ncf/README.md b/samples/ncf/README.md index b263eff6b7..5d087c4180 100644 --- a/samples/ncf/README.md +++ b/samples/ncf/README.md @@ -11,11 +11,11 @@ HugeCTR is available as buildable source code, but the easiest way to install an 1. Pull the HugeCTR NGC Docker by running the following command: ```bash - $ docker pull nvcr.io/nvidia/merlin/merlin-hugectr:23.12 + $ docker pull nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` 2. 
Launch the container in interactive mode with the HugeCTR root directory mounted into the container by running the following command: ```bash - $ docker run --gpus=all --rm -it --cap-add SYS_NICE -u $(id -u):$(id -g) -v $(pwd):/hugectr -w /hugectr nvcr.io/nvidia/merlin/merlin-hugectr:23.12 + $ docker run --gpus=all --rm -it --cap-add SYS_NICE -u $(id -u):$(id -g) -v $(pwd):/hugectr -w /hugectr nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` ### Build the HugeCTR Docker Container on Your Own ### diff --git a/samples/wdl/README.md b/samples/wdl/README.md index 53cb7ec8d4..272086eb81 100644 --- a/samples/wdl/README.md +++ b/samples/wdl/README.md @@ -11,11 +11,11 @@ HugeCTR is available as buildable source code, but the easiest way to install an 1. Pull the HugeCTR NGC Docker by running the following command: ```bash - $ docker pull nvcr.io/nvidia/merlin/merlin-hugectr:23.12 + $ docker pull nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` 2. Launch the container in interactive mode with the HugeCTR root directory mounted into the container by running the following command: ```bash - $ docker run --gpus=all --rm -it --cap-add SYS_NICE -u $(id -u):$(id -g) -v $(pwd):/hugectr -w /hugectr nvcr.io/nvidia/merlin/merlin-hugectr:23.12 + $ docker run --gpus=all --rm -it --cap-add SYS_NICE -u $(id -u):$(id -g) -v $(pwd):/hugectr -w /hugectr nvcr.io/nvidia/merlin/merlin-hugectr:24.04 ``` ### Build the HugeCTR Docker Container on Your Own ### diff --git a/tools/dlrm_script/hash/concurrent_unordered_map.cuh b/tools/dlrm_script/hash/concurrent_unordered_map.cuh index f4e924e19f..2952b76008 100644 --- a/tools/dlrm_script/hash/concurrent_unordered_map.cuh +++ b/tools/dlrm_script/hash/concurrent_unordered_map.cuh @@ -37,7 +37,7 @@ // cudf 22.06 moved the namespace of default_hash to cudf::detail // and renamed CUDA_TRY to CUDF_CUDA_TRY. 
Handle both of these // so that we can compile against both cudf 22.04 and cudf 22.06+ -// CUDF_GE_2306: from cudf 23.12 moves the new file and the `hash_functions.cuh` +// CUDF_GE_2306: starting with cudf 23.06, the `hash_functions.cuh` header moved // from `cpp/include/cudf/detail/utilities/` to `cpp/include/cudf/hashing/detail` // The hash functions were redeclared from the `cudf::detail` namespace // to the `cudf::hashing::detail` namespace