diff --git a/build.sbt b/build.sbt
index 8ff8b4c4e1..af3232b6a6 100644
--- a/build.sbt
+++ b/build.sbt
@@ -8,7 +8,7 @@ import scala.xml.transform.{RewriteRule, RuleTransformer}
 import scala.xml.{Node => XmlNode, NodeSeq => XmlNodeSeq, _}
 
 val condaEnvName = "synapseml"
-val sparkVersion = "3.3.1"
+val sparkVersion = "3.3.2"
 name := "synapseml"
 ThisBuild / organization := "com.microsoft.azure"
 ThisBuild / scalaVersion := "2.12.15"
diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/PyCodegen.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/PyCodegen.scala
index d468df2da4..be19cc81a2 100644
--- a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/PyCodegen.scala
+++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/PyCodegen.scala
@@ -68,7 +68,7 @@ object PyCodegen {
     // There's `Already borrowed` error found in transformers 4.16.2 when using tokenizers
     s"""extras_require={"extras": [
        | "cmake",
-       | "horovod==0.28.1",
+       | "horovod==0.27.0",
        | "pytorch_lightning>=1.5.0,<1.5.10",
        | "torch==1.13.1",
        | "torchvision>=0.14.1",
diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksGPUTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksGPUTests.scala
index be308c7af7..d99ac4a672 100644
--- a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksGPUTests.scala
+++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksGPUTests.scala
@@ -11,11 +11,7 @@ import java.io.File
 import scala.collection.mutable.ListBuffer
 
 class DatabricksGPUTests extends DatabricksTestHelper {
-  val horovodInstallationScript: File = FileUtilities.join(
-    BuildInfo.baseDirectory.getParent, "deep-learning",
-    "src", "main", "python", "horovod_installation.sh").getCanonicalFile
-  uploadFileToDBFS(horovodInstallationScript, "/FileStore/horovod-fix-commit/horovod_installation.sh")
-  val clusterId: String = createClusterInPool(GPUClusterName, AdbGpuRuntime, 2, GpuPoolId, GPUInitScripts)
+  val clusterId: String = createClusterInPool(GPUClusterName, AdbGpuRuntime, 2, GpuPoolId, "[]")
   val jobIdsToCancel: ListBuffer[Int] = databricksTestHelper(
     clusterId, GPULibraries, GPUNotebooks)
 
diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala
index 9749afc0f2..678c3fa4ee 100644
--- a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala
+++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala
@@ -29,11 +29,11 @@ object DatabricksUtilities {
 
   // ADB Info
   val Region = "eastus"
-  val PoolName = "synapseml-build-11.2"
-  val GpuPoolName = "synapseml-build-11.2-gpu"
-  val AdbRuntime = "11.2.x-scala2.12"
-  // https://learn.microsoft.com/en-us/azure/databricks/release-notes/runtime/11.2
-  val AdbGpuRuntime = "11.2.x-gpu-ml-scala2.12"
+  val PoolName = "synapseml-build-12.2"
+  val GpuPoolName = "synapseml-build-12.2-gpu"
+  val AdbRuntime = "12.2.x-scala2.12"
+  // https://learn.microsoft.com/en-us/azure/databricks/release-notes/runtime/
+  val AdbGpuRuntime = "12.2.x-gpu-ml-scala2.12"
   val NumWorkers = 5
   val AutoTerminationMinutes = 15
 
@@ -77,10 +77,10 @@ object DatabricksUtilities {
   val GPULibraries: String = List(
     Map("maven" -> Map("coordinates" -> PackageMavenCoordinate, "repo" -> PackageRepository)),
     Map("pypi" -> Map("package" -> "pytorch-lightning==1.5.0")),
-    Map("pypi" -> Map("package" -> "torchvision==0.12.0")),
-    Map("pypi" -> Map("package" -> "transformers==4.32.1")),
-    Map("pypi" -> Map("package" -> "petastorm==0.12.0")),
-    Map("pypi" -> Map("package" -> "protobuf==3.20.3"))
+    Map("pypi" -> Map("package" -> "torchvision==0.14.1")),
+    Map("pypi" -> Map("package" -> "transformers==4.25.1")),
+    Map("pypi" -> Map("package" -> "petastorm==0.12.1")),
+    Map("pypi" -> Map("package" -> "protobuf==3.19.4"))
   ).toJson.compactPrint
 
   val GPUInitScripts: String = List(
diff --git a/deep-learning/src/main/python/horovod_installation.sh b/deep-learning/src/main/python/horovod_installation.sh
index 7b41421c3f..8bd5f19c02 100644
--- a/deep-learning/src/main/python/horovod_installation.sh
+++ b/deep-learning/src/main/python/horovod_installation.sh
@@ -7,10 +7,10 @@ set -eu
 
 # Install prerequisite libraries that horovod depends on
 pip install pytorch-lightning==1.5.0
-pip install torchvision==0.12.0
-pip install transformers==4.15.0
+pip install torchvision==0.14.1
+pip install transformers==4.25.1
 pip install petastorm>=0.12.0
-pip install protobuf==3.20.3
+pip install protobuf==3.19.1
 
 # Remove Outdated Signing Key:
 sudo apt-key del 7fa2af80
@@ -35,8 +35,8 @@ libcusparse-dev-11-0=11.1.1.245-1
 
 git clone --recursive https://github.com/horovod/horovod.git
 cd horovod
-# git fetch origin refs/tags/v0.28.1:tags/v0.28.1
-git checkout 1d217b59949986d025f6db93c49943fb6b6cc78f
+# git fetch origin refs/tags/v0.27.0:tags/v0.27.0
+git checkout bfaca90d5cf66780a97d8799d4e1573855b64560
 git checkout -b tmp-branch
 rm -rf build/ dist/
 HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_CUDA_HOME=/usr/local/cuda-11/ HOROVOD_WITH_PYTORCH=1 HOROVOD_WITHOUT_MXNET=1 \
@@ -44,4 +44,4 @@ HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_CUDA_HOME=/usr/local/cuda-11/ HOROVOD_WITH_PY
 
 readlink -f dist/horovod-*.whl
 
-pip install --no-cache-dir dist/horovod-0.28.1-cp38-cp38-linux_x86_64.whl --force-reinstall --no-deps
+pip install --no-cache-dir dist/horovod-0.27.0-cp38-cp38-linux_x86_64.whl --force-reinstall --no-deps
diff --git a/deep-learning/src/main/python/synapse/ml/dl/DeepTextClassifier.py b/deep-learning/src/main/python/synapse/ml/dl/DeepTextClassifier.py
index 0702fc828b..b6f3bc81dc 100644
--- a/deep-learning/src/main/python/synapse/ml/dl/DeepTextClassifier.py
+++ b/deep-learning/src/main/python/synapse/ml/dl/DeepTextClassifier.py
@@ -11,12 +11,12 @@
 if _TRANSFORMERS_AVAILABLE:
     import transformers
 
-    _TRANSFORMERS_EQUAL_4_15_0 = transformers.__version__ == "4.15.0"
-    if _TRANSFORMERS_EQUAL_4_15_0:
+    _TRANSFORMERS_EQUAL_4_25_1 = transformers.__version__ == "4.25.1"
+    if _TRANSFORMERS_EQUAL_4_25_1:
         from transformers import AutoTokenizer
     else:
         raise RuntimeError(
-            "transformers should be == 4.15.0, found: {}".format(
+            "transformers should be == 4.25.1, found: {}".format(
                 transformers.__version__
             )
         )
diff --git a/deep-learning/src/main/python/synapse/ml/dl/DeepVisionClassifier.py b/deep-learning/src/main/python/synapse/ml/dl/DeepVisionClassifier.py
index 0e10afe862..59dee2b5c9 100644
--- a/deep-learning/src/main/python/synapse/ml/dl/DeepVisionClassifier.py
+++ b/deep-learning/src/main/python/synapse/ml/dl/DeepVisionClassifier.py
@@ -19,10 +19,10 @@
 if _HOROVOD_AVAILABLE:
     import horovod
 
-    _HOROVOD_EQUAL_0_28_1 = horovod.__version__ == "0.28.1"
-    if not _HOROVOD_EQUAL_0_28_1:
+    _HOROVOD_EQUAL_0_27_0 = horovod.__version__ == "0.27.0"
+    if not _HOROVOD_EQUAL_0_27_0:
         raise RuntimeError(
-            "horovod should be of version 0.28.1, found: {}".format(horovod.__version__)
+            "horovod should be of version 0.27.0, found: {}".format(horovod.__version__)
         )
 else:
     raise ModuleNotFoundError("module not found: horovod")
diff --git a/deep-learning/src/main/python/synapse/ml/dl/LitDeepTextModel.py b/deep-learning/src/main/python/synapse/ml/dl/LitDeepTextModel.py
index 2283281c0b..b17b9f5f18 100644
--- a/deep-learning/src/main/python/synapse/ml/dl/LitDeepTextModel.py
+++ b/deep-learning/src/main/python/synapse/ml/dl/LitDeepTextModel.py
@@ -13,12 +13,12 @@
 if _TRANSFORMERS_AVAILABLE:
     import transformers
 
-    _TRANSFORMERS_EQUAL_4_15_0 = transformers.__version__ == "4.15.0"
-    if _TRANSFORMERS_EQUAL_4_15_0:
+    _TRANSFORMERS_EQUAL_4_25_1 = transformers.__version__ == "4.25.1"
+    if _TRANSFORMERS_EQUAL_4_25_1:
         from transformers import AutoModelForSequenceClassification
     else:
         raise RuntimeError(
-            "transformers should be == 4.15.0, found: {}".format(
+            "transformers should be == 4.25.1, found: {}".format(
                 transformers.__version__
            )
         )
diff --git a/environment.yml b/environment.yml
index 5c61ae0378..729fdeae30 100644
--- a/environment.yml
+++ b/environment.yml
@@ -11,7 +11,7 @@ dependencies:
   - r-devtools=2.4.2
   - pip:
       - pyarrow>=0.15.0
-      - pyspark==3.3.1
+      - pyspark==3.3.2
       - pandas==1.2.5
       - wheel
       - sphinx==4.2.0
@@ -31,16 +31,16 @@ dependencies:
       - twine
       - jupyter
      - mlflow
-      - numpy==1.23.0
-      - torch==1.11.0
-      - torchvision==0.12.0
-      - horovod==0.25.0
+      - numpy
+      - torch==1.13.1
+      - torchvision==0.14.1
+      - horovod==0.27.0
       - petastorm>=0.11.0
       - pytorch_lightning==1.5.0
       - onnxmltools==1.7.0
       - matplotlib
       - Pillow
-      - transformers==4.15.0
+      - transformers==4.25.1
       - huggingface-hub>=0.8.1
       - langchain==0.0.151
       - openai==0.27.5
diff --git a/pipeline.yaml b/pipeline.yaml
index 55c3f27ecf..7bff95ea25 100644
--- a/pipeline.yaml
+++ b/pipeline.yaml
@@ -511,7 +511,7 @@ jobs:
             fi
             sbt publishM2
 
-            SPARK_VERSION=3.3.1
+            SPARK_VERSION=3.3.2
             HADOOP_VERSION=3
             wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
             (timeout 20m sbt "project $(PACKAGE)" coverage testR) || (echo "retrying" && timeout 20m sbt "project $(PACKAGE)" coverage testR) || (echo "retrying" && timeout 20m sbt "project $(PACKAGE)" coverage testR)
diff --git a/start b/start
index b8ba0e8621..6265989f8f 100644
--- a/start
+++ b/start
@@ -1,7 +1,7 @@
 #!/bin/bash
 
 export OPENMPI_VERSION="3.1.2"
-export SPARK_VERSION="3.3.1"
+export SPARK_VERSION="3.3.2"
 export HADOOP_VERSION="2.7"
 export SYNAPSEML_VERSION="0.11.2" # Binder compatibility version
 
diff --git a/tools/docker/demo/Dockerfile b/tools/docker/demo/Dockerfile
index c0eeabbfd3..16b4ebd0d5 100644
--- a/tools/docker/demo/Dockerfile
+++ b/tools/docker/demo/Dockerfile
@@ -3,7 +3,7 @@ FROM mcr.microsoft.com/oss/mirror/docker.io/library/ubuntu:20.04
 ARG SYNAPSEML_VERSION=0.11.2
 ARG DEBIAN_FRONTEND=noninteractive
 
-ENV SPARK_VERSION=3.3.1
+ENV SPARK_VERSION=3.3.2
 ENV HADOOP_VERSION=3
 ENV SYNAPSEML_VERSION=${SYNAPSEML_VERSION}
 ENV JAVA_HOME /usr/lib/jvm/java-1.11.0-openjdk-amd64
diff --git a/tools/docker/minimal/Dockerfile b/tools/docker/minimal/Dockerfile
index 9d9845051b..e64269652d 100644
--- a/tools/docker/minimal/Dockerfile
+++ b/tools/docker/minimal/Dockerfile
@@ -3,7 +3,7 @@ FROM mcr.microsoft.com/oss/mirror/docker.io/library/ubuntu:20.04
 ARG SYNAPSEML_VERSION=0.11.2
 ARG DEBIAN_FRONTEND=noninteractive
 
-ENV SPARK_VERSION=3.3.1
+ENV SPARK_VERSION=3.3.2
 ENV HADOOP_VERSION=3
 ENV SYNAPSEML_VERSION=${SYNAPSEML_VERSION}
 ENV JAVA_HOME /usr/lib/jvm/java-1.11.0-openjdk-amd64
diff --git a/tools/dotnet/dotnetSetup.sh b/tools/dotnet/dotnetSetup.sh
index 297d37310a..1c7d732fec 100644
--- a/tools/dotnet/dotnetSetup.sh
+++ b/tools/dotnet/dotnetSetup.sh
@@ -21,10 +21,10 @@ echo "##vso[task.setvariable variable=DOTNET_WORKER_DIR]$DOTNET_WORKER_DIR"
 dotnet tool install -g sleet
 
 # Install Apache Spark-3.3
-curl https://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz -o spark-3.3.1-bin-hadoop3.tgz
+curl https://archive.apache.org/dist/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz -o spark-3.3.2-bin-hadoop3.tgz
 mkdir ~/bin
-tar -xzvf spark-3.3.1-bin-hadoop3.tgz -C ~/bin
-export SPARK_HOME=~/bin/spark-3.3.1-bin-hadoop3/
+tar -xzvf spark-3.3.2-bin-hadoop3.tgz -C ~/bin
+export SPARK_HOME=~/bin/spark-3.3.2-bin-hadoop3/
 export PATH=$SPARK_HOME/bin:$PATH
 echo "##vso[task.setvariable variable=SPARK_HOME]$SPARK_HOME"
 echo "##vso[task.setvariable variable=PATH]$SPARK_HOME/bin:$PATH"
diff --git a/tools/tests/run_r_tests.R b/tools/tests/run_r_tests.R
index 26becbcd5e..9c8f99a40f 100644
--- a/tools/tests/run_r_tests.R
+++ b/tools/tests/run_r_tests.R
@@ -3,7 +3,7 @@ if (!require("sparklyr")) {
   library("sparklyr")
 }
 
-spark_install_tar(paste(getwd(), "/../../../../../../spark-3.3.1-bin-hadoop3.tgz", sep = ""))
+spark_install_tar(paste(getwd(), "/../../../../../../spark-3.3.2-bin-hadoop3.tgz", sep = ""))
 
 options("testthat.output_file" = "../../../../r-test-results.xml")
 devtools::test(reporter = JunitReporter$new())