diff --git a/build.sbt b/build.sbt
index 09aac567ac..82d50a9bd6 100644
--- a/build.sbt
+++ b/build.sbt
@@ -8,7 +8,7 @@ import scala.xml.transform.{RewriteRule, RuleTransformer}
 import scala.xml.{Node => XmlNode, NodeSeq => XmlNodeSeq, _}
 
 val condaEnvName = "synapseml"
-val sparkVersion = "3.2.3"
+val sparkVersion = "3.3.2"
 name := "synapseml"
 ThisBuild / organization := "com.microsoft.azure"
 ThisBuild / scalaVersion := "2.12.15"
diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/PyCodegen.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/PyCodegen.scala
index f6fd86e438..be19cc81a2 100644
--- a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/PyCodegen.scala
+++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/PyCodegen.scala
@@ -68,11 +68,11 @@ object PyCodegen {
     // There's `Already borrowed` error found in transformers 4.16.2 when using tokenizers
     s"""extras_require={"extras": [
        |  "cmake",
-       |  "horovod==0.25.0",
+       |  "horovod==0.27.0",
        |  "pytorch_lightning>=1.5.0,<1.5.10",
-       |  "torch==1.11.0",
-       |  "torchvision>=0.12.0",
-       |  "transformers==4.15.0",
+       |  "torch==1.13.1",
+       |  "torchvision>=0.14.1",
+       |  "transformers==4.32.1",
        |  "petastorm>=0.12.0",
        |  "huggingface-hub>=0.8.1",
        |]},
diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/codegen/RTestGen.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/codegen/RTestGen.scala
index 8e3fd1e85f..0b129e180f 100644
--- a/core/src/test/scala/com/microsoft/azure/synapse/ml/codegen/RTestGen.scala
+++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/codegen/RTestGen.scala
@@ -101,7 +101,7 @@ object RTestGen {
       |          "spark.sql.shuffle.partitions=10",
       |          "spark.sql.crossJoin.enabled=true")
       |
-      |sc <- spark_connect(master = "local", version = "3.2.4", config = conf)
+      |sc <- spark_connect(master = "local", version = "3.3.2", config = conf)
       |
       |""".stripMargin, StandardOpenOption.CREATE)
diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksGPUTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksGPUTests.scala
index be308c7af7..d99ac4a672 100644
--- a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksGPUTests.scala
+++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksGPUTests.scala
@@ -11,11 +11,7 @@ import java.io.File
 import scala.collection.mutable.ListBuffer
 
 class DatabricksGPUTests extends DatabricksTestHelper {
-  val horovodInstallationScript: File = FileUtilities.join(
-    BuildInfo.baseDirectory.getParent, "deep-learning",
-    "src", "main", "python", "horovod_installation.sh").getCanonicalFile
-  uploadFileToDBFS(horovodInstallationScript, "/FileStore/horovod-fix-commit/horovod_installation.sh")
-  val clusterId: String = createClusterInPool(GPUClusterName, AdbGpuRuntime, 2, GpuPoolId, GPUInitScripts)
+  val clusterId: String = createClusterInPool(GPUClusterName, AdbGpuRuntime, 2, GpuPoolId, "[]")
   val jobIdsToCancel: ListBuffer[Int] = databricksTestHelper(
     clusterId, GPULibraries, GPUNotebooks)
diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala
index 43f5203324..678c3fa4ee 100644
--- a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala
+++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala
@@ -29,10 +29,11 @@ object DatabricksUtilities {
 
   // ADB Info
   val Region = "eastus"
-  val PoolName = "synapseml-build-10.4"
-  val GpuPoolName = "synapseml-build-10.4-gpu"
-  val AdbRuntime = "10.4.x-scala2.12"
-  val AdbGpuRuntime = "10.4.x-gpu-ml-scala2.12"
+  val PoolName = "synapseml-build-12.2"
+  val GpuPoolName = "synapseml-build-12.2-gpu"
+  val AdbRuntime = "12.2.x-scala2.12"
+  // https://learn.microsoft.com/en-us/azure/databricks/release-notes/runtime/
+  val AdbGpuRuntime = "12.2.x-gpu-ml-scala2.12"
   val NumWorkers = 5
   val AutoTerminationMinutes = 15
 
@@ -75,8 +76,11 @@ object DatabricksUtilities {
   // TODO: install synapse.ml.dl wheel package here
   val GPULibraries: String = List(
     Map("maven" -> Map("coordinates" -> PackageMavenCoordinate, "repo" -> PackageRepository)),
-    Map("pypi" -> Map("package" -> "transformers==4.15.0")),
-    Map("pypi" -> Map("package" -> "petastorm==0.12.0"))
+    Map("pypi" -> Map("package" -> "pytorch-lightning==1.5.0")),
+    Map("pypi" -> Map("package" -> "torchvision==0.14.1")),
+    Map("pypi" -> Map("package" -> "transformers==4.25.1")),
+    Map("pypi" -> Map("package" -> "petastorm==0.12.1")),
+    Map("pypi" -> Map("package" -> "protobuf==3.19.4"))
   ).toJson.compactPrint
 
   val GPUInitScripts: String = List(
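
Since the GPU cluster now relies on the pinned PyPI wheels in GPULibraries above rather than the removed Horovod init script, a quick environment check can catch drift between a cluster image and these pins. A minimal sketch, not part of the patch; the package names and versions are copied from the pins above:

```python
# Sanity-check that an environment carries the versions pinned in GPULibraries.
from importlib.metadata import version, PackageNotFoundError

EXPECTED = {
    "pytorch-lightning": "1.5.0",
    "torchvision": "0.14.1",
    "transformers": "4.25.1",
    "petastorm": "0.12.1",
    "protobuf": "3.19.4",
}

for pkg, want in EXPECTED.items():
    try:
        got = version(pkg)
    except PackageNotFoundError:
        print(f"{pkg}: MISSING (want {want})")
        continue
    print(f"{pkg}: {got} {'OK' if got == want else f'!= {want}'}")
```
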
"synapseml-build-10.4" - val GpuPoolName = "synapseml-build-10.4-gpu" - val AdbRuntime = "10.4.x-scala2.12" - val AdbGpuRuntime = "10.4.x-gpu-ml-scala2.12" + val PoolName = "synapseml-build-12.2" + val GpuPoolName = "synapseml-build-12.2-gpu" + val AdbRuntime = "12.2.x-scala2.12" + // https://learn.microsoft.com/en-us/azure/databricks/release-notes/runtime/ + val AdbGpuRuntime = "12.2.x-gpu-ml-scala2.12" val NumWorkers = 5 val AutoTerminationMinutes = 15 @@ -75,8 +76,11 @@ object DatabricksUtilities { // TODO: install synapse.ml.dl wheel package here val GPULibraries: String = List( Map("maven" -> Map("coordinates" -> PackageMavenCoordinate, "repo" -> PackageRepository)), - Map("pypi" -> Map("package" -> "transformers==4.15.0")), - Map("pypi" -> Map("package" -> "petastorm==0.12.0")) + Map("pypi" -> Map("package" -> "pytorch-lightning==1.5.0")), + Map("pypi" -> Map("package" -> "torchvision==0.14.1")), + Map("pypi" -> Map("package" -> "transformers==4.25.1")), + Map("pypi" -> Map("package" -> "petastorm==0.12.1")), + Map("pypi" -> Map("package" -> "protobuf==3.19.4")) ).toJson.compactPrint val GPUInitScripts: String = List( diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseUtilities.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseUtilities.scala index 317218c08d..478e829d79 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseUtilities.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseUtilities.scala @@ -255,7 +255,7 @@ object SynapseUtilities { | "nodeSizeFamily": "MemoryOptimized", | "provisioningState": "Succeeded", | "sessionLevelPackagesEnabled": "true", - | "sparkVersion": "3.2" + | "sparkVersion": "3.3" | } |} |""".stripMargin diff --git a/deep-learning/src/main/python/horovod_installation.sh b/deep-learning/src/main/python/horovod_installation.sh index b983be0dad..8bd5f19c02 100644 --- a/deep-learning/src/main/python/horovod_installation.sh +++ b/deep-learning/src/main/python/horovod_installation.sh @@ -7,10 +7,10 @@ set -eu # Install prerequisite libraries that horovod depends on pip install pytorch-lightning==1.5.0 -pip install torchvision==0.12.0 -pip install transformers==4.15.0 +pip install torchvision==0.14.1 +pip install transformers==4.25.1 pip install petastorm>=0.12.0 -pip install protobuf==3.20.3 +pip install protobuf==3.19.1 # Remove Outdated Signing Key: sudo apt-key del 7fa2af80 @@ -35,11 +35,8 @@ libcusparse-dev-11-0=11.1.1.245-1 git clone --recursive https://github.com/horovod/horovod.git cd horovod -# # fix version 0.25.0 -# git fetch origin refs/tags/v0.25.0:tags/v0.25.0 -# git checkout tags/v0.25.0 -b v0.25.0-branch -# fix to this commit number until they release a new version -git checkout ab97fd15bbba3258adcdd12983f36a1cdeacbc94 +# git fetch origin refs/tags/v0.27.0:tags/v0.27.0 +git checkout bfaca90d5cf66780a97d8799d4e1573855b64560 git checkout -b tmp-branch rm -rf build/ dist/ HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_CUDA_HOME=/usr/local/cuda-11/ HOROVOD_WITH_PYTORCH=1 HOROVOD_WITHOUT_MXNET=1 \ @@ -47,4 +44,4 @@ HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_CUDA_HOME=/usr/local/cuda-11/ HOROVOD_WITH_PY readlink -f dist/horovod-*.whl -pip install --no-cache-dir dist/horovod-0.25.0-cp38-cp38-linux_x86_64.whl --force-reinstall --no-deps +pip install --no-cache-dir dist/horovod-0.27.0-cp38-cp38-linux_x86_64.whl --force-reinstall --no-deps diff --git a/deep-learning/src/main/python/synapse/ml/dl/DeepTextClassifier.py 
diff --git a/deep-learning/src/main/python/synapse/ml/dl/DeepTextClassifier.py b/deep-learning/src/main/python/synapse/ml/dl/DeepTextClassifier.py
index 0702fc828b..b6f3bc81dc 100644
--- a/deep-learning/src/main/python/synapse/ml/dl/DeepTextClassifier.py
+++ b/deep-learning/src/main/python/synapse/ml/dl/DeepTextClassifier.py
@@ -11,12 +11,12 @@
 if _TRANSFORMERS_AVAILABLE:
     import transformers
 
-    _TRANSFORMERS_EQUAL_4_15_0 = transformers.__version__ == "4.15.0"
-    if _TRANSFORMERS_EQUAL_4_15_0:
+    _TRANSFORMERS_EQUAL_4_25_1 = transformers.__version__ == "4.25.1"
+    if _TRANSFORMERS_EQUAL_4_25_1:
         from transformers import AutoTokenizer
     else:
         raise RuntimeError(
-            "transformers should be == 4.15.0, found: {}".format(
+            "transformers should be == 4.25.1, found: {}".format(
                 transformers.__version__
             )
         )
diff --git a/deep-learning/src/main/python/synapse/ml/dl/DeepVisionClassifier.py b/deep-learning/src/main/python/synapse/ml/dl/DeepVisionClassifier.py
index 2968fbd7a8..59dee2b5c9 100644
--- a/deep-learning/src/main/python/synapse/ml/dl/DeepVisionClassifier.py
+++ b/deep-learning/src/main/python/synapse/ml/dl/DeepVisionClassifier.py
@@ -19,10 +19,10 @@
 if _HOROVOD_AVAILABLE:
     import horovod
 
-    _HOROVOD_EQUAL_0_25_0 = horovod.__version__ == "0.25.0"
-    if not _HOROVOD_EQUAL_0_25_0:
+    _HOROVOD_EQUAL_0_27_0 = horovod.__version__ == "0.27.0"
+    if not _HOROVOD_EQUAL_0_27_0:
         raise RuntimeError(
-            "horovod should be of version 0.25.0, found: {}".format(horovod.__version__)
+            "horovod should be of version 0.27.0, found: {}".format(horovod.__version__)
         )
 else:
     raise ModuleNotFoundError("module not found: horovod")
diff --git a/deep-learning/src/main/python/synapse/ml/dl/LitDeepTextModel.py b/deep-learning/src/main/python/synapse/ml/dl/LitDeepTextModel.py
index 2283281c0b..b17b9f5f18 100644
--- a/deep-learning/src/main/python/synapse/ml/dl/LitDeepTextModel.py
+++ b/deep-learning/src/main/python/synapse/ml/dl/LitDeepTextModel.py
@@ -13,12 +13,12 @@
 if _TRANSFORMERS_AVAILABLE:
     import transformers
 
-    _TRANSFORMERS_EQUAL_4_15_0 = transformers.__version__ == "4.15.0"
-    if _TRANSFORMERS_EQUAL_4_15_0:
+    _TRANSFORMERS_EQUAL_4_25_1 = transformers.__version__ == "4.25.1"
+    if _TRANSFORMERS_EQUAL_4_25_1:
         from transformers import AutoModelForSequenceClassification
     else:
         raise RuntimeError(
-            "transformers should be == 4.15.0, found: {}".format(
+            "transformers should be == 4.25.1, found: {}".format(
                 transformers.__version__
             )
         )
diff --git a/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb b/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb
index 3de3075f50..50e8cf6414 100644
--- a/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb
+++ b/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb
@@ -30,16 +30,16 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
-   "source": [
-    "%pip install sqlparse raiwidgets interpret-community mlflow==2.6.0"
-   ],
    "metadata": {
     "collapsed": false,
     "pycharm": {
      "name": "#%%\n"
     }
-   }
+   },
+   "outputs": [],
+   "source": [
+    "%pip install sqlparse raiwidgets interpret-community mlflow==2.5.0"
+   ]
  },
  {
   "cell_type": "markdown",
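
The same exact-version guard now appears in DeepTextClassifier.py, DeepVisionClassifier.py, and LitDeepTextModel.py. A possible consolidation, sketched only for illustration — `require_exact_version` is a hypothetical helper, not something this patch adds:

```python
# Hypothetical shared helper mirroring the guards updated above.
def require_exact_version(module, expected):
    """Raise if an imported module does not match the pinned version."""
    found = module.__version__
    if found != expected:
        raise RuntimeError(
            "{} should be == {}, found: {}".format(module.__name__, expected, found)
        )

import transformers
require_exact_version(transformers, "4.25.1")

import horovod
require_exact_version(horovod, "0.27.0")
```
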
"cell_type": "markdown", - "source": [ - "### Environment Setup on databricks" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Environment Setup on databricks" + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "# install cloudpickle 2.0.0 to add synapse module for usage of horovod\n", "%pip install cloudpickle==2.0.0 --force-reinstall --no-deps" - ], - "metadata": { - "collapsed": false - } + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install protobuf==3.20.1 --force-reinstall" + ] }, { "cell_type": "code", diff --git a/docs/Explore Algorithms/Deep Learning/Quickstart - Fine-tune a Vision Classifier.ipynb b/docs/Explore Algorithms/Deep Learning/Quickstart - Fine-tune a Vision Classifier.ipynb index a6e0930399..129ce05e5c 100644 --- a/docs/Explore Algorithms/Deep Learning/Quickstart - Fine-tune a Vision Classifier.ipynb +++ b/docs/Explore Algorithms/Deep Learning/Quickstart - Fine-tune a Vision Classifier.ipynb @@ -25,6 +25,15 @@ "%pip install cloudpickle==2.0.0 --force-reinstall --no-deps" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install protobuf==3.20.1 --force-reinstall" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/docs/Explore Algorithms/Hyperparameter Tuning/HyperOpt.ipynb b/docs/Explore Algorithms/Hyperparameter Tuning/HyperOpt.ipynb index 808f3c1488..d97c718ce7 100644 --- a/docs/Explore Algorithms/Hyperparameter Tuning/HyperOpt.ipynb +++ b/docs/Explore Algorithms/Hyperparameter Tuning/HyperOpt.ipynb @@ -31,7 +31,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install hyperopt mlflow" + "%pip install hyperopt mlflow==2.5.0" ] }, { diff --git a/environment.yml b/environment.yml index 9dac854ab7..729fdeae30 100644 --- a/environment.yml +++ b/environment.yml @@ -11,8 +11,7 @@ dependencies: - r-devtools=2.4.2 - pip: - pyarrow>=0.15.0 - - numpy>=1.19.3 - - pyspark==3.2.3 + - pyspark==3.3.2 - pandas==1.2.5 - wheel - sphinx==4.2.0 @@ -32,15 +31,16 @@ dependencies: - twine - jupyter - mlflow - - torch==1.11.0 - - torchvision==0.12.0 - - horovod==0.25.0 + - numpy + - torch==1.13.1 + - torchvision==0.14.1 + - horovod==0.27.0 - petastorm>=0.11.0 - pytorch_lightning==1.5.0 - onnxmltools==1.7.0 - matplotlib - Pillow - - transformers==4.15.0 + - transformers==4.25.1 - huggingface-hub>=0.8.1 - langchain==0.0.151 - openai==0.27.5 diff --git a/pipeline.yaml b/pipeline.yaml index c2dc70c165..9cf2f31d37 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -527,8 +527,8 @@ jobs: fi sbt publishM2 - SPARK_VERSION=3.2.4 - HADOOP_VERSION=3.2 + SPARK_VERSION=3.3.2 + HADOOP_VERSION=3 wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz (timeout 20m sbt "project $(PACKAGE)" coverage testR) || (echo "retrying" && timeout 20m sbt "project $(PACKAGE)" coverage testR) || (echo "retrying" && timeout 20m sbt "project $(PACKAGE)" coverage testR) - task: PublishTestResults@2 diff --git a/start b/start index 997da4c4be..6c4bf1d117 100644 --- a/start +++ b/start @@ -1,8 +1,8 @@ #!/bin/bash export OPENMPI_VERSION="3.1.2" -export SPARK_VERSION="3.2.3" -export HADOOP_VERSION="2.7" +export SPARK_VERSION="3.3.2" +export HADOOP_VERSION="3.3" export SYNAPSEML_VERSION="0.11.3" # Binder compatibility version echo "Beginning Spark Session..." 
diff --git a/tools/docker/demo/Dockerfile b/tools/docker/demo/Dockerfile
index 8d11a96998..512408dabf 100644
--- a/tools/docker/demo/Dockerfile
+++ b/tools/docker/demo/Dockerfile
@@ -3,8 +3,8 @@ FROM mcr.microsoft.com/oss/mirror/docker.io/library/ubuntu:20.04
 ARG SYNAPSEML_VERSION=0.11.3
 ARG DEBIAN_FRONTEND=noninteractive
 
-ENV SPARK_VERSION=3.2.3
-ENV HADOOP_VERSION=2.7
+ENV SPARK_VERSION=3.3.2
+ENV HADOOP_VERSION=3
 ENV SYNAPSEML_VERSION=${SYNAPSEML_VERSION}
 ENV JAVA_HOME /usr/lib/jvm/java-1.11.0-openjdk-amd64
diff --git a/tools/docker/minimal/Dockerfile b/tools/docker/minimal/Dockerfile
index 4d8c5cfa86..32213d9213 100644
--- a/tools/docker/minimal/Dockerfile
+++ b/tools/docker/minimal/Dockerfile
@@ -3,8 +3,8 @@ FROM mcr.microsoft.com/oss/mirror/docker.io/library/ubuntu:20.04
 ARG SYNAPSEML_VERSION=0.11.3
 ARG DEBIAN_FRONTEND=noninteractive
 
-ENV SPARK_VERSION=3.2.3
-ENV HADOOP_VERSION=2.7
+ENV SPARK_VERSION=3.3.2
+ENV HADOOP_VERSION=3
 ENV SYNAPSEML_VERSION=${SYNAPSEML_VERSION}
 ENV JAVA_HOME /usr/lib/jvm/java-1.11.0-openjdk-amd64
diff --git a/tools/dotnet/dotnetSetup.sh b/tools/dotnet/dotnetSetup.sh
index 1244caf479..1c7d732fec 100644
--- a/tools/dotnet/dotnetSetup.sh
+++ b/tools/dotnet/dotnetSetup.sh
@@ -20,11 +20,11 @@ echo "##vso[task.setvariable variable=DOTNET_WORKER_DIR]$DOTNET_WORKER_DIR"
 # Install Sleet
 dotnet tool install -g sleet
 
-# Install Apache Spark-3.2
-curl https://archive.apache.org/dist/spark/spark-3.2.0/spark-3.2.0-bin-hadoop3.2.tgz -o spark-3.2.0-bin-hadoop3.2.tgz
+# Install Apache Spark-3.3
+curl https://archive.apache.org/dist/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz -o spark-3.3.2-bin-hadoop3.tgz
 mkdir ~/bin
-tar -xzvf spark-3.2.0-bin-hadoop3.2.tgz -C ~/bin
-export SPARK_HOME=~/bin/spark-3.2.0-bin-hadoop3.2/
+tar -xzvf spark-3.3.2-bin-hadoop3.tgz -C ~/bin
+export SPARK_HOME=~/bin/spark-3.3.2-bin-hadoop3/
 export PATH=$SPARK_HOME/bin:$PATH
 echo "##vso[task.setvariable variable=SPARK_HOME]$SPARK_HOME"
 echo "##vso[task.setvariable variable=PATH]$SPARK_HOME/bin:$PATH"
diff --git a/tools/tests/run_r_tests.R b/tools/tests/run_r_tests.R
index 0d66844fef..9c8f99a40f 100644
--- a/tools/tests/run_r_tests.R
+++ b/tools/tests/run_r_tests.R
@@ -3,7 +3,7 @@ if (!require("sparklyr")) {
   library("sparklyr")
 }
 
-spark_install_tar(paste(getwd(), "/../../../../../../spark-3.2.4-bin-hadoop3.2.tgz", sep = ""))
+spark_install_tar(paste(getwd(), "/../../../../../../spark-3.3.2-bin-hadoop3.tgz", sep = ""))
 
 options("testthat.output_file" = "../../../../r-test-results.xml")
 devtools::test(reporter = JunitReporter$new())
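
All of the scripts in this patch now point at the spark-3.3.2-bin-hadoop3.tgz artifact. A small illustrative sketch (not part of the patch) confirming the Apache archive actually serves that file before CI depends on it:

```python
# HEAD request against the archive URL used by pipeline.yaml and dotnetSetup.sh.
import urllib.request

url = ("https://archive.apache.org/dist/spark/spark-3.3.2/"
       "spark-3.3.2-bin-hadoop3.tgz")
req = urllib.request.Request(url, method="HEAD")
with urllib.request.urlopen(req) as resp:
    print(resp.status, resp.headers.get("Content-Length"))
```
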