Skip to content

Commit

Permalink
chore: bump to spark 3.3.2
Browse files Browse the repository at this point in the history
  • Loading branch information
JessicaXYWang authored and mhamilton723 committed Sep 27, 2023
1 parent 21cff04 commit 8808d19
Show file tree
Hide file tree
Showing 21 changed files with 84 additions and 69 deletions.
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import scala.xml.transform.{RewriteRule, RuleTransformer}
import scala.xml.{Node => XmlNode, NodeSeq => XmlNodeSeq, _}

val condaEnvName = "synapseml"
val sparkVersion = "3.2.3"
val sparkVersion = "3.3.2"
name := "synapseml"
ThisBuild / organization := "com.microsoft.azure"
ThisBuild / scalaVersion := "2.12.15"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,11 @@ object PyCodegen {
// An `Already borrowed` error occurs in transformers 4.16.2 when using tokenizers
s"""extras_require={"extras": [
| "cmake",
| "horovod==0.25.0",
| "horovod==0.27.0",
| "pytorch_lightning>=1.5.0,<1.5.10",
| "torch==1.11.0",
| "torchvision>=0.12.0",
| "transformers==4.15.0",
| "torch==1.13.1",
| "torchvision>=0.14.1",
| "transformers==4.32.1",
| "petastorm>=0.12.0",
| "huggingface-hub>=0.8.1",
|]},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ object RTestGen {
| "spark.sql.shuffle.partitions=10",
| "spark.sql.crossJoin.enabled=true")
|
|sc <- spark_connect(master = "local", version = "3.2.4", config = conf)
|sc <- spark_connect(master = "local", version = "3.3.2", config = conf)
|
|""".stripMargin, StandardOpenOption.CREATE)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,7 @@ import java.io.File
import scala.collection.mutable.ListBuffer

class DatabricksGPUTests extends DatabricksTestHelper {
val horovodInstallationScript: File = FileUtilities.join(
BuildInfo.baseDirectory.getParent, "deep-learning",
"src", "main", "python", "horovod_installation.sh").getCanonicalFile
uploadFileToDBFS(horovodInstallationScript, "/FileStore/horovod-fix-commit/horovod_installation.sh")
val clusterId: String = createClusterInPool(GPUClusterName, AdbGpuRuntime, 2, GpuPoolId, GPUInitScripts)
val clusterId: String = createClusterInPool(GPUClusterName, AdbGpuRuntime, 2, GpuPoolId, "[]")
val jobIdsToCancel: ListBuffer[Int] = databricksTestHelper(
clusterId, GPULibraries, GPUNotebooks)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@ object DatabricksUtilities {

// ADB Info
val Region = "eastus"
val PoolName = "synapseml-build-10.4"
val GpuPoolName = "synapseml-build-10.4-gpu"
val AdbRuntime = "10.4.x-scala2.12"
val AdbGpuRuntime = "10.4.x-gpu-ml-scala2.12"
val PoolName = "synapseml-build-12.2"
val GpuPoolName = "synapseml-build-12.2-gpu"
val AdbRuntime = "12.2.x-scala2.12"
// https://learn.microsoft.com/en-us/azure/databricks/release-notes/runtime/
val AdbGpuRuntime = "12.2.x-gpu-ml-scala2.12"
val NumWorkers = 5
val AutoTerminationMinutes = 15

Expand Down Expand Up @@ -75,8 +76,11 @@ object DatabricksUtilities {
// TODO: install synapse.ml.dl wheel package here
val GPULibraries: String = List(
Map("maven" -> Map("coordinates" -> PackageMavenCoordinate, "repo" -> PackageRepository)),
Map("pypi" -> Map("package" -> "transformers==4.15.0")),
Map("pypi" -> Map("package" -> "petastorm==0.12.0"))
Map("pypi" -> Map("package" -> "pytorch-lightning==1.5.0")),
Map("pypi" -> Map("package" -> "torchvision==0.14.1")),
Map("pypi" -> Map("package" -> "transformers==4.25.1")),
Map("pypi" -> Map("package" -> "petastorm==0.12.1")),
Map("pypi" -> Map("package" -> "protobuf==3.19.4"))
).toJson.compactPrint

val GPUInitScripts: String = List(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ object SynapseUtilities {
| "nodeSizeFamily": "MemoryOptimized",
| "provisioningState": "Succeeded",
| "sessionLevelPackagesEnabled": "true",
| "sparkVersion": "3.2"
| "sparkVersion": "3.3"
| }
|}
|""".stripMargin
Expand Down
15 changes: 6 additions & 9 deletions deep-learning/src/main/python/horovod_installation.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ set -eu

# Install prerequisite libraries that horovod depends on
pip install pytorch-lightning==1.5.0
pip install torchvision==0.12.0
pip install transformers==4.15.0
pip install torchvision==0.14.1
pip install transformers==4.25.1
pip install petastorm>=0.12.0
pip install protobuf==3.20.3
pip install protobuf==3.19.1

# Remove Outdated Signing Key:
sudo apt-key del 7fa2af80
Expand All @@ -35,16 +35,13 @@ libcusparse-dev-11-0=11.1.1.245-1

git clone --recursive https://github.com/horovod/horovod.git
cd horovod
# # fix version 0.25.0
# git fetch origin refs/tags/v0.25.0:tags/v0.25.0
# git checkout tags/v0.25.0 -b v0.25.0-branch
# fix to this commit number until they release a new version
git checkout ab97fd15bbba3258adcdd12983f36a1cdeacbc94
# git fetch origin refs/tags/v0.27.0:tags/v0.27.0
git checkout bfaca90d5cf66780a97d8799d4e1573855b64560
git checkout -b tmp-branch
rm -rf build/ dist/
HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_CUDA_HOME=/usr/local/cuda-11/ HOROVOD_WITH_PYTORCH=1 HOROVOD_WITHOUT_MXNET=1 \
/databricks/python3/bin/python setup.py bdist_wheel

readlink -f dist/horovod-*.whl

pip install --no-cache-dir dist/horovod-0.25.0-cp38-cp38-linux_x86_64.whl --force-reinstall --no-deps
pip install --no-cache-dir dist/horovod-0.27.0-cp38-cp38-linux_x86_64.whl --force-reinstall --no-deps
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@
if _TRANSFORMERS_AVAILABLE:
import transformers

_TRANSFORMERS_EQUAL_4_15_0 = transformers.__version__ == "4.15.0"
if _TRANSFORMERS_EQUAL_4_15_0:
_TRANSFORMERS_EQUAL_4_25_1 = transformers.__version__ == "4.25.1"
if _TRANSFORMERS_EQUAL_4_25_1:
from transformers import AutoTokenizer
else:
raise RuntimeError(
"transformers should be == 4.15.0, found: {}".format(
"transformers should be == 4.25.1, found: {}".format(
transformers.__version__
)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@
if _HOROVOD_AVAILABLE:
import horovod

_HOROVOD_EQUAL_0_25_0 = horovod.__version__ == "0.25.0"
if not _HOROVOD_EQUAL_0_25_0:
_HOROVOD_EQUAL_0_27_0 = horovod.__version__ == "0.27.0"
if not _HOROVOD_EQUAL_0_27_0:
raise RuntimeError(
"horovod should be of version 0.25.0, found: {}".format(horovod.__version__)
"horovod should be of version 0.27.0, found: {}".format(horovod.__version__)
)
else:
raise ModuleNotFoundError("module not found: horovod")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
if _TRANSFORMERS_AVAILABLE:
import transformers

_TRANSFORMERS_EQUAL_4_15_0 = transformers.__version__ == "4.15.0"
if _TRANSFORMERS_EQUAL_4_15_0:
_TRANSFORMERS_EQUAL_4_25_1 = transformers.__version__ == "4.25.1"
if _TRANSFORMERS_EQUAL_4_25_1:
from transformers import AutoModelForSequenceClassification
else:
raise RuntimeError(
"transformers should be == 4.15.0, found: {}".format(
"transformers should be == 4.25.1, found: {}".format(
transformers.__version__
)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,16 @@
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"%pip install sqlparse raiwidgets interpret-community mlflow==2.6.0"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
"outputs": [],
"source": [
"%pip install sqlparse raiwidgets interpret-community mlflow==2.5.0"
]
},
{
"cell_type": "markdown",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,24 +16,33 @@
},
{
"cell_type": "markdown",
"source": [
"### Environment Setup on databricks"
],
"metadata": {
"collapsed": false
}
},
"source": [
"### Environment Setup on databricks"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# install cloudpickle 2.0.0 to add synapse module for usage of horovod\n",
"%pip install cloudpickle==2.0.0 --force-reinstall --no-deps"
],
"metadata": {
"collapsed": false
}
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install protobuf==3.20.1 --force-reinstall"
]
},
{
"cell_type": "code",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,15 @@
"%pip install cloudpickle==2.0.0 --force-reinstall --no-deps"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install protobuf==3.20.1 --force-reinstall"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install hyperopt mlflow"
"%pip install hyperopt mlflow==2.5.0"
]
},
{
Expand Down
12 changes: 6 additions & 6 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ dependencies:
- r-devtools=2.4.2
- pip:
- pyarrow>=0.15.0
- numpy>=1.19.3
- pyspark==3.2.3
- pyspark==3.3.2
- pandas==1.2.5
- wheel
- sphinx==4.2.0
Expand All @@ -32,15 +31,16 @@ dependencies:
- twine
- jupyter
- mlflow
- torch==1.11.0
- torchvision==0.12.0
- horovod==0.25.0
- numpy
- torch==1.13.1
- torchvision==0.14.1
- horovod==0.27.0
- petastorm>=0.11.0
- pytorch_lightning==1.5.0
- onnxmltools==1.7.0
- matplotlib
- Pillow
- transformers==4.15.0
- transformers==4.25.1
- huggingface-hub>=0.8.1
- langchain==0.0.151
- openai==0.27.5
Expand Down
4 changes: 2 additions & 2 deletions pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -527,8 +527,8 @@ jobs:
fi
sbt publishM2
SPARK_VERSION=3.2.4
HADOOP_VERSION=3.2
SPARK_VERSION=3.3.2
HADOOP_VERSION=3
wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
(timeout 20m sbt "project $(PACKAGE)" coverage testR) || (echo "retrying" && timeout 20m sbt "project $(PACKAGE)" coverage testR) || (echo "retrying" && timeout 20m sbt "project $(PACKAGE)" coverage testR)
- task: PublishTestResults@2
Expand Down
4 changes: 2 additions & 2 deletions start
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#!/bin/bash

export OPENMPI_VERSION="3.1.2"
export SPARK_VERSION="3.2.3"
export HADOOP_VERSION="2.7"
export SPARK_VERSION="3.3.2"
export HADOOP_VERSION="3.3"
export SYNAPSEML_VERSION="0.11.3" # Binder compatibility version

echo "Beginning Spark Session..."
Expand Down
4 changes: 2 additions & 2 deletions tools/docker/demo/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ FROM mcr.microsoft.com/oss/mirror/docker.io/library/ubuntu:20.04
ARG SYNAPSEML_VERSION=0.11.3
ARG DEBIAN_FRONTEND=noninteractive

ENV SPARK_VERSION=3.2.3
ENV HADOOP_VERSION=2.7
ENV SPARK_VERSION=3.3.2
ENV HADOOP_VERSION=3
ENV SYNAPSEML_VERSION=${SYNAPSEML_VERSION}
ENV JAVA_HOME /usr/lib/jvm/java-1.11.0-openjdk-amd64

Expand Down
4 changes: 2 additions & 2 deletions tools/docker/minimal/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ FROM mcr.microsoft.com/oss/mirror/docker.io/library/ubuntu:20.04
ARG SYNAPSEML_VERSION=0.11.3
ARG DEBIAN_FRONTEND=noninteractive

ENV SPARK_VERSION=3.2.3
ENV HADOOP_VERSION=2.7
ENV SPARK_VERSION=3.3.2
ENV HADOOP_VERSION=3
ENV SYNAPSEML_VERSION=${SYNAPSEML_VERSION}
ENV JAVA_HOME /usr/lib/jvm/java-1.11.0-openjdk-amd64

Expand Down
8 changes: 4 additions & 4 deletions tools/dotnet/dotnetSetup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ echo "##vso[task.setvariable variable=DOTNET_WORKER_DIR]$DOTNET_WORKER_DIR"
# Install Sleet
dotnet tool install -g sleet

# Install Apache Spark-3.2
curl https://archive.apache.org/dist/spark/spark-3.2.0/spark-3.2.0-bin-hadoop3.2.tgz -o spark-3.2.0-bin-hadoop3.2.tgz
# Install Apache Spark-3.3
curl https://archive.apache.org/dist/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz -o spark-3.3.2-bin-hadoop3.tgz
mkdir ~/bin
tar -xzvf spark-3.2.0-bin-hadoop3.2.tgz -C ~/bin
export SPARK_HOME=~/bin/spark-3.2.0-bin-hadoop3.2/
tar -xzvf spark-3.3.2-bin-hadoop3.tgz -C ~/bin
export SPARK_HOME=~/bin/spark-3.3.2-bin-hadoop3/
export PATH=$SPARK_HOME/bin:$PATH
echo "##vso[task.setvariable variable=SPARK_HOME]$SPARK_HOME"
echo "##vso[task.setvariable variable=PATH]$SPARK_HOME/bin:$PATH"
2 changes: 1 addition & 1 deletion tools/tests/run_r_tests.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ if (!require("sparklyr")) {
library("sparklyr")
}

spark_install_tar(paste(getwd(), "/../../../../../../spark-3.2.4-bin-hadoop3.2.tgz", sep = ""))
spark_install_tar(paste(getwd(), "/../../../../../../spark-3.3.2-bin-hadoop3.tgz", sep = ""))

options("testthat.output_file" = "../../../../r-test-results.xml")
devtools::test(reporter = JunitReporter$new())

0 comments on commit 8808d19

Please sign in to comment.