From d3bbd47bf24ecba60f47ded451af62957b73f283 Mon Sep 17 00:00:00 2001 From: Brendan Walsh Date: Wed, 10 Apr 2024 12:06:27 -0700 Subject: [PATCH 1/2] chore: downgrade to spark3.3 --- build.sbt | 15 +++++---------- .../azure/synapse/ml/codegen/PyCodegen.scala | 2 +- .../synapse/ml/core/env/PackageUtils.scala | 2 +- .../DistributionBalanceMeasure.scala | 3 +-- .../azure/synapse/ml/nn/BallTree.scala | 15 ++++++++------- .../recommendation/RecommendationHelper.scala | 13 +++++-------- .../azure/synapse/ml/codegen/RTestGen.scala | 2 +- .../ml/nbtest/DatabricksUtilities.scala | 16 ++++++++-------- .../SynapseExtensionUtilities.scala | 2 +- .../synapse/ml/nbtest/SynapseUtilities.scala | 2 +- .../src/main/python/horovod_installation.sh | 10 +++++----- .../python/synapse/ml/dl/DeepTextClassifier.py | 6 +++--- .../synapse/ml/dl/DeepVisionClassifier.py | 6 +++--- .../python/synapse/ml/dl/LitDeepTextModel.py | 6 +++--- environment.yml | 18 +++++++++--------- pipeline.yaml | 2 +- project/plugins.sbt | 8 ++------ start | 3 +-- tools/docker/demo/Dockerfile | 2 +- tools/docker/minimal/Dockerfile | 2 +- tools/tests/run_r_tests.R | 2 +- 21 files changed, 62 insertions(+), 75 deletions(-) diff --git a/build.sbt b/build.sbt index 0b6f9f6f79..e24e30a7c2 100644 --- a/build.sbt +++ b/build.sbt @@ -7,10 +7,10 @@ import scala.xml.transform.{RewriteRule, RuleTransformer} import scala.xml.{Node => XmlNode, NodeSeq => XmlNodeSeq, _} val condaEnvName = "synapseml" -val sparkVersion = "3.4.1" +val sparkVersion = "3.3.3" name := "synapseml" ThisBuild / organization := "com.microsoft.azure" -ThisBuild / scalaVersion := "2.12.17" +ThisBuild / scalaVersion := "2.12.15" val scalaMajorVersion = 2.12 @@ -20,27 +20,22 @@ val excludes = Seq( ) val coreDependencies = Seq( - // Excluding protobuf-java, as spark-core is bringing the older version transitively. 
- "org.apache.spark" %% "spark-core" % sparkVersion % "compile" exclude("com.google.protobuf", "protobuf-java"), + "org.apache.spark" %% "spark-core" % sparkVersion % "compile", "org.apache.spark" %% "spark-mllib" % sparkVersion % "compile", - "org.apache.spark" %% "spark-avro" % sparkVersion % "compile", + "org.apache.spark" %% "spark-avro" % sparkVersion % "provided", "org.apache.spark" %% "spark-tags" % sparkVersion % "test", "com.globalmentor" % "hadoop-bare-naked-local-fs" % "0.1.0" % "test", "org.scalatest" %% "scalatest" % "3.2.14" % "test") val extraDependencies = Seq( - "commons-lang" % "commons-lang" % "2.6", "org.scalactic" %% "scalactic" % "3.2.14", "io.spray" %% "spray-json" % "1.3.5", "com.jcraft" % "jsch" % "0.1.54", "org.apache.httpcomponents.client5" % "httpclient5" % "5.1.3", "org.apache.httpcomponents" % "httpmime" % "4.5.13", - "com.linkedin.isolation-forest" %% "isolation-forest_3.4.2" % "3.0.4" + "com.linkedin.isolation-forest" %% "isolation-forest_3.3.3" % "3.0.4" exclude("com.google.protobuf", "protobuf-java") exclude("org.apache.spark", "spark-mllib_2.12") exclude("org.apache.spark", "spark-core_2.12") exclude("org.apache.spark", "spark-avro_2.12") exclude("org.apache.spark", "spark-sql_2.12"), - // Although breeze 2.1.0 is already provided by Spark, this is needed for Azure Synapse Spark 3.4 pools. - // Otherwise a NoSuchMethodError will be thrown by interpretability code. 
- "org.scalanlp" %% "breeze" % "2.1.0" ).map(d => d excludeAll (excludes: _*)) val dependencies = coreDependencies ++ extraDependencies diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/PyCodegen.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/PyCodegen.scala index 425d7314f6..6c50f43ea7 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/PyCodegen.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/PyCodegen.scala @@ -70,7 +70,7 @@ object PyCodegen { // There's `Already borrowed` error found in transformers 4.16.2 when using tokenizers s"""extras_require={"extras": [ | "cmake", - | "horovod==0.28.1", + | "horovod==0.27.0", | "pytorch_lightning>=1.5.0,<1.5.10", | "torch==1.13.1", | "torchvision>=0.14.1", diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/PackageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/PackageUtils.scala index bc7d07087e..6fcbb7429e 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/PackageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/PackageUtils.scala @@ -21,7 +21,7 @@ object PackageUtils { // Use a fixed version for local testing // val PackageMavenCoordinate = s"$PackageGroup:$PackageName:1.0.5" - private val AvroCoordinate = "org.apache.spark:spark-avro_2.12:3.4.1" + private val AvroCoordinate = "org.apache.spark:spark-avro_2.12:3.3.3" val PackageRepository: String = SparkMLRepository // If testing onnx package with snapshots repo, make sure to switch to using diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/exploratory/DistributionBalanceMeasure.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/exploratory/DistributionBalanceMeasure.scala index 190f7e8ca2..571fe043aa 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/exploratory/DistributionBalanceMeasure.scala +++ 
b/core/src/main/scala/com/microsoft/azure/synapse/ml/exploratory/DistributionBalanceMeasure.scala @@ -3,7 +3,7 @@ package com.microsoft.azure.synapse.ml.exploratory -import breeze.stats.distributions.{ChiSquared, RandBasis} +import breeze.stats.distributions.{ChiSquared} import com.microsoft.azure.synapse.ml.codegen.Wrappable import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions import com.microsoft.azure.synapse.ml.logging.{FeatureNames, SynapseMLLogging} @@ -261,7 +261,6 @@ private[exploratory] case class DistributionMetrics(numFeatures: Int, // Calculates left-tailed p-value from degrees of freedom and chi-squared test statistic def chiSquaredPValue: Column = { - implicit val rand: RandBasis = RandBasis.mt0 val degOfFreedom = numFeatures - 1 val scoreCol = chiSquaredTestStatistic val chiSqPValueUdf = udf( diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/nn/BallTree.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/nn/BallTree.scala index 9f4435afe7..fd2451692f 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/nn/BallTree.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/nn/BallTree.scala @@ -8,6 +8,7 @@ import com.microsoft.azure.synapse.ml.core.env.StreamUtilities.using import java.io._ import scala.collection.JavaConverters._ +import breeze.linalg.functions.euclideanDistance private case class Query(point: DenseVector[Double], normOfQueryPoint: Double, @@ -100,16 +101,16 @@ trait BallTreeBase[V] { } /** Performs fast lookups of nearest neighbors using the Ball Tree algorithm for space partitioning - * - * Note that this code borrows heavily from - * https://github.com/felixmaximilian/mips - * - * @author Felix Maximilian - */ + * + * Note that this code borrows heavily from + * https://github.com/felixmaximilian/mips + * + * @author Felix Maximilian + */ case class BallTree[V](override val keys: IndexedSeq[DenseVector[Double]], override val values: IndexedSeq[V], override val leafSize: Int = 50) 
//scalastyle:ignore magic.number - extends Serializable with BallTreeBase[V] { + extends Serializable with BallTreeBase[V] { private val root: Node = makeBallTree(pointIdx) diff --git a/core/src/main/scala/org/apache/spark/ml/recommendation/RecommendationHelper.scala b/core/src/main/scala/org/apache/spark/ml/recommendation/RecommendationHelper.scala index 68169552f7..90c9814cab 100644 --- a/core/src/main/scala/org/apache/spark/ml/recommendation/RecommendationHelper.scala +++ b/core/src/main/scala/org/apache/spark/ml/recommendation/RecommendationHelper.scala @@ -199,20 +199,17 @@ object SparkHelpers { def flatten(ratings: Dataset[_], num: Int, dstOutputColumn: String, srcOutputColumn: String): DataFrame = { import ratings.sparkSession.implicits._ - import org.apache.spark.sql.functions.{collect_top_k, struct} + + val topKAggregator = new TopByKeyAggregator[Int, Int, Float](num, Ordering.by(_._2)) + val recs = ratings.as[(Int, Int, Float)].groupByKey(_._1).agg(topKAggregator.toColumn) + .toDF("id", "recommendations") val arrayType = ArrayType( new StructType() .add(dstOutputColumn, IntegerType) .add(Constants.RatingCol, FloatType) ) - - ratings.toDF(srcOutputColumn, dstOutputColumn, Constants.RatingCol).groupBy(srcOutputColumn) - .agg(collect_top_k(struct(Constants.RatingCol, dstOutputColumn), num, false)) - .as[(Int, Seq[(Float, Int)])] - .map(t => (t._1, t._2.map(p => (p._2, p._1)))) - .toDF(srcOutputColumn, Constants.Recommendations) - .withColumn(Constants.Recommendations, col(Constants.Recommendations).cast(arrayType)) + recs.select(col("id").as(srcOutputColumn), col("recommendations").cast(arrayType)) } } diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/codegen/RTestGen.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/codegen/RTestGen.scala index 2a86894bc2..be47791e69 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/codegen/RTestGen.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/codegen/RTestGen.scala @@ 
-101,7 +101,7 @@ object RTestGen { | "spark.sql.shuffle.partitions=10", | "spark.sql.crossJoin.enabled=true") | - |sc <- spark_connect(master = "local", version = "3.4.1", config = conf) + |sc <- spark_connect(master = "local", version = "3.3.3", config = conf) | |""".stripMargin, StandardOpenOption.CREATE) diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala index 55e0fbdfce..fdbb3c4058 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala @@ -31,11 +31,11 @@ object DatabricksUtilities { // ADB Info val Region = "eastus" - val PoolName = "synapseml-build-13.3" - val GpuPoolName = "synapseml-build-13.3-gpu" - val AdbRuntime = "13.3.x-scala2.12" - // https://docs.databricks.com/en/release-notes/runtime/13.3lts-ml.html - val AdbGpuRuntime = "13.3.x-gpu-ml-scala2.12" + val PoolName = "synapseml-build-12.2" + val GpuPoolName = "synapseml-build-12.2-gpu" + val AdbRuntime = "12.2.x-scala2.12" + // https://learn.microsoft.com/en-us/azure/databricks/release-notes/runtime/ + val AdbGpuRuntime = "12.2.x-gpu-ml-scala2.12" val NumWorkers = 5 val AutoTerminationMinutes = 15 @@ -84,9 +84,9 @@ object DatabricksUtilities { Map("maven" -> Map("coordinates" -> PackageMavenCoordinate, "repo" -> PackageRepository)), Map("pypi" -> Map("package" -> "pytorch-lightning==1.5.0")), Map("pypi" -> Map("package" -> "torchvision==0.14.1")), - Map("pypi" -> Map("package" -> "transformers==4.32.1")), - Map("pypi" -> Map("package" -> "petastorm==0.12.0")), - Map("pypi" -> Map("package" -> "protobuf==3.20.3")) + Map("pypi" -> Map("package" -> "transformers==4.25.1")), + Map("pypi" -> Map("package" -> "petastorm==0.12.1")), + Map("pypi" -> Map("package" -> "protobuf==3.19.4")) ).toJson.compactPrint val RapidsInitScripts: String = List( diff 
--git a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseExtension/SynapseExtensionUtilities.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseExtension/SynapseExtensionUtilities.scala index f2264025f4..95c5205bad 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseExtension/SynapseExtensionUtilities.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseExtension/SynapseExtensionUtilities.scala @@ -83,7 +83,7 @@ object SynapseExtensionUtilities { |"{ | 'Default${store}ArtifactId': '$storeId', | 'ExecutableFile': '$path', - | 'SparkVersion':'3.4', + | 'SparkVersion':'3.3', | 'SparkSettings': { | 'spark.jars.packages' : '$SparkMavenPackageList', | 'spark.jars.repositories' : '$SparkMavenRepositoryList', diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseUtilities.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseUtilities.scala index 433c0c6601..512bbe6560 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseUtilities.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseUtilities.scala @@ -255,7 +255,7 @@ object SynapseUtilities { | "nodeSizeFamily": "MemoryOptimized", | "provisioningState": "Succeeded", | "sessionLevelPackagesEnabled": "true", - | "sparkVersion": "3.4" + | "sparkVersion": "3.3" | } |} |""".stripMargin diff --git a/deep-learning/src/main/python/horovod_installation.sh b/deep-learning/src/main/python/horovod_installation.sh index 22124422ff..8bd5f19c02 100644 --- a/deep-learning/src/main/python/horovod_installation.sh +++ b/deep-learning/src/main/python/horovod_installation.sh @@ -8,9 +8,9 @@ set -eu # Install prerequisite libraries that horovod depends on pip install pytorch-lightning==1.5.0 pip install torchvision==0.14.1 -pip install transformers==4.32.1 +pip install transformers==4.25.1 pip install petastorm>=0.12.0 -pip install protobuf==3.20.3 +pip install 
protobuf==3.19.1 # Remove Outdated Signing Key: sudo apt-key del 7fa2af80 @@ -35,8 +35,8 @@ libcusparse-dev-11-0=11.1.1.245-1 git clone --recursive https://github.com/horovod/horovod.git cd horovod -# git fetch origin refs/tags/v0.28.1:tags/v0.28.1 -git checkout 1d217b59949986d025f6db93c49943fb6b6cc78f +# git fetch origin refs/tags/v0.27.0:tags/v0.27.0 +git checkout bfaca90d5cf66780a97d8799d4e1573855b64560 git checkout -b tmp-branch rm -rf build/ dist/ HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_CUDA_HOME=/usr/local/cuda-11/ HOROVOD_WITH_PYTORCH=1 HOROVOD_WITHOUT_MXNET=1 \ @@ -44,4 +44,4 @@ HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_CUDA_HOME=/usr/local/cuda-11/ HOROVOD_WITH_PY readlink -f dist/horovod-*.whl -pip install --no-cache-dir dist/horovod-0.28.1-cp38-cp38-linux_x86_64.whl --force-reinstall --no-deps +pip install --no-cache-dir dist/horovod-0.27.0-cp38-cp38-linux_x86_64.whl --force-reinstall --no-deps diff --git a/deep-learning/src/main/python/synapse/ml/dl/DeepTextClassifier.py b/deep-learning/src/main/python/synapse/ml/dl/DeepTextClassifier.py index 8e001f3be6..fcd6ff59cb 100644 --- a/deep-learning/src/main/python/synapse/ml/dl/DeepTextClassifier.py +++ b/deep-learning/src/main/python/synapse/ml/dl/DeepTextClassifier.py @@ -11,12 +11,12 @@ if _TRANSFORMERS_AVAILABLE: import transformers - _TRANSFORMERS_EQUAL_4_32_1 = transformers.__version__ == "4.32.1" - if _TRANSFORMERS_EQUAL_4_32_1: + _TRANSFORMERS_EQUAL_4_25_1 = transformers.__version__ == "4.25.1" + if _TRANSFORMERS_EQUAL_4_25_1: from transformers import AutoTokenizer else: raise RuntimeError( - "transformers should be == 4.32.1, found: {}".format( + "transformers should be == 4.25.1, found: {}".format( transformers.__version__ ) ) diff --git a/deep-learning/src/main/python/synapse/ml/dl/DeepVisionClassifier.py b/deep-learning/src/main/python/synapse/ml/dl/DeepVisionClassifier.py index f8b624e6c7..4723da1016 100644 --- a/deep-learning/src/main/python/synapse/ml/dl/DeepVisionClassifier.py +++ 
b/deep-learning/src/main/python/synapse/ml/dl/DeepVisionClassifier.py @@ -19,10 +19,10 @@ if _HOROVOD_AVAILABLE: import horovod - _HOROVOD_EQUAL_0_28_1 = horovod.__version__ == "0.28.1" - if not _HOROVOD_EQUAL_0_28_1: + _HOROVOD_EQUAL_0_27_0 = horovod.__version__ == "0.27.0" + if not _HOROVOD_EQUAL_0_27_0: raise RuntimeError( - "horovod should be of version 0.28.1, found: {}".format(horovod.__version__) + "horovod should be of version 0.27.0, found: {}".format(horovod.__version__) ) else: raise ModuleNotFoundError("module not found: horovod") diff --git a/deep-learning/src/main/python/synapse/ml/dl/LitDeepTextModel.py b/deep-learning/src/main/python/synapse/ml/dl/LitDeepTextModel.py index 134bc5f135..b17b9f5f18 100644 --- a/deep-learning/src/main/python/synapse/ml/dl/LitDeepTextModel.py +++ b/deep-learning/src/main/python/synapse/ml/dl/LitDeepTextModel.py @@ -13,12 +13,12 @@ if _TRANSFORMERS_AVAILABLE: import transformers - _TRANSFORMERS_EQUAL_4_32_1 = transformers.__version__ == "4.32.1" - if _TRANSFORMERS_EQUAL_4_32_1: + _TRANSFORMERS_EQUAL_4_25_1 = transformers.__version__ == "4.25.1" + if _TRANSFORMERS_EQUAL_4_25_1: from transformers import AutoModelForSequenceClassification else: raise RuntimeError( - "transformers should be == 4.32.1, found: {}".format( + "transformers should be == 4.25.1, found: {}".format( transformers.__version__ ) ) diff --git a/environment.yml b/environment.yml index e9361ad5de..8e618d83ef 100644 --- a/environment.yml +++ b/environment.yml @@ -3,7 +3,7 @@ channels: - conda-forge - default dependencies: - - python=3.11.8 + - python=3.8.8 - requests=2.26.0 - pip=21.3 - r-base=4.1.1 @@ -11,10 +11,10 @@ dependencies: - r-devtools=2.4.2 - pip: - pyarrow>=0.15.0 - - pyspark==3.4.1 - - pandas==1.4.0 + - pyspark==3.3.3 + - pandas==1.2.5 - wheel - - sphinx==5.0.2 + - sphinx==4.2.0 - sphinx_paramlinks==0.5.2 - sphinx_rtd_theme - coverage @@ -32,17 +32,17 @@ dependencies: - twine - mlflow - numpy - - torch==2.0.0 - - torchvision==0.15.1 - - 
horovod==0.28.1 + - torch==1.13.1 + - torchvision==0.14.1 + - horovod==0.27.0 - petastorm>=0.11.0 - pytorch_lightning==1.5.0 - onnxmltools==1.7.0 - matplotlib - Pillow - - transformers==4.32.1 + - transformers==4.25.1 - huggingface-hub>=0.8.1 - - langchain==0.0.152 + - langchain==0.0.151 - openai==0.27.5 - black==22.3.0 - black[jupyter]==22.3.0 diff --git a/pipeline.yaml b/pipeline.yaml index 0e75e509c1..6903b4c3f4 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -457,7 +457,7 @@ jobs: (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) sbt codegen sbt publishM2 - SPARK_VERSION=3.4.1 + SPARK_VERSION=3.3.3 HADOOP_VERSION=3 wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz - task: AzureCLI@2 diff --git a/project/plugins.sbt b/project/plugins.sbt index 562a0e139b..cc48a44dbd 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -4,10 +4,6 @@ addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.9.0") addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "3.8") addSbtPlugin("com.jsuereth" % "sbt-pgp" % "1.1.1") addSbtPlugin("com.dwijnand" % "sbt-dynver" % "4.0.0") -addSbtPlugin("org.scoverage" % "sbt-scoverage" % "2.0.8") +addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.9.2") addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.10.0-RC1") -addSbtPlugin("no.arktekk.sbt" % "aether-deploy" % "0.26.0") - -ThisBuild / libraryDependencySchemes ++= Seq( - "org.scala-lang.modules" %% "scala-xml" % VersionScheme.Always -) +addSbtPlugin("no.arktekk.sbt" % "aether-deploy" % "0.26.0") \ No newline at end of file diff --git a/start b/start index ac866307f1..c15215529d 100644 --- a/start +++ b/start @@ -1,8 +1,7 @@ #!/bin/bash export OPENMPI_VERSION="3.1.2" - -export SPARK_VERSION="3.4.1" +export SPARK_VERSION="3.3.3" export HADOOP_VERSION="3.3" export SYNAPSEML_VERSION="1.0.5" # Binder compatibility version diff --git 
a/tools/docker/demo/Dockerfile b/tools/docker/demo/Dockerfile index c5db1030e4..6d974540b8 100644 --- a/tools/docker/demo/Dockerfile +++ b/tools/docker/demo/Dockerfile @@ -3,7 +3,7 @@ FROM mcr.microsoft.com/oss/mirror/docker.io/library/ubuntu:20.04 ARG SYNAPSEML_VERSION=1.0.5 ARG DEBIAN_FRONTEND=noninteractive -ENV SPARK_VERSION=3.4.1 +ENV SPARK_VERSION=3.3.3 ENV HADOOP_VERSION=3 ENV SYNAPSEML_VERSION=${SYNAPSEML_VERSION} ENV JAVA_HOME /usr/lib/jvm/java-1.11.0-openjdk-amd64 diff --git a/tools/docker/minimal/Dockerfile b/tools/docker/minimal/Dockerfile index 33fcc54366..efcf95035e 100644 --- a/tools/docker/minimal/Dockerfile +++ b/tools/docker/minimal/Dockerfile @@ -3,7 +3,7 @@ FROM mcr.microsoft.com/oss/mirror/docker.io/library/ubuntu:20.04 ARG SYNAPSEML_VERSION=1.0.5 ARG DEBIAN_FRONTEND=noninteractive -ENV SPARK_VERSION=3.4.1 +ENV SPARK_VERSION=3.3.3 ENV HADOOP_VERSION=3 ENV SYNAPSEML_VERSION=${SYNAPSEML_VERSION} ENV JAVA_HOME /usr/lib/jvm/java-1.11.0-openjdk-amd64 diff --git a/tools/tests/run_r_tests.R b/tools/tests/run_r_tests.R index a5a61260f2..e9e684bbf1 100644 --- a/tools/tests/run_r_tests.R +++ b/tools/tests/run_r_tests.R @@ -3,7 +3,7 @@ if (!require("sparklyr")) { library("sparklyr") } -spark_install_tar(paste(getwd(), "/../../../../../../spark-3.4.1-bin-hadoop3.tgz", sep = "")) +spark_install_tar(paste(getwd(), "/../../../../../../spark-3.3.3-bin-hadoop3.tgz", sep = "")) options("testthat.output_file" = "../../../../r-test-results.xml") devtools::test(reporter = JunitReporter$new()) From f5187e2e40fee992f240006c21ce384df78f7810 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 9 Sep 2024 21:36:06 -0700 Subject: [PATCH 2/2] fix: NoSuchMethodError from breeze in interpretability notebook when running in Fabric 3.3 pool --- build.sbt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/build.sbt b/build.sbt index e24e30a7c2..be0cb1cb9a 100644 --- a/build.sbt +++ b/build.sbt @@ -36,6 +36,9 @@ val extraDependencies = Seq( exclude("com.google.protobuf", 
"protobuf-java") exclude("org.apache.spark", "spark-mllib_2.12") exclude("org.apache.spark", "spark-core_2.12") exclude("org.apache.spark", "spark-avro_2.12") exclude("org.apache.spark", "spark-sql_2.12"), + // Although breeze 1.2 is already provided by Spark, this is needed for Fabric Spark 3.3 pools. + // Otherwise a NoSuchMethodError will be thrown by interpretability code. + "org.scalanlp" %% "breeze" % "1.2" ).map(d => d excludeAll (excludes: _*)) val dependencies = coreDependencies ++ extraDependencies