Skip to content

Commit

Permalink
Start of Scala + Armada work
Browse files Browse the repository at this point in the history
  • Loading branch information
ClifHouck committed Dec 2, 2024
1 parent 039f796 commit 322295c
Show file tree
Hide file tree
Showing 10 changed files with 795 additions and 3 deletions.
10 changes: 10 additions & 0 deletions assembly/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,16 @@
</dependency>
</dependencies>
</profile>
<profile>
  <id>armada</id>
  <dependencies>
    <!-- The spark-armada module declares no groupId of its own and therefore inherits
         org.apache.spark from spark-parent; the coordinates here must match that. -->
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-armada_${scala.binary.version}</artifactId>
      <version>${project.version}</version>
    </dependency>
  </dependencies>
</profile>
<profile>
<id>hive</id>
<dependencies>
Expand Down
25 changes: 23 additions & 2 deletions core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -260,9 +260,10 @@ private[spark] class SparkSubmit extends Logging {
case "yarn" => YARN
case m if m.startsWith("spark") => STANDALONE
case m if m.startsWith("k8s") => KUBERNETES
case m if m.startsWith("armada") => ARMADA
case m if m.startsWith("local") => LOCAL
case _ =>
error("Master must either be yarn or start with spark, k8s, or local")
error("Master must either be yarn or start with spark, k8s, armada, or local")
-1
}
case None => LOCAL // default master or remote mode.
Expand Down Expand Up @@ -296,6 +297,14 @@ private[spark] class SparkSubmit extends Logging {
}
}

// Armada submission requires the Armada submit client class on the classpath. Report a
// submit error early when it cannot be loaded; the check is bypassed when Utils.isTesting
// is true.
if (clusterManager == ARMADA) {
  if (!Utils.classIsLoadable(ARMADA_CLUSTER_SUBMIT_CLASS) && !Utils.isTesting) {
    // Keep the trailing space on the first literal: the two pieces are concatenated into
    // a single user-facing message.
    error(
      s"Could not load ARMADA classes (${ARMADA_CLUSTER_SUBMIT_CLASS}). " +
        "This copy of Spark may not have been compiled with ARMADA support.")
  }
}

// Fail fast, the following modes are not supported or applicable
(clusterManager, deployMode) match {
case (STANDALONE, CLUSTER) if args.isPython =>
Expand Down Expand Up @@ -329,6 +338,8 @@ private[spark] class SparkSubmit extends Logging {
val isKubernetesClient = clusterManager == KUBERNETES && deployMode == CLIENT
val isKubernetesClusterModeDriver = isKubernetesClient &&
sparkConf.getBoolean("spark.kubernetes.submitInDriver", false)
val isArmadaCluster = clusterManager == ARMADA && deployMode == CLUSTER
// TODO: Support armada & client?
val isCustomClasspathInClusterModeDisallowed =
!sparkConf.get(ALLOW_CUSTOM_CLASSPATH_BY_PROXY_USER_IN_CLUSTER_MODE) &&
args.proxyUser != null &&
Expand Down Expand Up @@ -416,6 +427,7 @@ private[spark] class SparkSubmit extends Logging {
downloadFileList(_, targetDir, sparkConf, hadoopConf)
}.orNull

// TODO: May have to do the same/similar for Armada
if (isKubernetesClusterModeDriver) {
// SPARK-33748: this mimics the behaviour of Yarn cluster mode. If the driver is running
// in cluster mode, the archives should be available in the driver's current working
Expand Down Expand Up @@ -670,6 +682,7 @@ private[spark] class SparkSubmit extends Logging {
confKey = KEYTAB.key),
OptionAssigner(args.pyFiles, ALL_CLUSTER_MGRS, CLUSTER, confKey = SUBMIT_PYTHON_FILES.key),

// TODO: Add Armada where appropriate.
// Propagate attributes for dependency resolution at the driver side
OptionAssigner(args.packages, STANDALONE | KUBERNETES,
CLUSTER, confKey = JAR_PACKAGES.key),
Expand Down Expand Up @@ -864,6 +877,11 @@ private[spark] class SparkSubmit extends Logging {
}
}

// In Armada cluster mode, hand the submission off to the Armada submit client class.
if (isArmadaCluster) {
childMainClass = ARMADA_CLUSTER_SUBMIT_CLASS
// TODO: Setup childArgs. No child arguments are populated for Armada yet, so the
// client class is currently launched without any arguments added by this branch.
}

// Load any properties specified through --conf and the default properties file
for ((k, v) <- args.sparkProperties) {
sparkConf.setIfMissing(k, v)
Expand Down Expand Up @@ -1071,7 +1089,8 @@ object SparkSubmit extends CommandLineUtils with Logging {
private val STANDALONE = 2
private val LOCAL = 8
private val KUBERNETES = 16
private val ALL_CLUSTER_MGRS = YARN | STANDALONE | LOCAL | KUBERNETES
private val ARMADA = 32
private val ALL_CLUSTER_MGRS = YARN | STANDALONE | LOCAL | KUBERNETES | ARMADA

// Deploy modes
private val CLIENT = 1
Expand All @@ -1095,6 +1114,8 @@ object SparkSubmit extends CommandLineUtils with Logging {
private[deploy] val STANDALONE_CLUSTER_SUBMIT_CLASS = classOf[ClientApp].getName()
private[deploy] val KUBERNETES_CLUSTER_SUBMIT_CLASS =
"org.apache.spark.deploy.k8s.submit.KubernetesClientApplication"
private[deploy] val ARMADA_CLUSTER_SUBMIT_CLASS =
"org.apache.spark.deploy.armada.submit.ArmadaClientApplication"

override def main(args: Array[String]): Unit = {
Option(System.getenv("SPARK_PREFER_IPV6"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,8 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
s"""
|Options:
| --master MASTER_URL spark://host:port, yarn,
| k8s://https://host:port, or local (Default: local[*]).
| k8s://https://host:port, armada://host:port,
| or local (Default: local[*]).
| --deploy-mode DEPLOY_MODE Whether to launch the driver program locally ("client") or
| on one of the worker machines inside the cluster ("cluster")
| (Default: client).
Expand Down
8 changes: 8 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3409,6 +3409,14 @@
</modules>
</profile>

<profile>
<id>armada</id>
<modules>
<module>resource-managers/armada/core</module>
</modules>
</profile>


<profile>
<id>hive-thriftserver</id>
<modules>
Expand Down
186 changes: 186 additions & 0 deletions resource-managers/armada/core/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<!-- TODO: Verify this somehow for Armada -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.13</artifactId>
<version>4.0.0-SNAPSHOT</version>
<relativePath>../../../pom.xml</relativePath>
</parent>

<artifactId>spark-armada_2.13</artifactId>
<packaging>jar</packaging>
<name>Spark Project Armada</name>
<properties>
<sbt.project.name>armada</sbt.project.name>
</properties>

<profiles>
<profile>
<id>volcano</id>
<dependencies>
<!--
<dependency>
<groupId>io.fabric8</groupId>
<artifactId>volcano-model-v1beta1</artifactId>
<version>${kubernetes-client.version}</version>
</dependency>
<dependency>
<groupId>io.fabric8</groupId>
<artifactId>volcano-client</artifactId>
<version>${kubernetes-client.version}</version>
</dependency>
-->
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<executions>
<execution>
<id>add-volcano-source</id>
<phase>generate-sources</phase>
<goals>
<goal>add-source</goal>
</goals>
<configuration>
<sources>
<source>volcano/src/main/scala</source>
</sources>
</configuration>
</execution>
<execution>
<id>add-volcano-test-sources</id>
<phase>generate-test-sources</phase>
<goals>
<goal>add-test-source</goal>
</goals>
<configuration>
<sources>
<source>volcano/src/test/scala</source>
</sources>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>

<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${project.version}</version>
</dependency>

<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${project.version}</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-tags_${scala.binary.version}</artifactId>
<type>test-jar</type>
<scope>test</scope>
</dependency>

<!--
<dependency>
<groupId>io.fabric8</groupId>
<artifactId>kubernetes-httpclient-okhttp</artifactId>
<version>${kubernetes-client.version}</version>
</dependency>
<dependency>
<groupId>io.fabric8</groupId>
<artifactId>kubernetes-client</artifactId>
<version>${kubernetes-client.version}</version>
<exclusions>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.dataformat</groupId>
<artifactId>jackson-dataformat-yaml</artifactId>
</exclusion>
<exclusion>
<groupId>javax.annotation</groupId>
<artifactId>javax.annotation-api</artifactId>
</exclusion>
</exclusions>
</dependency>
-->


<!-- Explicitly depend on shaded dependencies from the parent, since shaded deps aren't transitive -->
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</dependency>
<!-- End of shaded deps. -->

<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>net.bytebuddy</groupId>
<artifactId>byte-buddy</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>net.bytebuddy</groupId>
<artifactId>byte-buddy-agent</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.jmock</groupId>
<artifactId>jmock-junit5</artifactId>
<exclusions>
<exclusion>
<groupId>org.junit.jupiter</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>org.junit.platform</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
<scope>test</scope>
</dependency>

</dependencies>


<build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
</build>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

io.armadaproject.spark.deploy.submit.ArmadaSubmitOperation
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# NOTE(review): this provider file appears to belong to the new Armada module, yet it
# registers the Kubernetes cluster manager. Presumably a leftover from copying the
# kubernetes module; confirm whether an Armada-specific implementation should be
# listed here instead.
org.apache.spark.scheduler.cluster.k8s.KubernetesClusterManager
Loading

0 comments on commit 322295c

Please sign in to comment.