Skip to content

Commit

Permalink
#193: Drop support of Spark 2.4 (#196)
Browse files Browse the repository at this point in the history
* dropping Spark 2.4 support
* strict use of sbt projectmatrix plugin, to have cross-builds
* basic settings moved to `project/Setup.scala` class
* moved some project setup code so it falls into logical areas
* enhanced README.md to describe possible Spark 2.4 build
* increase version of sbt projectMatrix plugin
* using `SemVer` class for Scala versions instead of `String`
* moved publish related values into `publish.sbt`
* assembly fixed
* assembly plugin version upgraded
* commonSettings moved 
---------

Co-authored-by: Ladislav Sulak <[email protected]>
  • Loading branch information
benedeki and lsulak authored May 22, 2024
1 parent f98828b commit 8adf2b3
Show file tree
Hide file tree
Showing 10 changed files with 295 additions and 181 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@ It provides functionality for computing and pushing control metrics to the API l

For more information, see the [Vocabulary section](#Vocabulary) or `agent/README.md` for more technical documentation.

#### Spark 2.4 support
Because there are some Java-level incompatibilities between Spark 2.4 and Spark 3.x when built on Java 11+, we have to
drop support for Spark 2.4. If you need the agent to work with Spark 2.4, follow these steps:
* Switch to Java 8
* In `'build.sbt'` change the matrix rows to be Spark 2.4 and Scala 2.11 for modules _agent_ and _model_
* Build these two modules and use them in your project

### Server `server/`
An API under construction that communicates with the Agent and with the persistent storage. It also provides measure
configuration to the agent.
Expand Down
113 changes: 42 additions & 71 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -14,40 +14,25 @@
* limitations under the License.
*/

import Dependencies._
import JacocoSetup._
import sbt.Keys.name
import sbt.*
import Dependencies.*
import Dependencies.Versions.spark3
import VersionAxes.*

ThisBuild / organization := "za.co.absa.atum-service"
sonatypeProfileName := "za.co.absa"

ThisBuild / scalaVersion := Versions.scala213 // default version
ThisBuild / scalaVersion := Setup.scala213.asString // default version TODO

ThisBuild / versionScheme := Some("early-semver")

Global / onChangedBuildSource := ReloadOnSourceChanges

publish / skip := true

lazy val printSparkScalaVersion = taskKey[Unit]("Print Spark and Scala versions for atum-service is being built for.")
lazy val printScalaVersion = taskKey[Unit]("Print Scala versions for atum-service is being built for.")
initialize := {
val _ = initialize.value // Ensure previous initializations are run

lazy val commonSettings = Seq(
scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature", "-Xfatal-warnings"),
Test / parallelExecution := false,
jacocoExcludes := jacocoProjectExcludes()
)

val serverMergeStrategy = assembly / assemblyMergeStrategy := {
case PathList("META-INF", "services", xs @ _*) => MergeStrategy.filterDistinctLines
case PathList("META-INF", "maven", "org.webjars", "swagger-ui", "pom.properties") => MergeStrategy.singleOrError
case PathList("META-INF", "resources", "webjars", "swagger-ui", _*) => MergeStrategy.singleOrError
case PathList("META-INF", _*) => MergeStrategy.discard
case PathList("META-INF", "versions", "9", xs@_*) => MergeStrategy.discard
case PathList("module-info.class") => MergeStrategy.discard
case "application.conf" => MergeStrategy.concat
case "reference.conf" => MergeStrategy.concat
case _ => MergeStrategy.first
val requiredJavaVersion = VersionNumber("11")
val currentJavaVersion = VersionNumber(sys.props("java.specification.version"))
println(s"Running on Java version $currentJavaVersion, required is at least version $requiredJavaVersion")
//this routine can be used to assert the required Java version
}

enablePlugins(FlywayPlugin)
Expand All @@ -58,86 +43,72 @@ flywayLocations := FlywayConfiguration.flywayLocations
flywaySqlMigrationSuffixes := FlywayConfiguration.flywaySqlMigrationSuffixes
libraryDependencies ++= flywayDependencies


/**
* Module `server` is the service application that collects and stores measured data and upon request retrieves them
*/
lazy val server = (projectMatrix in file("server"))
.settings(
commonSettings ++ Seq(
Setup.commonSettings ++ Seq(
name := "atum-server",
libraryDependencies ++= Dependencies.serverDependencies ++ testDependencies,
javacOptions ++= Seq("-source", "11", "-target", "11", "-Xlint"),
scalacOptions ++= Seq("-release", "11", "-Ymacro-annotations"),
javacOptions ++= Setup.serverAndDbJavacOptions,
Compile / packageBin / publishArtifact := false,
printScalaVersion := {
val log = streams.value.log
log.info(s"Building ${name.value} with Scala ${scalaVersion.value}")
},
(Compile / compile) := ((Compile / compile) dependsOn printScalaVersion).value,
packageBin := (Compile / assembly).value,
artifactPath / (Compile / packageBin) := baseDirectory.value / s"target/${name.value}-${version.value}.jar",
testFrameworks += new TestFramework("zio.test.sbt.ZTestFramework"),
jacocoReportSettings := jacocoSettings(scalaVersion.value, "atum-server"),
serverMergeStrategy
Setup.serverMergeStrategy
): _*
)
.enablePlugins(AssemblyPlugin)
.enablePlugins(AutomateHeaderPlugin)
.jvmPlatform(scalaVersions = Seq(Versions.serviceScalaVersion))
.addSingleScalaBuild(Setup.serverAndDbScalaVersion, Dependencies.serverDependencies)
.dependsOn(model)

/**
* Module `agent` is the library to be plugged into the Spark application to measure the data and send it to the server
*/
lazy val agent = (projectMatrix in file("agent"))
.disablePlugins(sbtassembly.AssemblyPlugin)
.settings(
commonSettings ++ Seq(
Setup.commonSettings ++ Seq(
name := "atum-agent",
javacOptions ++= Seq("-source", "1.8", "-target", "1.8", "-Xlint"),
libraryDependencies ++= jsonSerdeDependencies ++ testDependencies ++ Dependencies.agentDependencies(
if (scalaVersion.value == Versions.scala211) Versions.spark2 else Versions.spark3,
scalaVersion.value
),
printSparkScalaVersion := {
val log = streams.value.log
val sparkVer = sparkVersionForScala(scalaVersion.value)
log.info(s"Building ${name.value} with Spark $sparkVer, Scala ${scalaVersion.value}")
},
(Compile / compile) := ((Compile / compile) dependsOn printSparkScalaVersion).value,
jacocoReportSettings := jacocoSettings(scalaVersion.value, "atum-agent")
javacOptions ++= Setup.clientJavacOptions
): _*
)
.jvmPlatform(scalaVersions = Versions.clientSupportedScalaVersions)
.addSparkCrossBuild(SparkVersionAxis(spark3), Setup.clientSupportedScalaVersions, Dependencies.agentDependencies)
.dependsOn(model)

/**
* Module `model` is the data model for data exchange with server
*/
lazy val model = (projectMatrix in file("model"))
.disablePlugins(sbtassembly.AssemblyPlugin)
.settings(
commonSettings ++ Seq(
Setup.commonSettings ++ Seq(
name := "atum-model",
javacOptions ++= Seq("-source", "1.8", "-target", "1.8", "-Xlint"),
libraryDependencies ++= jsonSerdeDependencies ++ testDependencies ++ Dependencies.modelDependencies(scalaVersion.value),
printScalaVersion := {
val log = streams.value.log
log.info(s"Building ${name.value} with Scala ${scalaVersion.value}")
},
(Compile / compile) := ((Compile / compile) dependsOn printScalaVersion).value,
jacocoReportSettings := jacocoSettings(scalaVersion.value, "atum-agent: model")
javacOptions ++= Setup.clientJavacOptions,
): _*
)
.jvmPlatform(scalaVersions = Versions.clientSupportedScalaVersions)
.addScalaCrossBuild(Setup.clientSupportedScalaVersions, Dependencies.modelDependencies)

/**
* Module `database` is the source of database structures of the service
*/
lazy val database = (projectMatrix in file("database"))
.disablePlugins(sbtassembly.AssemblyPlugin)
.settings(
commonSettings ++ Seq(
Setup.commonSettings ++ Seq(
name := "atum-database",
printScalaVersion := {
val log = streams.value.log
log.info(s"Building ${name.value} with Scala ${scalaVersion.value}")
},
libraryDependencies ++= Dependencies.databaseDependencies,
(Compile / compile) := ((Compile / compile) dependsOn printScalaVersion).value,
javacOptions ++= Setup.serverAndDbJavacOptions,
test := {}
): _*
)
.jvmPlatform(scalaVersions = Seq(Versions.serviceScalaVersion))
.addSingleScalaBuild(Setup.serverAndDbScalaVersion, Dependencies.databaseDependencies)

//----------------------------------------------------------------------------------------------------------------------
lazy val dbTest = taskKey[Unit]("Launch DB tests")

dbTest := {
println("Running DB tests")
(database.jvm(Versions.serviceScalaVersion) / Test / test).value
(database.jvm(Setup.serverAndDbScalaVersion.asString) / Test / test).value
}
86 changes: 41 additions & 45 deletions project/Dependencies.scala
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,15 @@
* limitations under the License.
*/

import sbt._
import sbt.*
import za.co.absa.commons.version.{Component, Version}

object Dependencies {

object Versions {
val spark2 = "2.4.8"
val spark2 = "2.4.7"
val spark3 = "3.3.2"

val scala211 = "2.11.12"
val scala212 = "2.12.18"
val scala213 = "2.13.11"

val serviceScalaVersion: String = scala213
val clientSupportedScalaVersions: Seq[String] = Seq(scala211, scala212, scala213)

val scalatest = "3.2.15"
val scalaMockito = "1.17.12"
val scalaLangJava8Compat = "1.0.2"
Expand All @@ -54,6 +48,19 @@ object Dependencies {

val json4s_spark2 = "3.5.3"
val json4s_spark3 = "3.7.0-M11"
/**
 * Resolves the json4s version to use for the given Scala version.
 * Scala 2.11 maps to the Spark 2 line, Scala 2.12/2.13 to the Spark 3 line.
 */
def json4s(scalaVersion: Version): String = {
  // TODO done this impractical way until https://github.com/AbsaOSS/commons/issues/134
  val major2 = Component("2")
  val minor11 = Component("11")
  val minor12 = Component("12")
  val minor13 = Component("13")
  scalaVersion.components match {
    case Seq(`major2`, `minor11`, _)                                   => json4s_spark2
    case Seq(`major2`, `minor12`, _) | Seq(`major2`, `minor13`, _)     => json4s_spark3
    case _ =>
      throw new IllegalArgumentException("Only Scala 2.11, 2.12, and 2.13 are currently supported.")
  }
}

val logback = "1.2.3"

Expand All @@ -72,41 +79,23 @@ object Dependencies {
val awssdk = "2.23.15"

val scalaNameof = "4.0.0"
}


private def truncateVersion(version: String, parts: Int): String = {
version.split("\\.").take(parts).mkString(".")
}

def getVersionUpToMinor(version: String): String = {
truncateVersion(version, 2)
}
val absaCommons = "2.0.0"

def getVersionUpToMajor(version: String): String = {
truncateVersion(version, 1)
}
/** Keeps only the first `parts` dot-separated segments of a version string. */
def truncateVersion(version: String, parts: Int): String = {
  val segments = version.split("\\.")
  segments.take(parts).mkString(".")
}

// this is just for the compile-depended printing task
def sparkVersionForScala(scalaVersion: String): String = {
truncateVersion(scalaVersion, 2) match {
case "2.11" => Versions.spark2
case "2.12" => Versions.spark3
case "2.13" => Versions.spark3
case _ => throw new IllegalArgumentException("Only Scala 2.11, 2.12, and 2.13 are currently supported.")
/** Shortens a version string to its major.minor part (first two segments). */
def getVersionUpToMinor(version: String): String =
  truncateVersion(version, 2)
}

def json4sVersionForScala(scalaVersion: String): String = {
truncateVersion(scalaVersion, 2) match {
case "2.11" => Versions.json4s_spark2
case "2.12" => Versions.json4s_spark3
case "2.13" => Versions.json4s_spark3
case _ => throw new IllegalArgumentException("Only Scala 2.11, 2.12, and 2.13 are currently supported.")
/** Shortens a version string to its major part only (first segment). */
def getVersionUpToMajor(version: String): String =
  truncateVersion(version, 1)
}

def testDependencies: Seq[ModuleID] = {
private def testDependencies: Seq[ModuleID] = {
lazy val scalatest = "org.scalatest" %% "scalatest" % Versions.scalatest % Test
lazy val mockito = "org.mockito" %% "mockito-scala" % Versions.scalaMockito % Test

Expand All @@ -116,8 +105,8 @@ object Dependencies {
)
}

def jsonSerdeDependencies: Seq[ModuleID] = {
val json4sVersion = json4sVersionForScala(Versions.scala212)
private def jsonSerdeDependencies(scalaVersion: Version): Seq[ModuleID] = {
val json4sVersion = Versions.json4s(scalaVersion)

lazy val jacksonModuleScala = "com.fasterxml.jackson.module" %% "jackson-module-scala" % Versions.jacksonModuleScala

Expand Down Expand Up @@ -209,12 +198,13 @@ object Dependencies {
zioTestSbt,
zioTestJunit,
sbtJunitInterface
)
) ++
testDependencies
}

def agentDependencies(sparkVersion: String, scalaVersion: String): Seq[ModuleID] = {
val sparkMinorVersion = getVersionUpToMinor(sparkVersion)
val scalaMinorVersion = getVersionUpToMinor(scalaVersion)
def agentDependencies(sparkVersion: String, scalaVersion: Version): Seq[ModuleID] = {
val sparkMinorVersion = Versions.getVersionUpToMinor(sparkVersion)
val scalaMinorVersion = Versions.getVersionUpToMinor(scalaVersion.asString)

lazy val sparkCore = "org.apache.spark" %% "spark-core" % sparkVersion % Provided
lazy val sparkSql = "org.apache.spark" %% "spark-sql" % sparkVersion % Provided
Expand All @@ -238,14 +228,20 @@ object Dependencies {
sttp,
logback,
nameOf
)
) ++
testDependencies
}

def modelDependencies(scalaVersion: String): Seq[ModuleID] = {
def modelDependencies(scalaVersion: Version): Seq[ModuleID] = {
lazy val specs2core = "org.specs2" %% "specs2-core" % Versions.specs2 % Test
lazy val typeSafeConfig = "com.typesafe" % "config" % Versions.typesafeConfig

Seq(specs2core, typeSafeConfig)
Seq(
specs2core,
typeSafeConfig
) ++
testDependencies ++
jsonSerdeDependencies(scalaVersion)
}

def databaseDependencies: Seq[ModuleID] = {
Expand Down
16 changes: 9 additions & 7 deletions project/JacocoSetup.scala
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import com.github.sbt.jacoco.JacocoKeys.JacocoReportFormats
import com.github.sbt.jacoco.report.JacocoReportSettings
import za.co.absa.commons.version.Version

import java.time.format.DateTimeFormatter
import java.time.{ZoneId, ZonedDateTime}
Expand All @@ -25,16 +26,17 @@ object JacocoSetup {
formats = Seq(JacocoReportFormats.HTML, JacocoReportFormats.XML)
)

def jacocoSettings(sparkVersion: String, scalaVersion: String, moduleName: String): JacocoReportSettings = {
private def now: String = {
val utcDateTime = ZonedDateTime.now.withZoneSameInstant(ZoneId.of("UTC"))
val now = s"as of ${DateTimeFormatter.ofPattern("yyyy-MM-dd hh:mm Z z").format(utcDateTime)}"
jacocoReportCommonSettings.withTitle(s"Jacoco Report on `$moduleName` for spark:$sparkVersion - scala:$scalaVersion [$now]")
s"as of ${DateTimeFormatter.ofPattern("yyyy-MM-dd hh:mm Z z").format(utcDateTime)}"
}

def jacocoSettings(scalaVersion: String, moduleName: String): JacocoReportSettings = {
val utcDateTime = ZonedDateTime.now.withZoneSameInstant(ZoneId.of("UTC"))
val now = s"as of ${DateTimeFormatter.ofPattern("yyyy-MM-dd hh:mm Z z").format(utcDateTime)}"
jacocoReportCommonSettings.withTitle(s"Jacoco Report on `$moduleName` for scala:$scalaVersion [$now]")
/** Jacoco report settings whose title carries the module name, Spark and Scala versions, and the build timestamp. */
def jacocoSettings(sparkVersion: String, scalaVersion: Version, moduleName: String): JacocoReportSettings = {
  val reportTitle = s"Jacoco Report on `$moduleName` for spark:$sparkVersion - scala:${scalaVersion.asString} [$now]"
  jacocoReportCommonSettings.withTitle(reportTitle)
}

/** Jacoco report settings whose title carries the module name, Scala version, and the build timestamp. */
def jacocoSettings(scalaVersion: Version, moduleName: String): JacocoReportSettings = {
  val reportTitle = s"Jacoco Report on `$moduleName` for scala:${scalaVersion.asString} [$now]"
  jacocoReportCommonSettings.withTitle(reportTitle)
}

def jacocoProjectExcludes(): Seq[String] = {
Expand Down
Loading

0 comments on commit 8adf2b3

Please sign in to comment.