From 260c8598b9fd7bb02427b90910ece3aa95aa4bf3 Mon Sep 17 00:00:00 2001
From: Gera Shegalov
Date: Fri, 7 Feb 2025 07:49:41 +0000
Subject: [PATCH 1/2] Use 350db143 jars in integration tests on [databricks]
 14.3

Resolves #11988 and related issues

- Consolidate DBR-specific logic in jenkins/databricks/common_vars.sh
- Add DBR version suffix when necessary

Signed-off-by: Gera Shegalov
---
 integration_tests/run_pyspark_from_build.sh | 35 ---------------
 jenkins/databricks/common_vars.sh           | 47 ++++++++++++++++++++-
 2 files changed, 46 insertions(+), 36 deletions(-)

diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh
index be06189c7f3..cf773197e7f 100755
--- a/integration_tests/run_pyspark_from_build.sh
+++ b/integration_tests/run_pyspark_from_build.sh
@@ -287,41 +287,6 @@ else
 
     export PYSP_TEST_spark_hadoop_hive_exec_scratchdir="$RUN_DIR/hive"
 
-    # Extract Databricks version from deployed configs.
-    # spark.databricks.clusterUsageTags.sparkVersion is set automatically on Databricks
-    # notebooks but not when running Spark manually.
-    #
-    # At the OS level the DBR version can be obtailed via
-    # 1. DATABRICKS_RUNTIME_VERSION environment set by Databricks, e.g., 11.3
-    # 2. File at /databricks/DBR_VERSION created by Databricks, e.g., 11.3
-    # 3. The value for Spark conf in file /databricks/common/conf/deploy.conf created by Databricks,
-    #    e.g. 11.3.x-gpu-ml-scala2.12
-    #
-    # For cases 1 and 2 append '.' for version matching in 3XYdb SparkShimServiceProvider
-    #
-    DBR_VERSION=/databricks/DBR_VERSION
-    DB_DEPLOY_CONF=/databricks/common/conf/deploy.conf
-    if [[ -n "${DATABRICKS_RUNTIME_VERSION}" ]]; then
-        export PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion="${DATABRICKS_RUNTIME_VERSION}."
-    elif [[ -f $DBR_VERSION || -f $DB_DEPLOY_CONF ]]; then
-        DB_VER="$(< ${DBR_VERSION})." || \
-            DB_VER=$(grep spark.databricks.clusterUsageTags.sparkVersion $DB_DEPLOY_CONF | sed -e 's/.*"\(.*\)".*/\1/')
-        # if we did not error out on reads we should have at least four characters "x.y."
-        if (( ${#DB_VER} < 4 )); then
-            echo >&2 "Unable to determine Databricks version, unexpected length of: ${DB_VER}"
-            exit 1
-        fi
-        export PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion=$DB_VER
-    else
-        cat << EOF
-This node does not define
-- DATABRICKS_RUNTIME_VERSION environment,
-- Files containing version information: $DBR_VERSION, $DB_DEPLOY_CONF
-
-Proceeding assuming a non-Databricks environment.
-EOF
-
-    fi
 
     # Set spark.task.maxFailures for most schedulers.
     #
diff --git a/jenkins/databricks/common_vars.sh b/jenkins/databricks/common_vars.sh
index 805eb989c53..4eb868eca1f 100644
--- a/jenkins/databricks/common_vars.sh
+++ b/jenkins/databricks/common_vars.sh
@@ -21,7 +21,52 @@ if [ -n "$EXTRA_ENVS" ]; then
 fi
 
 SPARK_VER=${SPARK_VER:-$(< /databricks/spark/VERSION)}
-export SPARK_SHIM_VER=${SPARK_SHIM_VER:-spark${SPARK_VER//.}db}
+
+
+# Extract Databricks version from deployed configs.
+# spark.databricks.clusterUsageTags.sparkVersion is set automatically on Databricks
+# notebooks but not when running Spark manually.
+#
+# At the OS level the DBR version can be obtained via
+# 1. DATABRICKS_RUNTIME_VERSION environment set by Databricks, e.g., 11.3
+# 2. File at /databricks/DBR_VERSION created by Databricks, e.g., 11.3
+# 3. The value for Spark conf in file /databricks/common/conf/deploy.conf created by Databricks,
+#    e.g. 11.3.x-gpu-ml-scala2.12
+#
+# For cases 1 and 2 append '.' for version matching in 3XYdb SparkShimServiceProvider
+#
+DBR_VERSION=/databricks/DBR_VERSION
+DB_DEPLOY_CONF=/databricks/common/conf/deploy.conf
+if [[ -n "${DATABRICKS_RUNTIME_VERSION}" ]]; then
+    export PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion="${DATABRICKS_RUNTIME_VERSION}."
+elif [[ -f $DBR_VERSION || -f $DB_DEPLOY_CONF ]]; then
+    DB_VER="$(< ${DBR_VERSION})." || \
+        DB_VER=$(grep spark.databricks.clusterUsageTags.sparkVersion $DB_DEPLOY_CONF | sed -e 's/.*"\(.*\)".*/\1/')
+    # if we did not error out on reads we should have at least four characters "x.y."
+    if (( ${#DB_VER} < 4 )); then
+        echo >&2 "Unable to determine Databricks version, unexpected length of: ${DB_VER}"
+        exit 1
+    fi
+    export PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion=$DB_VER
+else
+    cat << EOF
+This node does not define
+- DATABRICKS_RUNTIME_VERSION environment,
+- Files containing version information: $DBR_VERSION, $DB_DEPLOY_CONF
+
+Proceeding assuming a non-Databricks environment.
+EOF
+
+fi
+
+# TODO make this standard going forward
+if [[ "$SPARK_VER" == '3.5.0' ]]; then
+    DB_VER_SUFFIX="${PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion//./}"
+else
+    DB_VER_SUFFIX=""
+fi
+
+export SPARK_SHIM_VER=${SPARK_SHIM_VER:-"spark${SPARK_VER//.}db${DB_VER_SUFFIX}"}
 
 # Setup SPARK_HOME if need
 if [[ -z "$SPARK_HOME" ]]; then

From accde811641b0cce59402b58d4b15fd1e8701863 Mon Sep 17 00:00:00 2001
From: Gera Shegalov
Date: Fri, 7 Feb 2025 08:02:41 +0000
Subject: [PATCH 2/2] Update copyright

Signed-off-by: Gera Shegalov
---
 jenkins/databricks/common_vars.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jenkins/databricks/common_vars.sh b/jenkins/databricks/common_vars.sh
index 4eb868eca1f..37fbc4e1124 100644
--- a/jenkins/databricks/common_vars.sh
+++ b/jenkins/databricks/common_vars.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2023-2025, NVIDIA CORPORATION. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
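
For reviewers: a minimal bash sketch (illustrative only, not part of the patch) of how the suffix logic added to common_vars.sh composes the shim identifier named in the subject. The hard-coded SPARK_VER and DATABRICKS_RUNTIME_VERSION values are assumed example inputs for a DBR 14.3 cluster, not values the script defines.

#!/bin/bash
# Sketch only: replays the DB_VER_SUFFIX derivation with assumed example
# inputs instead of reading them from a live Databricks node.
SPARK_VER='3.5.0'                   # normally read from /databricks/spark/VERSION
DATABRICKS_RUNTIME_VERSION='14.3'   # normally exported by Databricks

# Case 1 above: append '.' so the 3XYdb SparkShimServiceProvider can match.
PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion="${DATABRICKS_RUNTIME_VERSION}."

# Only Spark 3.5.0 builds (i.e., DBR 14.3) get a DBR-qualified suffix for now;
# "14.3." with the dots stripped becomes "143".
if [[ "$SPARK_VER" == '3.5.0' ]]; then
    DB_VER_SUFFIX="${PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion//./}"
else
    DB_VER_SUFFIX=""
fi

echo "spark${SPARK_VER//.}db${DB_VER_SUFFIX}"   # prints: spark350db143

With these inputs the sketch prints spark350db143, matching the DBR-qualified jars the integration tests now pick up.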