Use 350db143 jars in integration tests on [databricks] 14.3 #12082

Merged · 3 commits · Feb 10, 2025
35 changes: 0 additions & 35 deletions integration_tests/run_pyspark_from_build.sh
@@ -287,41 +287,6 @@ else

export PYSP_TEST_spark_hadoop_hive_exec_scratchdir="$RUN_DIR/hive"

# Extract Databricks version from deployed configs.
# spark.databricks.clusterUsageTags.sparkVersion is set automatically on Databricks
# notebooks but not when running Spark manually.
#
# At the OS level the DBR version can be obtained via
# 1. The DATABRICKS_RUNTIME_VERSION environment variable set by Databricks, e.g., 11.3
# 2. The file /databricks/DBR_VERSION created by Databricks, e.g., 11.3
# 3. The value of the Spark conf in the file /databricks/common/conf/deploy.conf created by
#    Databricks, e.g., 11.3.x-gpu-ml-scala2.12
#
# For cases 1 and 2, append '.' for version matching in the 3XYdb SparkShimServiceProvider.
#
DBR_VERSION=/databricks/DBR_VERSION
DB_DEPLOY_CONF=/databricks/common/conf/deploy.conf
if [[ -n "${DATABRICKS_RUNTIME_VERSION}" ]]; then
export PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion="${DATABRICKS_RUNTIME_VERSION}."
elif [[ -f $DBR_VERSION || -f $DB_DEPLOY_CONF ]]; then
DB_VER="$(< ${DBR_VERSION})." || \
DB_VER=$(grep spark.databricks.clusterUsageTags.sparkVersion $DB_DEPLOY_CONF | sed -e 's/.*"\(.*\)".*/\1/')
# If we did not error out on the reads, we should have at least four characters: "x.y."
if (( ${#DB_VER} < 4 )); then
echo >&2 "Unable to determine Databricks version, unexpected length of: ${DB_VER}"
exit 1
fi
export PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion=$DB_VER
else
cat << EOF
This node does not define
- the DATABRICKS_RUNTIME_VERSION environment variable,
- files containing version information: $DBR_VERSION, $DB_DEPLOY_CONF

Proceeding assuming a non-Databricks environment.
EOF

fi

# Set spark.task.maxFailures for most schedulers.
#
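The block removed above moves verbatim into jenkins/databricks/common_vars.sh below. It chains its three version sources with ||, which works because a plain bash assignment reports the exit status of its command substitution. A minimal, hypothetical sketch of that fallback pattern (the missing file path is illustrative, not from the PR):

#!/bin/bash
# A plain assignment returns the exit status of its command substitution,
# so a failed $(< file) read triggers the || fallback -- the same pattern
# the version-detection block uses to fall back from /databricks/DBR_VERSION
# to deploy.conf. (The failed read also prints a diagnostic to stderr.)
v="$(< /no/such/file)." || v="fallback"   # the read fails: v briefly holds ".", then "fallback"
echo "$v"                                 # prints: fallback

This is also why the block keeps the (( ${#DB_VER} < 4 )) length check: if both the file read and the grep come up empty, DB_VER can still end up set but too short to be a valid "x.y." version.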
49 changes: 47 additions & 2 deletions jenkins/databricks/common_vars.sh
@@ -1,6 +1,6 @@
#!/bin/bash
#
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2023-2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -21,7 +21,52 @@ if [ -n "$EXTRA_ENVS" ]; then
fi

SPARK_VER=${SPARK_VER:-$(< /databricks/spark/VERSION)}
export SPARK_SHIM_VER=${SPARK_SHIM_VER:-spark${SPARK_VER//.}db}


# Extract Databricks version from deployed configs.
# spark.databricks.clusterUsageTags.sparkVersion is set automatically on Databricks
# notebooks but not when running Spark manually.
#
# At the OS level the DBR version can be obtained via
# 1. The DATABRICKS_RUNTIME_VERSION environment variable set by Databricks, e.g., 11.3
# 2. The file /databricks/DBR_VERSION created by Databricks, e.g., 11.3
# 3. The value of the Spark conf in the file /databricks/common/conf/deploy.conf created by
#    Databricks, e.g., 11.3.x-gpu-ml-scala2.12
#
# For cases 1 and 2, append '.' for version matching in the 3XYdb SparkShimServiceProvider.
#
DBR_VERSION=/databricks/DBR_VERSION
DB_DEPLOY_CONF=/databricks/common/conf/deploy.conf
if [[ -n "${DATABRICKS_RUNTIME_VERSION}" ]]; then
export PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion="${DATABRICKS_RUNTIME_VERSION}."
elif [[ -f $DBR_VERSION || -f $DB_DEPLOY_CONF ]]; then
DB_VER="$(< ${DBR_VERSION})." || \
DB_VER=$(grep spark.databricks.clusterUsageTags.sparkVersion $DB_DEPLOY_CONF | sed -e 's/.*"\(.*\)".*/\1/')
# If we did not error out on the reads, we should have at least four characters: "x.y."
if (( ${#DB_VER} < 4 )); then
echo >&2 "Unable to determine Databricks version, unexpected length of: ${DB_VER}"
exit 1
fi
export PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion=$DB_VER
else
cat << EOF
This node does not define
- the DATABRICKS_RUNTIME_VERSION environment variable,
- files containing version information: $DBR_VERSION, $DB_DEPLOY_CONF

Proceeding assuming a non-Databricks environment.
EOF

fi

# TODO make this standard going forward
if [[ "$SPARK_VER" == '3.5.0' ]]; then
DB_VER_SUFFIX="${PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion//./}"
else
DB_VER_SUFFIX=""
fi

export SPARK_SHIM_VER=${SPARK_SHIM_VER:-"spark${SPARK_VER//.}db${DB_VER_SUFFIX}"}

# Set up SPARK_HOME if needed
if [[ -z "$SPARK_HOME" ]]; then
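Putting the new logic together, here is an illustrative trace (values assumed from this PR's target: Spark 3.5.0 on Databricks 14.3) showing how the 350db143 shim name in the title falls out:

# Illustrative values for a Databricks 14.3 cluster running Spark 3.5.0:
SPARK_VER=3.5.0
# The trailing '.' comes from the detection block above (DATABRICKS_RUNTIME_VERSION=14.3).
PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion=14.3.
DB_VER_SUFFIX="${PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion//./}"   # -> 143
echo "spark${SPARK_VER//.}db${DB_VER_SUFFIX}"                                     # -> spark350db143

On earlier runtimes, where SPARK_VER is not 3.5.0, DB_VER_SUFFIX stays empty and SPARK_SHIM_VER keeps the old sparkXYZdb form.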