From c4cb04ec370c1a520120c1f629d0b63ef730ae62 Mon Sep 17 00:00:00 2001 From: Ayaz Salikhov Date: Sat, 19 Oct 2024 19:08:04 +0100 Subject: [PATCH] Make Spark scripts more robust: support preview versions and Spark 4 output --- images/pyspark-notebook/setup_spark.py | 14 +++++++++++--- tagging/taggers.py | 8 ++++++-- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/images/pyspark-notebook/setup_spark.py b/images/pyspark-notebook/setup_spark.py index a494b8322a..266f2f7885 100755 --- a/images/pyspark-notebook/setup_spark.py +++ b/images/pyspark-notebook/setup_spark.py @@ -38,10 +38,18 @@ def get_latest_spark_version() -> str: for ref in all_refs if ref.startswith("spark-") and "incubating" not in ref and "preview" not in ref ] + # Compare versions semantically - latest_version = max( - stable_versions, key=lambda ver: [int(sub_ver) for sub_ver in ver.split(".")] - ) + def version_array(ver: str) -> tuple[int, int, int, str]: + # 3.5.3 -> [3, 5, 3, ""] + # 4.0.0-preview2 -> [4, 0, 0, "preview2"] + arr = ver.split(".") + assert len(arr) == 3, arr + major, minor = int(arr[0]), int(arr[1]) + patch, _, preview = arr[2].partition("-") + return (major, minor, int(patch), preview) + + latest_version = max(stable_versions, key=lambda ver: version_array(ver)) LOGGER.info(f"Latest version: {latest_version}") return latest_version diff --git a/tagging/taggers.py b/tagging/taggers.py index a42c9d053c..0f53827ee8 100644 --- a/tagging/taggers.py +++ b/tagging/taggers.py @@ -122,8 +122,12 @@ def tag_value(container: Container) -> str: SPARK_VERSION_LINE_PREFIX = r" /___/ .__/\_,_/_/ /_/\_\ version" spark_version = _get_program_version(container, "spark-submit") - version_line = spark_version.split("\n")[4] - assert version_line.startswith(SPARK_VERSION_LINE_PREFIX) + version_line = next( + filter( + lambda line: line.startswith(SPARK_VERSION_LINE_PREFIX), + spark_version.split("\n"), + ) + ) return "spark-" + version_line.split(" ")[-1]