Skip to content

Commit

Permalink
Make Spark scripts more robust: support preview versions and Spark 4 …
Browse files Browse the repository at this point in the history
…output
  • Loading branch information
mathbunnyru committed Oct 19, 2024
1 parent b6b22e1 commit c4cb04e
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 5 deletions.
14 changes: 11 additions & 3 deletions images/pyspark-notebook/setup_spark.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,18 @@ def get_latest_spark_version() -> str:
for ref in all_refs
if ref.startswith("spark-") and "incubating" not in ref and "preview" not in ref
]

# Compare versions semantically
latest_version = max(
stable_versions, key=lambda ver: [int(sub_ver) for sub_ver in ver.split(".")]
)
def version_array(ver: str) -> tuple[int, int, int, str]:
    """Split a version string into a tuple that can be used as a sort key.

    Examples:
        3.5.3          -> (3, 5, 3, "")
        4.0.0-preview2 -> (4, 0, 0, "preview2")

    The last element is whatever follows the first "-" in the patch
    component, or "" when the patch component has no suffix.

    NOTE: tuples compare element by element and "" sorts before any
    non-empty string, so a suffixed version such as 4.0.0-preview2 ranks
    *above* plain 4.0.0. Exclude suffixed versions from the candidates
    when that ordering is not wanted.

    Raises:
        ValueError: if ``ver`` does not consist of exactly three
            dot-separated components, or if a numeric component is not
            an integer.
    """
    arr = ver.split(".")
    # Raise explicitly rather than ``assert``: assertions are removed under
    # ``python -O``, which would let e.g. "3.5.3.1" be silently truncated.
    if len(arr) != 3:
        raise ValueError(f"Expected MAJOR.MINOR.PATCH[-SUFFIX], got: {ver!r}")
    major, minor = int(arr[0]), int(arr[1])
    # "0-preview2" -> ("0", "-", "preview2"); "3" -> ("3", "", "")
    patch, _, preview = arr[2].partition("-")
    return (major, minor, int(patch), preview)

latest_version = max(stable_versions, key=lambda ver: version_array(ver))
LOGGER.info(f"Latest version: {latest_version}")
return latest_version

Expand Down
8 changes: 6 additions & 2 deletions tagging/taggers.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,12 @@ def tag_value(container: Container) -> str:
SPARK_VERSION_LINE_PREFIX = r" /___/ .__/\_,_/_/ /_/\_\ version"

spark_version = _get_program_version(container, "spark-submit")
version_line = spark_version.split("\n")[4]
assert version_line.startswith(SPARK_VERSION_LINE_PREFIX)
version_line = next(
filter(
lambda line: line.startswith(SPARK_VERSION_LINE_PREFIX),
spark_version.split("\n"),
)
)
return "spark-" + version_line.split(" ")[-1]


Expand Down

0 comments on commit c4cb04e

Please sign in to comment.