From bb64705121b7052e0fc837e33dbb0ac7c07fb6a4 Mon Sep 17 00:00:00 2001 From: jeffbrennan Date: Sun, 21 Jan 2024 15:51:42 -0500 Subject: [PATCH] handle pyspark < 3.3.0 --- quinn/dataframe_helpers.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/quinn/dataframe_helpers.py b/quinn/dataframe_helpers.py index d4cc9ad8..687457ff 100644 --- a/quinn/dataframe_helpers.py +++ b/quinn/dataframe_helpers.py @@ -21,7 +21,16 @@ def column_to_list(df: DataFrame, col_name: str) -> list[Any]: :rtype: List[Any] """ pyarrow_kv = ("spark.sql.execution.arrow.pyspark.enabled", "true") + + if "pyspark" not in sys.modules: + raise ImportError + + # sparksession from df is not available in older versions of pyspark + if sys.modules["pyspark"].__version__ < "3.3.0": + return df.select(col_name).rdd.flatMap(lambda x: x).collect() + spark_config = df.sparkSession.sparkContext.getConf().getAll() + pyarrow_enabled: bool = pyarrow_kv in spark_config pyarrow_valid = pyarrow_enabled and sys.modules["pyarrow"] >= "0.17.0"