diff --git a/quinn/dataframe_helpers.py b/quinn/dataframe_helpers.py index d4cc9ad8..687457ff 100644 --- a/quinn/dataframe_helpers.py +++ b/quinn/dataframe_helpers.py @@ -21,7 +21,16 @@ def column_to_list(df: DataFrame, col_name: str) -> list[Any]: :rtype: List[Any] """ pyarrow_kv = ("spark.sql.execution.arrow.pyspark.enabled", "true") + + if "pyspark" not in sys.modules: + raise ImportError + + # sparksession from df is not available in older versions of pyspark + if sys.modules["pyspark"].__version__ < "3.3.0": + return df.select(col_name).rdd.flatMap(lambda x: x).collect() + spark_config = df.sparkSession.sparkContext.getConf().getAll() + pyarrow_enabled: bool = pyarrow_kv in spark_config pyarrow_valid = pyarrow_enabled and sys.modules["pyarrow"] >= "0.17.0"