diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_classes.py index 956c62794bc43..ad945da81baa6 100644 --- a/python/pyspark/errors/error_classes.py +++ b/python/pyspark/errors/error_classes.py @@ -282,6 +282,11 @@ "`<arg_name>` should be one the values from PandasUDFType, got <arg_type>" ] }, + "INVALID_RETURN_TYPE_FOR_ARROW_UDF": { + "message": [ + "Grouped and Cogrouped map Arrow UDF should return StructType for <eval_type>, got <return_type>." + ] + }, "INVALID_RETURN_TYPE_FOR_PANDAS_UDF": { "message": [ "Pandas UDF should return StructType for <eval_type>, got <return_type>." diff --git a/python/pyspark/sql/udf.py b/python/pyspark/sql/udf.py index 33bbb0a2fd24b..9ffdbb2187112 100644 --- a/python/pyspark/sql/udf.py +++ b/python/pyspark/sql/udf.py @@ -288,10 +288,12 @@ def returnType(self) -> DataType: }, ) else: - raise TypeError( - "Invalid return type for grouped map Arrow " - "UDFs or at groupby.applyInArrow: return type must be a " - "StructType." + raise PySparkTypeError( + error_class="INVALID_RETURN_TYPE_FOR_ARROW_UDF", + message_parameters={ + "eval_type": "SQL_GROUPED_MAP_ARROW_UDF", + "return_type": str(self._returnType_placeholder), + }, ) elif self.evalType == PythonEvalType.SQL_COGROUPED_MAP_PANDAS_UDF: if isinstance(self._returnType_placeholder, StructType): @@ -326,9 +328,12 @@ def returnType(self) -> DataType: }, ) else: - raise TypeError( - "Invalid return type in cogroup.applyInArrow: " - "return type must be a StructType." + raise PySparkTypeError( + error_class="INVALID_RETURN_TYPE_FOR_ARROW_UDF", + message_parameters={ + "eval_type": "SQL_COGROUPED_MAP_ARROW_UDF", + "return_type": str(self._returnType_placeholder), + }, ) elif self.evalType == PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF: try: