From c274c5a1aecd7a03c98e523152ef73fb9afb7b3a Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Mon, 2 Sep 2024 08:06:14 +0200 Subject: [PATCH] [SPARK-49476][SQL] Fix nullability of base64 function ### What changes were proposed in this pull request? Fix the nullability of the `Base64` expression to be based on the child's nullability, and not always be nullable. ### Why are the changes needed? https://github.com/apache/spark/pull/47303 had a side effect of changing the nullability by switching to `StaticInvoke`. This was also backported to Spark 3.5.2 and caused schema mismatch errors for stateful streams when we upgraded. This restores the previous behavior, which is supported by StaticInvoke through the `returnNullable` argument. If the child is non-nullable, we know the result will be non-nullable. ### Does this PR introduce _any_ user-facing change? Restores the nullability of the `Base64` expression to what it was in Spark 3.5.1 and earlier. ### How was this patch tested? New UT ### Was this patch authored or co-authored using generative AI tooling? No Closes #47941 from Kimahriman/base64-nullability. 
Lead-authored-by: Adam Binford Co-authored-by: Maxim Gekk Signed-off-by: Max Gekk --- .../spark/sql/catalyst/expressions/stringExpressions.scala | 3 ++- .../sql/catalyst/expressions/StringExpressionsSuite.scala | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 5a30bbdf846a2..6ccd5a451eafc 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -2714,7 +2714,8 @@ case class Base64(child: Expression, chunkBase64: Boolean) dataType, "encode", Seq(child, Literal(chunkBase64, BooleanType)), - Seq(BinaryType, BooleanType)) + Seq(BinaryType, BooleanType), + returnNullable = false) override def toString: String = s"$prettyName($child)" diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index 7210979f08462..b39820f0d317d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -467,6 +467,13 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { val b = $"b".binary.at(0) val bytes = Array[Byte](1, 2, 3, 4) + assert(!Base64(Literal(bytes)).nullable) + assert(Base64(Literal.create(null, BinaryType)).nullable) + assert(Base64(Literal(bytes).castNullable()).nullable) + assert(!UnBase64(Literal("AQIDBA==")).nullable) + assert(UnBase64(Literal.create(null, StringType)).nullable) + assert(UnBase64(Literal("AQIDBA==").castNullable()).nullable) + 
checkEvaluation(Base64(Literal(bytes)), "AQIDBA==", create_row("abdef")) checkEvaluation(Base64(UnBase64(Literal("AQIDBA=="))), "AQIDBA==", create_row("abdef")) checkEvaluation(Base64(UnBase64(Literal(""))), "", create_row("abdef"))