From fa515aef13dceb63588a7d4c291650ec61b48f48 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Wed, 27 Dec 2023 23:28:32 +0800 Subject: [PATCH] empty string to 0 in lower version Signed-off-by: Haoyang Li --- .../apache/spark/sql/rapids/shims/GpuAscii.scala | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/GpuAscii.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/GpuAscii.scala index 52b75e178f4..2c5224bfb55 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/GpuAscii.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/GpuAscii.scala @@ -44,8 +44,20 @@ case class GpuAscii(child: Expression) extends GpuUnaryExpression with ImplicitC override def inputTypes: Seq[AbstractDataType] = Seq(StringType) override def doColumnar(input: GpuColumnVector): ColumnVector = { + val emptyMask = withResource(Scalar.fromString("")) { emptyScalar => + input.getBase.equalTo(emptyScalar) + } + val emptyReplaced = withResource(emptyMask) { _ => + // replace empty strings with 'NUL' (which will convert to ascii 0) + withResource(Scalar.fromString('\u0000'.toString)) { zeroScalar => + emptyMask.ifElse(zeroScalar, input.getBase) + } + } // convert to byte lists - val firstBytes = withResource(input.getBase.asByteList) { bytes => + val byteLists = withResource(emptyReplaced) { _ => + emptyReplaced.asByteList() + } + val firstBytes = withResource(byteLists) { bytes => bytes.extractListElement(0) } val firstBytesInt = withResource(firstBytes) { _ =>