From 265b1e592eb979613288b34dfc132e5edfc70964 Mon Sep 17 00:00:00 2001 From: "Hongbin Ma (Mahone)" Date: Tue, 21 May 2024 13:50:31 +0800 Subject: [PATCH 1/4] refine UT framework to promote GPU evaluation Signed-off-by: Hongbin Ma (Mahone) --- .../com/nvidia/spark/rapids/RapidsConf.scala | 9 ++ .../com/nvidia/spark/rapids/RapidsMeta.scala | 2 +- .../suites/RapidsJsonExpressionsSuite.scala | 42 +++++++ .../utils/RapidsSQLTestsBaseTrait.scala | 2 + .../sql/rapids/utils/RapidsTestSettings.scala | 31 ++--- .../sql/rapids/utils/RapidsTestsTrait.scala | 119 ++++++------------ 6 files changed, 111 insertions(+), 94 deletions(-) create mode 100644 tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonExpressionsSuite.scala diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala index 6dd0441f76f..f7ff38cb193 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala @@ -1485,6 +1485,13 @@ val GPU_COREDUMP_PIPE_PATTERN = conf("spark.rapids.gpu.coreDump.pipePattern") .stringConf .createWithDefault(false.toString) + val FOLDABLE_NON_LIT_ALLOWED = conf("spark.rapids.sql.test.isFoldableNonLitAllowed") + .doc("Only to be used in tests. If `true` the foldable expressions that are not literals " + + "will be allowed") + .internal() + .booleanConf + .createWithDefault(false) + val TEST_CONF = conf("spark.rapids.sql.test.enabled") .doc("Intended to be used by unit tests, if enabled all operations must run on the " + "GPU or an error happens.") @@ -2428,6 +2435,8 @@ class RapidsConf(conf: Map[String, String]) extends Logging { lazy val isTestEnabled: Boolean = get(TEST_CONF) + lazy val isFoldableNonLitAllowed: Boolean = get(FOLDABLE_NON_LIT_ALLOWED) + /** * Convert a string value to the injection configuration OomInjection. * diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsMeta.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsMeta.scala index 5cb170968a6..a876ea6c9e0 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsMeta.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsMeta.scala @@ -1108,7 +1108,7 @@ abstract class BaseExprMeta[INPUT <: Expression]( case _ => ExpressionContext.getRegularOperatorContext(this) } - val isFoldableNonLitAllowed: Boolean = false + val isFoldableNonLitAllowed: Boolean = conf.isFoldableNonLitAllowed // There are 4 levels of timezone check in GPU plan tag phase: // Level 1: Check whether an expression is related to timezone. This is achieved by diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonExpressionsSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonExpressionsSuite.scala new file mode 100644 index 00000000000..2e0c7528eba --- /dev/null +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonExpressionsSuite.scala @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "330"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.suites + +import org.apache.spark.sql.catalyst.expressions.JsonExpressionsSuite +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.rapids.utils.RapidsTestsTrait + +class RapidsJsonExpressionsSuite extends JsonExpressionsSuite with RapidsTestsTrait { + override def beforeAll(): Unit = { + super.beforeAll() + SQLConf.get.setConfString("spark.rapids.sql.expression.JsonTuple", "true") + SQLConf.get.setConfString("spark.rapids.sql.expression.GetJsonObject", "true") + SQLConf.get.setConfString("spark.rapids.sql.expression.JsonToStructs", "true") + SQLConf.get.setConfString("spark.rapids.sql.expression.StructsToJson", "true") + } + + override def afterAll(): Unit = { + super.afterAll() + SQLConf.get.unsetConf("spark.rapids.sql.expression.JsonTuple") + SQLConf.get.unsetConf("spark.rapids.sql.expression.GetJsonObject") + SQLConf.get.unsetConf("spark.rapids.sql.expression.JsonToStructs") + SQLConf.get.unsetConf("spark.rapids.sql.expression.StructsToJson") + } +} diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsSQLTestsBaseTrait.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsSQLTestsBaseTrait.scala index 540c70a2ee1..e384a0c1ea5 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsSQLTestsBaseTrait.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsSQLTestsBaseTrait.scala @@ -121,6 +121,8 @@ object RapidsSQLTestsBaseTrait { "org.apache.spark.sql.rapids.ExecutionPlanCaptureCallback") .set("spark.sql.warehouse.dir", warehouse) .set("spark.sql.cache.serializer", "com.nvidia.spark.ParquetCachedBatchSerializer") + .set("spark.sql.session.timeZone", "UTC") + .set("spark.rapids.sql.explain", "ALL") .setAppName("rapids spark plugin running Vanilla Spark UT") conf diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala index 4faede8269a..ca15b0f0259 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala @@ -19,7 +19,7 @@ spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.utils -import org.apache.spark.sql.rapids.suites.{RapidsCastSuite, RapidsDataFrameAggregateSuite, RapidsJsonFunctionsSuite, RapidsJsonSuite, RapidsMathFunctionsSuite, RapidsRegexpExpressionsSuite, RapidsStringExpressionsSuite, RapidsStringFunctionsSuite} +import org.apache.spark.sql.rapids.suites.{RapidsCastSuite, RapidsDataFrameAggregateSuite, RapidsJsonExpressionsSuite, RapidsJsonFunctionsSuite, RapidsJsonSuite, RapidsMathFunctionsSuite, RapidsRegexpExpressionsSuite, RapidsStringExpressionsSuite, RapidsStringFunctionsSuite} // Some settings' line length exceeds 100 // scalastyle:off line.size.limit @@ -41,6 +41,22 @@ class RapidsTestSettings extends 
BackendTestSettings { .exclude("SPARK-17641: collect functions should not collect null values", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10772")) .exclude("SPARK-19471: AggregationIterator does not initialize the generated result projection before using it", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10772")) .exclude("SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10801")) + enableSuite[RapidsJsonExpressionsSuite] + .exclude("from_json - invalid data", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10849")) + .exclude("from_json - input=empty array, schema=struct, output=single row with null", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10849")) + .exclude("from_json - input=empty object, schema=struct, output=single row with null", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10849")) + .exclude("SPARK-20549: from_json bad UTF-8", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10849")) + .exclude("from_json with timestamp", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10849")) + .exclude("to_json - array", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10849")) + .exclude("to_json - array with single empty row", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10849")) + .exclude("to_json - empty array", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10849")) + .exclude("to_json with timestamp", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10849")) + .exclude("SPARK-21513: to_json support map[string, struct] to json", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10849")) + .exclude("SPARK-21513: to_json support map[struct, struct] to json", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10849")) + .exclude("SPARK-21513: to_json support map[string, integer] to json", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10849")) + .exclude("to_json - array with maps", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10849")) + .exclude("to_json - array with single map", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10849")) + .exclude("from_json missing fields", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10849")) enableSuite[RapidsJsonFunctionsSuite] enableSuite[RapidsJsonSuite] .exclude("Casting long as timestamp", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) @@ -58,24 +74,11 @@ class RapidsTestSettings extends BackendTestSettings { .exclude("SPARK-37360: Timestamp type inference for a mix of TIMESTAMP_NTZ and TIMESTAMP_LTZ", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) enableSuite[RapidsMathFunctionsSuite] enableSuite[RapidsRegexpExpressionsSuite] - .exclude("RegexReplace", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10774")) - .exclude("RegexExtract", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10774")) - .exclude("RegexExtractAll", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10774")) - .exclude("SPLIT", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10774")) enableSuite[RapidsStringExpressionsSuite] .exclude("SPARK-22498: Concat should not generate codes beyond 64KB", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) .exclude("SPARK-22549: ConcatWs should not generate codes beyond 64KB", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) 
.exclude("SPARK-22550: Elt should not generate codes beyond 64KB", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) - .exclude("StringComparison", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) - .exclude("Substring", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) - .exclude("ascii for string", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) - .exclude("base64/unbase64 for string", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) - .exclude("encode/decode for string", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) .exclude("SPARK-22603: FormatString should not generate codes beyond 64KB", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) - .exclude("LOCATE", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) - .exclude("LPAD/RPAD", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) - .exclude("REPEAT", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) - .exclude("length for string / binary", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) .exclude("ParseUrl", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) enableSuite[RapidsStringFunctionsSuite] } diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsTrait.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsTrait.scala index 08e10f4d4fd..1f8a775825a 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsTrait.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsTrait.scala @@ -20,28 +20,29 @@ spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.utils import java.io.File +import java.util.TimeZone import com.nvidia.spark.rapids.{GpuProjectExec, TestStats} import org.apache.commons.io.{FileUtils => fu} import org.apache.commons.math3.util.Precision import org.scalactic.TripleEqualsSupport.Spread import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer -import org.apache.spark.sql.{Column, DataFrame, Row, SparkSession} +import org.apache.spark.sql.{Column, Row, SparkSession} import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.analysis.ResolveTimeZone import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection -import org.apache.spark.sql.catalyst.optimizer.{ConstantFolding, ConvertToLocalRelation, NullPropagation} +import org.apache.spark.sql.catalyst.optimizer.{ConstantFolding, ConvertToLocalRelation} import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData, MapData, TypeUtils} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.rapids.utils.RapidsQueryTestUtil.isNaNOrInf import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.UTF8String + trait RapidsTestsTrait extends RapidsTestsCommonTrait { + val originalTimeZone = TimeZone.getDefault + override def beforeAll(): Unit = { // prepare working paths val basePathDir = new File(basePath) @@ -54,9 +55,12 @@ trait RapidsTestsTrait extends RapidsTestsCommonTrait { super.beforeAll() initializeSession() _spark.sparkContext.setLogLevel("WARN") + TimeZone.setDefault(TimeZone.getTimeZone("UTC")) } override def afterAll(): Unit = { + TimeZone.setDefault(originalTimeZone) + try { super.afterAll() } finally { @@ -91,12 +95,17 @@ 
trait RapidsTestsTrait extends RapidsTestsCommonTrait { .config( SQLConf.OPTIMIZER_EXCLUDED_RULES.key, ConvertToLocalRelation.ruleName + - "," + ConstantFolding.ruleName + "," + NullPropagation.ruleName) + "," + ConstantFolding.ruleName) .config("spark.rapids.sql.enabled", "true") .config("spark.plugins", "com.nvidia.spark.SQLPlugin") .config("spark.sql.queryExecutionListeners", "org.apache.spark.sql.rapids.ExecutionPlanCaptureCallback") .config("spark.sql.warehouse.dir", warehouse) + .config("spark.sql.session.timeZone","UTC") + .config("spark.rapids.sql.explain", "ALL") + .config("spark.rapids.sql.test.isFoldableNonLitAllowed", "true") + // uncomment below config to run `strict mode`, where fallback to CPU is treated as fail + // .config("spark.rapids.sql.test.enabled", "true") .appName("rapids spark plugin running Vanilla Spark UT") _spark = sparkBuilder @@ -115,31 +124,20 @@ trait RapidsTestsTrait extends RapidsTestsCommonTrait { val expr = resolver.resolveTimeZones(expression) assert(expr.resolved) - if (canConvertToDataFrame(inputRow)) { - rapidsCheckExpression(expr, expected, inputRow) - } else { - logWarning(s"The status of this unit test is not guaranteed.") - val catalystValue = CatalystTypeConverters.convertToCatalyst(expected) - checkEvaluationWithoutCodegen(expr, catalystValue, inputRow) - checkEvaluationWithMutableProjection(expr, catalystValue, inputRow) - if (GenerateUnsafeProjection.canSupport(expr.dataType)) { - checkEvaluationWithUnsafeProjection(expr, catalystValue, inputRow) - } - checkEvaluationWithOptimization(expr, catalystValue, inputRow) - } + rapidsCheckExpression(expr, expected, inputRow) } /** * Sort map data by key and return the sorted key array and value array. * * @param input - * : input map data. + * : input map data. * @param kt - * : key type. + * : key type. * @param vt - * : value type. + * : value type. * @return - * the sorted key array and value array. + * the sorted key array and value array. */ private def getSortedArrays( input: MapData, @@ -202,7 +200,7 @@ trait RapidsTestsTrait extends RapidsTestsCommonTrait { case (result: Double, expected: Double) => if ( (isNaNOrInf(result) || isNaNOrInf(expected)) - || (result == -0.0) || (expected == -0.0) + || (result == -0.0) || (expected == -0.0) ) { java.lang.Double.doubleToRawLongBits(result) == java.lang.Double.doubleToRawLongBits(expected) @@ -221,20 +219,23 @@ trait RapidsTestsTrait extends RapidsTestsCommonTrait { RapidsTestConstants.SUPPORTED_DATA_TYPE.acceptsType(expr.dataType) } - def rapidsCheckExpression(expression: Expression, expected: Any, inputRow: InternalRow): Unit = { - val df = if (inputRow != EmptyRow && inputRow != InternalRow.empty) { - convertInternalRowToDataFrame(inputRow) - } else { - val schema = StructType(StructField("a", IntegerType, nullable = true) :: Nil) - val empData = Seq(Row(1)) - _spark.createDataFrame(_spark.sparkContext.parallelize(empData), schema) + def rapidsCheckExpression(origExpr: Expression, expected: Any, inputRow: InternalRow): Unit = { + // many of of the expressions in RAPIDS do not support + // vectorized parameters. (e.g. regexp_replace) + // So we downgrade all expression + // evaluation to use scalar parameters. + // In a follow-up issue we'll take care of the expressions + // those already support vectorized paramters. 
+ val expression = origExpr.transformUp { + case BoundReference(ordinal, dataType, _) => + Literal(inputRow.asInstanceOf[GenericInternalRow].get(ordinal, dataType), dataType) } - val resultDF = df.select(Column(expression)) + val resultDF = _spark.range(0, 1).select(Column(expression)) val result = resultDF.collect() TestStats.testUnitNumber = TestStats.testUnitNumber + 1 if ( checkDataTypeSupported(expression) && - expression.children.forall(checkDataTypeSupported) + expression.children.forall(checkDataTypeSupported) ) { val projectTransformer = resultDF.queryExecution.executedPlan.collect { case p: GpuProjectExec => p @@ -254,13 +255,13 @@ trait RapidsTestsTrait extends RapidsTestsCommonTrait { if ( !(checkResult(result.head.get(0), expected, expression.dataType, expression.nullable) || checkResult( - CatalystTypeConverters.createToCatalystConverter(expression.dataType)( - result.head.get(0) - ), // decimal precision is wrong from value - CatalystTypeConverters.convertToCatalyst(expected), - expression.dataType, - expression.nullable - )) + CatalystTypeConverters.createToCatalystConverter(expression.dataType)( + result.head.get(0) + ), // decimal precision is wrong from value + CatalystTypeConverters.convertToCatalyst(expected), + expression.dataType, + expression.nullable + )) ) { val input = if (inputRow == EmptyRow) "" else s", input: $inputRow" fail( @@ -292,44 +293,4 @@ trait RapidsTestsTrait extends RapidsTestsCommonTrait { } true } - - def convertInternalRowToDataFrame(inputRow: InternalRow): DataFrame = { - val structFileSeq = new ArrayBuffer[StructField]() - val values = inputRow match { - case genericInternalRow: GenericInternalRow => - genericInternalRow.values - case _ => throw new UnsupportedOperationException("Unsupported InternalRow.") - } - values.foreach { - case boolean: java.lang.Boolean => - structFileSeq.append(StructField("bool", BooleanType, boolean == null)) - case short: java.lang.Short => - structFileSeq.append(StructField("i16", ShortType, short == null)) - case byte: java.lang.Byte => - structFileSeq.append(StructField("i8", ByteType, byte == null)) - case integer: java.lang.Integer => - structFileSeq.append(StructField("i32", IntegerType, integer == null)) - case long: java.lang.Long => - structFileSeq.append(StructField("i64", LongType, long == null)) - case float: java.lang.Float => - structFileSeq.append(StructField("fp32", FloatType, float == null)) - case double: java.lang.Double => - structFileSeq.append(StructField("fp64", DoubleType, double == null)) - case utf8String: UTF8String => - structFileSeq.append(StructField("str", StringType, utf8String == null)) - case byteArr: Array[Byte] => - structFileSeq.append(StructField("vbin", BinaryType, byteArr == null)) - case decimal: Decimal => - structFileSeq.append( - StructField("dec", DecimalType(decimal.precision, decimal.scale), decimal == null)) - case null => - structFileSeq.append(StructField("null", IntegerType, nullable = true)) - case unsupported @ _ => - throw new UnsupportedOperationException(s"Unsupported type: ${unsupported.getClass}") - } - val fields = structFileSeq.toSeq - _spark.internalCreateDataFrame( - _spark.sparkContext.parallelize(Seq(inputRow)), - StructType(fields)) - } } From 6780184a9fff54e418385756f8ff238d506b728b Mon Sep 17 00:00:00 2001 From: "Hongbin Ma (Mahone)" Date: Tue, 21 May 2024 15:07:14 +0800 Subject: [PATCH 2/4] enable some exprs for json Signed-off-by: Hongbin Ma (Mahone) --- .../suites/RapidsJsonFunctionsSuite.scala | 19 ++++++++++++++++++- 
.../sql/rapids/suites/RapidsJsonSuite.scala | 16 ++++++++++++++++ .../utils/RapidsSQLTestsBaseTrait.scala | 4 ++++ .../sql/rapids/utils/RapidsTestSettings.scala | 5 +++++ .../sql/rapids/utils/RapidsTestsTrait.scala | 2 ++ 5 files changed, 45 insertions(+), 1 deletion(-) diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonFunctionsSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonFunctionsSuite.scala index 43150c0df4b..40087aeaad9 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonFunctionsSuite.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonFunctionsSuite.scala @@ -20,6 +20,23 @@ spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.suites import org.apache.spark.sql.JsonFunctionsSuite +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.rapids.utils.RapidsSQLTestsTrait -class RapidsJsonFunctionsSuite extends JsonFunctionsSuite with RapidsSQLTestsTrait {} +class RapidsJsonFunctionsSuite extends JsonFunctionsSuite with RapidsSQLTestsTrait { + override def beforeAll(): Unit = { + super.beforeAll() + SQLConf.get.setConfString("spark.rapids.sql.expression.JsonTuple", "true") + SQLConf.get.setConfString("spark.rapids.sql.expression.GetJsonObject", "true") + SQLConf.get.setConfString("spark.rapids.sql.expression.JsonToStructs", "true") + SQLConf.get.setConfString("spark.rapids.sql.expression.StructsToJson", "true") + } + + override def afterAll(): Unit = { + super.afterAll() + SQLConf.get.unsetConf("spark.rapids.sql.expression.JsonTuple") + SQLConf.get.unsetConf("spark.rapids.sql.expression.GetJsonObject") + SQLConf.get.unsetConf("spark.rapids.sql.expression.JsonToStructs") + SQLConf.get.unsetConf("spark.rapids.sql.expression.StructsToJson") + } +} diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonSuite.scala index 6d244c67ad0..32203f708f9 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonSuite.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonSuite.scala @@ -31,6 +31,22 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap class RapidsJsonSuite extends JsonSuite with RapidsSQLTestsBaseTrait { + override def beforeAll(): Unit = { + super.beforeAll() + SQLConf.get.setConfString("spark.rapids.sql.expression.JsonTuple", "true") + SQLConf.get.setConfString("spark.rapids.sql.expression.GetJsonObject", "true") + SQLConf.get.setConfString("spark.rapids.sql.expression.JsonToStructs", "true") + SQLConf.get.setConfString("spark.rapids.sql.expression.StructsToJson", "true") + } + + override def afterAll(): Unit = { + super.afterAll() + SQLConf.get.unsetConf("spark.rapids.sql.expression.JsonTuple") + SQLConf.get.unsetConf("spark.rapids.sql.expression.GetJsonObject") + SQLConf.get.unsetConf("spark.rapids.sql.expression.JsonToStructs") + SQLConf.get.unsetConf("spark.rapids.sql.expression.StructsToJson") + } + /** Returns full path to the given file in the resource folder */ override protected def testFile(fileName: String): String = { getWorkspaceFilePath("sql", "core", "src", "test", "resources").toString + "/" + fileName diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsSQLTestsBaseTrait.scala 
b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsSQLTestsBaseTrait.scala index e384a0c1ea5..6529ef42d00 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsSQLTestsBaseTrait.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsSQLTestsBaseTrait.scala @@ -123,6 +123,10 @@ object RapidsSQLTestsBaseTrait { .set("spark.sql.cache.serializer", "com.nvidia.spark.ParquetCachedBatchSerializer") .set("spark.sql.session.timeZone", "UTC") .set("spark.rapids.sql.explain", "ALL") + // uncomment below config to run `strict mode`, where fallback to CPU is treated as fail + // .set("spark.rapids.sql.test.enabled", "true") + // .set("spark.rapids.sql.test.allowedNonGpu", + // "SerializeFromObjectExec,DeserializeToObjectExec,ExternalRDDScanExec") .setAppName("rapids spark plugin running Vanilla Spark UT") conf diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala index ca15b0f0259..40ee67a4c5c 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala @@ -58,6 +58,11 @@ class RapidsTestSettings extends BackendTestSettings { .exclude("to_json - array with single map", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10849")) .exclude("from_json missing fields", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10849")) enableSuite[RapidsJsonFunctionsSuite] + .exclude("from_json invalid json", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10852")) + .exclude("to_json - array", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10852")) + .exclude("to_json - map", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10852")) + .exclude("to_json - array of primitive types", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10852")) + .exclude("SPARK-33134: return partial results only for root JSON objects", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10852")) enableSuite[RapidsJsonSuite] .exclude("Casting long as timestamp", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) .exclude("Write timestamps correctly with timestampFormat option and timeZone option", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsTrait.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsTrait.scala index 1f8a775825a..44b791124c2 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsTrait.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsTrait.scala @@ -106,6 +106,8 @@ trait RapidsTestsTrait extends RapidsTestsCommonTrait { .config("spark.rapids.sql.test.isFoldableNonLitAllowed", "true") // uncomment below config to run `strict mode`, where fallback to CPU is treated as fail // .config("spark.rapids.sql.test.enabled", "true") + // .config("spark.rapids.sql.test.allowedNonGpu", + // "SerializeFromObjectExec,DeserializeToObjectExec,ExternalRDDScanExec") .appName("rapids spark plugin running Vanilla Spark UT") _spark = sparkBuilder From e900d50b1ba66d71b2c74700a1b112d908498513 Mon Sep 17 00:00:00 2001 From: "Hongbin Ma (Mahone)" Date: Tue, 21 May 2024 16:43:38 +0800 Subject: [PATCH 3/4] exclude 
flaky tests Signed-off-by: Hongbin Ma (Mahone) --- .../org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala index 40ee67a4c5c..d0d67c41f0e 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala @@ -80,6 +80,9 @@ class RapidsTestSettings extends BackendTestSettings { enableSuite[RapidsMathFunctionsSuite] enableSuite[RapidsRegexpExpressionsSuite] enableSuite[RapidsStringExpressionsSuite] + .exclude("concat", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) + .exclude("string substring_index function", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) + .exclude("format_number / FormatNumber", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) .exclude("SPARK-22498: Concat should not generate codes beyond 64KB", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) .exclude("SPARK-22549: ConcatWs should not generate codes beyond 64KB", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) .exclude("SPARK-22550: Elt should not generate codes beyond 64KB", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) From 7237cb601ff9e8097deab6e95c968f6cb4c863a7 Mon Sep 17 00:00:00 2001 From: "Hongbin Ma (Mahone)" Date: Wed, 22 May 2024 11:51:42 +0800 Subject: [PATCH 4/4] fix review comments Signed-off-by: Hongbin Ma (Mahone) --- .../suites/RapidsJsonExpressionsSuite.scala | 22 ++-------- .../suites/RapidsJsonFunctionsSuite.scala | 22 ++-------- .../sql/rapids/suites/RapidsJsonSuite.scala | 22 ++-------- .../rapids/utils/RapidsJsonConfTrait.scala | 42 +++++++++++++++++++ .../sql/rapids/utils/RapidsTestsTrait.scala | 10 ++--- 5 files changed, 55 insertions(+), 63 deletions(-) create mode 100644 tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsJsonConfTrait.scala diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonExpressionsSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonExpressionsSuite.scala index 2e0c7528eba..eb5fdc535e8 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonExpressionsSuite.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonExpressionsSuite.scala @@ -20,23 +20,7 @@ spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.suites import org.apache.spark.sql.catalyst.expressions.JsonExpressionsSuite -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.rapids.utils.RapidsTestsTrait +import org.apache.spark.sql.rapids.utils.{RapidsJsonConfTrait, RapidsTestsTrait} -class RapidsJsonExpressionsSuite extends JsonExpressionsSuite with RapidsTestsTrait { - override def beforeAll(): Unit = { - super.beforeAll() - SQLConf.get.setConfString("spark.rapids.sql.expression.JsonTuple", "true") - SQLConf.get.setConfString("spark.rapids.sql.expression.GetJsonObject", "true") - SQLConf.get.setConfString("spark.rapids.sql.expression.JsonToStructs", "true") - SQLConf.get.setConfString("spark.rapids.sql.expression.StructsToJson", "true") - } - - override def afterAll(): Unit = { - super.afterAll() - 
SQLConf.get.unsetConf("spark.rapids.sql.expression.JsonTuple") - SQLConf.get.unsetConf("spark.rapids.sql.expression.GetJsonObject") - SQLConf.get.unsetConf("spark.rapids.sql.expression.JsonToStructs") - SQLConf.get.unsetConf("spark.rapids.sql.expression.StructsToJson") - } -} +class RapidsJsonExpressionsSuite + extends JsonExpressionsSuite with RapidsTestsTrait with RapidsJsonConfTrait {} diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonFunctionsSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonFunctionsSuite.scala index 40087aeaad9..ebddc498202 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonFunctionsSuite.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonFunctionsSuite.scala @@ -20,23 +20,7 @@ spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.suites import org.apache.spark.sql.JsonFunctionsSuite -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.rapids.utils.RapidsSQLTestsTrait +import org.apache.spark.sql.rapids.utils.{RapidsJsonConfTrait, RapidsSQLTestsTrait} -class RapidsJsonFunctionsSuite extends JsonFunctionsSuite with RapidsSQLTestsTrait { - override def beforeAll(): Unit = { - super.beforeAll() - SQLConf.get.setConfString("spark.rapids.sql.expression.JsonTuple", "true") - SQLConf.get.setConfString("spark.rapids.sql.expression.GetJsonObject", "true") - SQLConf.get.setConfString("spark.rapids.sql.expression.JsonToStructs", "true") - SQLConf.get.setConfString("spark.rapids.sql.expression.StructsToJson", "true") - } - - override def afterAll(): Unit = { - super.afterAll() - SQLConf.get.unsetConf("spark.rapids.sql.expression.JsonTuple") - SQLConf.get.unsetConf("spark.rapids.sql.expression.GetJsonObject") - SQLConf.get.unsetConf("spark.rapids.sql.expression.JsonToStructs") - SQLConf.get.unsetConf("spark.rapids.sql.expression.StructsToJson") - } -} +class RapidsJsonFunctionsSuite + extends JsonFunctionsSuite with RapidsSQLTestsTrait with RapidsJsonConfTrait {} diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonSuite.scala index 32203f708f9..3e9f685dfdc 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonSuite.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonSuite.scala @@ -24,29 +24,13 @@ import org.apache.spark.sql.execution.datasources.{InMemoryFileIndex, NoopCache} import org.apache.spark.sql.execution.datasources.json.JsonSuite import org.apache.spark.sql.execution.datasources.v2.json.JsonScanBuilder import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.rapids.utils.RapidsSQLTestsBaseTrait +import org.apache.spark.sql.rapids.utils.{RapidsJsonConfTrait, RapidsSQLTestsBaseTrait} import org.apache.spark.sql.sources import org.apache.spark.sql.types.{IntegerType, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap -class RapidsJsonSuite extends JsonSuite with RapidsSQLTestsBaseTrait { - - override def beforeAll(): Unit = { - super.beforeAll() - SQLConf.get.setConfString("spark.rapids.sql.expression.JsonTuple", "true") - SQLConf.get.setConfString("spark.rapids.sql.expression.GetJsonObject", "true") - SQLConf.get.setConfString("spark.rapids.sql.expression.JsonToStructs", "true") - SQLConf.get.setConfString("spark.rapids.sql.expression.StructsToJson", "true") - 
} - - override def afterAll(): Unit = { - super.afterAll() - SQLConf.get.unsetConf("spark.rapids.sql.expression.JsonTuple") - SQLConf.get.unsetConf("spark.rapids.sql.expression.GetJsonObject") - SQLConf.get.unsetConf("spark.rapids.sql.expression.JsonToStructs") - SQLConf.get.unsetConf("spark.rapids.sql.expression.StructsToJson") - } - +class RapidsJsonSuite + extends JsonSuite with RapidsSQLTestsBaseTrait with RapidsJsonConfTrait { /** Returns full path to the given file in the resource folder */ override protected def testFile(fileName: String): String = { getWorkspaceFilePath("sql", "core", "src", "test", "resources").toString + "/" + fileName diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsJsonConfTrait.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsJsonConfTrait.scala new file mode 100644 index 00000000000..a1324deb21b --- /dev/null +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsJsonConfTrait.scala @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "330"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.utils + +import org.scalatest.{BeforeAndAfterAll, Suite} + +import org.apache.spark.sql.internal.SQLConf + +trait RapidsJsonConfTrait extends BeforeAndAfterAll { this: Suite => + override def beforeAll(): Unit = { + super.beforeAll() + SQLConf.get.setConfString("spark.rapids.sql.expression.JsonTuple", true.toString) + SQLConf.get.setConfString("spark.rapids.sql.expression.GetJsonObject", true.toString) + SQLConf.get.setConfString("spark.rapids.sql.expression.JsonToStructs", true.toString) + SQLConf.get.setConfString("spark.rapids.sql.expression.StructsToJson", true.toString) + } + + override def afterAll(): Unit = { + SQLConf.get.unsetConf("spark.rapids.sql.expression.JsonTuple") + SQLConf.get.unsetConf("spark.rapids.sql.expression.GetJsonObject") + SQLConf.get.unsetConf("spark.rapids.sql.expression.JsonToStructs") + SQLConf.get.unsetConf("spark.rapids.sql.expression.StructsToJson") + super.afterAll() + } +} diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsTrait.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsTrait.scala index 44b791124c2..1e5cf9277e9 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsTrait.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsTrait.scala @@ -222,12 +222,10 @@ trait RapidsTestsTrait extends RapidsTestsCommonTrait { } def rapidsCheckExpression(origExpr: Expression, expected: Any, inputRow: InternalRow): Unit = { - // many of of the expressions in RAPIDS do not support - // vectorized parameters. (e.g. regexp_replace) - // So we downgrade all expression - // evaluation to use scalar parameters. 
- // In a follow-up issue we'll take care of the expressions - // those already support vectorized paramters. + // many of the expressions in RAPIDS do not support vectorized parameters(e.g. regexp_replace). + // So we downgrade all expression evaluation to use scalar parameters. + // In a follow-up issue (https://github.com/NVIDIA/spark-rapids/issues/10859), + // we'll take care of the expressions those already support vectorized parameters. val expression = origExpr.transformUp { case BoundReference(ordinal, dataType, _) => Literal(inputRow.asInstanceOf[GenericInternalRow].get(ordinal, dataType), dataType)