From b2c8b3069ef4f5288a5964af0da6f6b23a769e6b Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 11 Dec 2024 10:35:36 -0800 Subject: [PATCH] Revert "[SPARK-48898][SQL] Set nullability correctly in the Variant schema" This reverts commit 9394b35ed7bb57a343ab6038dc1c1345db104604. --- .../parquet/SparkShreddingUtils.scala | 5 +--- .../sql/VariantWriteShreddingSuite.scala | 30 ------------------- 2 files changed, 1 insertion(+), 34 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/SparkShreddingUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/SparkShreddingUtils.scala index 507633abfb285..41244e20c369f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/SparkShreddingUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/SparkShreddingUtils.scala @@ -81,11 +81,8 @@ case object SparkShreddingUtils { StructField(TypedValueFieldName, arrayShreddingSchema, nullable = true) ) case StructType(fields) => - // The field name level is always non-nullable: Variant null values are represented in the - // "value" columna as "00", and missing values are represented by setting both "value" and - // "typed_value" to null. 
val objectShreddingSchema = StructType(fields.map(f => - f.copy(dataType = variantShreddingSchema(f.dataType, false), nullable = false))) + f.copy(dataType = variantShreddingSchema(f.dataType, false)))) Seq( StructField(VariantValueFieldName, BinaryType, nullable = true), StructField(TypedValueFieldName, objectShreddingSchema, nullable = true) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/VariantWriteShreddingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/VariantWriteShreddingSuite.scala index 9022d8cfdca49..a62c6e4462464 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/VariantWriteShreddingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/VariantWriteShreddingSuite.scala @@ -67,36 +67,6 @@ class VariantWriteShreddingSuite extends SparkFunSuite with ExpressionEvalHelper private val emptyMetadata: Array[Byte] = parseJson("null").getMetadata - test("variantShreddingSchema") { - // Validate the schema produced by SparkShreddingUtils.variantShreddingSchema for a few simple - // cases. - // metadata is always non-nullable. 
- assert(SparkShreddingUtils.variantShreddingSchema(IntegerType) == - StructType(Seq( - StructField("metadata", BinaryType, nullable = false), - StructField("value", BinaryType, nullable = true), - StructField("typed_value", IntegerType, nullable = true)))) - - val fieldA = StructType(Seq( - StructField("value", BinaryType, nullable = true), - StructField("typed_value", TimestampNTZType, nullable = true))) - val arrayType = ArrayType(StructType(Seq( - StructField("value", BinaryType, nullable = true), - StructField("typed_value", StringType, nullable = true)))) - val fieldB = StructType(Seq( - StructField("value", BinaryType, nullable = true), - StructField("typed_value", arrayType, nullable = true))) - val objectType = StructType(Seq( - StructField("a", fieldA, nullable = false), - StructField("b", fieldB, nullable = false))) - val structSchema = DataType.fromDDL("a timestamp_ntz, b array<string>") - assert(SparkShreddingUtils.variantShreddingSchema(structSchema) == - StructType(Seq( - StructField("metadata", BinaryType, nullable = false), - StructField("value", BinaryType, nullable = true), - StructField("typed_value", objectType, nullable = true)))) - } - test("shredding as fixed numeric types") { /* Cast integer to any wider numeric type. */ testWithSchema("1", IntegerType, Row(emptyMetadata, null, 1))