diff --git a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala index 773494f418659..d5668cc721750 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala @@ -47,7 +47,6 @@ private[sql] trait SqlApiConf { def stackTracesInDataFrameContext: Int def dataFrameQueryContextEnabled: Boolean def legacyAllowUntypedScalaUDFs: Boolean - def allowReadingUnknownCollations: Boolean } private[sql] object SqlApiConf { @@ -60,7 +59,6 @@ private[sql] object SqlApiConf { SqlApiConfHelper.LOCAL_RELATION_CACHE_THRESHOLD_KEY } val DEFAULT_COLLATION: String = SqlApiConfHelper.DEFAULT_COLLATION - val ALLOW_READING_UNKNOWN_COLLATIONS: String = SqlApiConfHelper.ALLOW_READING_UNKNOWN_COLLATIONS def get: SqlApiConf = SqlApiConfHelper.getConfGetter.get()() @@ -89,5 +87,4 @@ private[sql] object DefaultSqlApiConf extends SqlApiConf { override def stackTracesInDataFrameContext: Int = 1 override def dataFrameQueryContextEnabled: Boolean = true override def legacyAllowUntypedScalaUDFs: Boolean = false - override def allowReadingUnknownCollations: Boolean = false } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConfHelper.scala b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConfHelper.scala index c8d6f395d4506..13ef13e5894e0 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConfHelper.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConfHelper.scala @@ -33,8 +33,6 @@ private[sql] object SqlApiConfHelper { val SESSION_LOCAL_TIMEZONE_KEY: String = "spark.sql.session.timeZone" val LOCAL_RELATION_CACHE_THRESHOLD_KEY: String = "spark.sql.session.localRelationCacheThreshold" val DEFAULT_COLLATION: String = "spark.sql.session.collation.default" - val ALLOW_READING_UNKNOWN_COLLATIONS: String = - "spark.sql.collation.allowReadingUnknownCollations" val confGetter: AtomicReference[() => SqlApiConf] = { new AtomicReference[() => SqlApiConf](() => DefaultSqlApiConf) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala index 4cf7d8efb96a5..036de22b4189a 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -27,7 +27,7 @@ import org.json4s.JsonAST.JValue import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ -import org.apache.spark.{SparkException, SparkIllegalArgumentException, SparkThrowable} +import org.apache.spark.{SparkIllegalArgumentException, SparkThrowable} import org.apache.spark.annotation.Stable import org.apache.spark.sql.catalyst.analysis.SqlApiAnalysis import org.apache.spark.sql.catalyst.parser.DataTypeParser @@ -340,17 +340,8 @@ object DataType { fields.collect { case (fieldPath, JString(collation)) => collation.split("\\.", 2) match { case Array(provider: String, collationName: String) => - try { - CollationFactory.assertValidProvider(provider) - fieldPath -> collationName - } catch { - case e: SparkException - if e.getCondition == "COLLATION_INVALID_PROVIDER" && - SqlApiConf.get.allowReadingUnknownCollations => - // If the collation provider is unknown and the config for reading such - // collations is enabled, return the UTF8_BINARY collation. 
- fieldPath -> "UTF8_BINARY" - } + CollationFactory.assertValidProvider(provider) + fieldPath -> collationName } }.toMap @@ -359,16 +350,7 @@ object DataType { } private def stringTypeWithCollation(collationName: String): StringType = { - try { - StringType(CollationFactory.collationNameToId(collationName)) - } catch { - case e: SparkException - if e.getCondition == "COLLATION_INVALID_NAME" && - SqlApiConf.get.allowReadingUnknownCollations => - // If the collation name is unknown and the config for reading such collations is enabled, - // return the UTF8_BINARY collation. - StringType(CollationFactory.UTF8_BINARY_COLLATION_ID) - } + StringType(CollationFactory.collationNameToId(collationName)) } protected[types] def buildFormattedString( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 123759c6c8b80..ba0a37541e490 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -778,15 +778,6 @@ object SQLConf { .booleanConf .createWithDefault(Utils.isTesting) - val ALLOW_READING_UNKNOWN_COLLATIONS = - buildConf(SqlApiConfHelper.ALLOW_READING_UNKNOWN_COLLATIONS) - .internal() - .doc("Enables spark to read unknown collation name as UTF8_BINARY. If the config is " + - "not enabled, when spark encounters an unknown collation name, it will throw an error.") - .version("4.0.0") - .booleanConf - .createWithDefault(false) - val DEFAULT_COLLATION = buildConf(SqlApiConfHelper.DEFAULT_COLLATION) .doc("Sets default collation to use for string literals, parameter markers or the string" + @@ -5582,8 +5573,6 @@ class SQLConf extends Serializable with Logging with SqlApiConf { } } - override def allowReadingUnknownCollations: Boolean = getConf(ALLOW_READING_UNKNOWN_COLLATIONS) - def adaptiveExecutionEnabled: Boolean = getConf(ADAPTIVE_EXECUTION_ENABLED) def adaptiveExecutionLogLevel: String = getConf(ADAPTIVE_EXECUTION_LOG_LEVEL) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala index d5fc4d87bb6ad..f6d8f2a66e202 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala @@ -23,13 +23,11 @@ import org.json4s.jackson.JsonMethods import org.apache.spark.{SparkException, SparkFunSuite, SparkIllegalArgumentException} import org.apache.spark.sql.catalyst.analysis.{caseInsensitiveResolution, caseSensitiveResolution} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.catalyst.util.{CollationFactory, StringConcat} -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.DataTypeTestUtils.{dayTimeIntervalTypes, yearMonthIntervalTypes} -class DataTypeSuite extends SparkFunSuite with SQLHelper { +class DataTypeSuite extends SparkFunSuite { private val UNICODE_COLLATION_ID = CollationFactory.collationNameToId("UNICODE") @@ -878,90 +876,6 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { } } - test("string field with invalid collation name") { - val collationProviders = Seq("spark", "icu") - collationProviders.foreach { provider => - val json = - s""" - |{ - | "type": "struct", - | "fields": [ - | { - 
| "name": "c1", - | "type": "string", - | "nullable": false, - | "metadata": { - | "${DataType.COLLATIONS_METADATA_KEY}": { - | "c1": "$provider.INVALID" - | } - | } - | } - | ] - |} - |""".stripMargin - - // Check that the exception will be thrown in case of invalid collation name and - // UNKNOWN_COLLATION_NAME config not enabled. - checkError( - exception = intercept[SparkException] { - DataType.fromJson(json) - }, - condition = "COLLATION_INVALID_NAME", - parameters = Map( - "proposals" -> "id", - "collationName" -> "INVALID")) - - // Check that the exception will not be thrown in case of invalid collation name and - // UNKNOWN_COLLATION_NAME enabled, but UTF8_BINARY collation will be returned. - withSQLConf(SQLConf.ALLOW_READING_UNKNOWN_COLLATIONS.key -> "true") { - val dataType = DataType.fromJson(json) - assert(dataType === StructType( - StructField("c1", StringType(CollationFactory.UTF8_BINARY_COLLATION_ID), false) :: Nil)) - } - } - } - - test("string field with invalid collation provider") { - val json = - s""" - |{ - | "type": "struct", - | "fields": [ - | { - | "name": "c1", - | "type": "string", - | "nullable": false, - | "metadata": { - | "${DataType.COLLATIONS_METADATA_KEY}": { - | "c1": "INVALID.INVALID" - | } - | } - | } - | ] - |} - |""".stripMargin - - - // Check that the exception will be thrown in case of invalid collation name and - // UNKNOWN_COLLATION_NAME config not enabled. - checkError( - exception = intercept[SparkException] { - DataType.fromJson(json) - }, - condition = "COLLATION_INVALID_PROVIDER", - parameters = Map( - "supportedProviders" -> "spark, icu", - "provider" -> "INVALID")) - - // Check that the exception will not be thrown in case of invalid collation name and - // UNKNOWN_COLLATION_NAME enabled, but UTF8_BINARY collation will be returned. - withSQLConf(SQLConf.ALLOW_READING_UNKNOWN_COLLATIONS.key -> "true") { - val dataType = DataType.fromJson(json) - assert(dataType === StructType( - StructField("c1", StringType(CollationFactory.UTF8_BINARY_COLLATION_ID), false) :: Nil)) - } - } - test("non string field has collation metadata") { val json = s""" @@ -1109,42 +1023,6 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { assert(parsedWithCollations === ArrayType(StringType(unicodeCollationId))) } - test("parse array type with invalid collation metadata") { - val utf8BinaryCollationId = CollationFactory.UTF8_BINARY_COLLATION_ID - val arrayJson = - s""" - |{ - | "type": "array", - | "elementType": "string", - | "containsNull": true - |} - |""".stripMargin - - val collationsMap = Map("element" -> "INVALID") - - // Parse without collations map - assert(DataType.parseDataType(JsonMethods.parse(arrayJson)) === ArrayType(StringType)) - - // Check that the exception will be thrown in case of invalid collation name and - // UNKNOWN_COLLATION_NAME config not enabled. - checkError( - exception = intercept[SparkException] { - DataType.parseDataType(JsonMethods.parse(arrayJson), collationsMap = collationsMap) - }, - condition = "COLLATION_INVALID_NAME", - parameters = Map( - "proposals" -> "id", - "collationName" -> "INVALID")) - - // Check that the exception will not be thrown in case of invalid collation name and - // UNKNOWN_COLLATION_NAME enabled, but UTF8_BINARY collation will be returned. 
- withSQLConf(SQLConf.ALLOW_READING_UNKNOWN_COLLATIONS.key -> "true") { - val dataType = DataType.parseDataType( - JsonMethods.parse(arrayJson), collationsMap = collationsMap) - assert(dataType === ArrayType(StringType(utf8BinaryCollationId))) - } - } - test("parse map type with collation metadata") { val unicodeCollationId = CollationFactory.collationNameToId("UNICODE") val mapJson = @@ -1168,44 +1046,6 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { MapType(StringType(unicodeCollationId), StringType(unicodeCollationId))) } - test("parse map type with invalid collation metadata") { - val utf8BinaryCollationId = CollationFactory.UTF8_BINARY_COLLATION_ID - val mapJson = - s""" - |{ - | "type": "map", - | "keyType": "string", - | "valueType": "string", - | "valueContainsNull": true - |} - |""".stripMargin - - val collationsMap = Map("key" -> "INVALID", "value" -> "INVALID") - - // Parse without collations map - assert(DataType.parseDataType(JsonMethods.parse(mapJson)) === MapType(StringType, StringType)) - - // Check that the exception will be thrown in case of invalid collation name and - // UNKNOWN_COLLATION_NAME config not enabled. - checkError( - exception = intercept[SparkException] { - DataType.parseDataType(JsonMethods.parse(mapJson), collationsMap = collationsMap) - }, - condition = "COLLATION_INVALID_NAME", - parameters = Map( - "proposals" -> "id", - "collationName" -> "INVALID")) - - // Check that the exception will not be thrown in case of invalid collation name and - // UNKNOWN_COLLATION_NAME enabled, but UTF8_BINARY collation will be returned. - withSQLConf(SQLConf.ALLOW_READING_UNKNOWN_COLLATIONS.key -> "true") { - val dataType = DataType.parseDataType( - JsonMethods.parse(mapJson), collationsMap = collationsMap) - assert(dataType === MapType( - StringType(utf8BinaryCollationId), StringType(utf8BinaryCollationId))) - } - } - test("SPARK-48680: Add CharType and VarcharType to DataTypes JAVA API") { assert(DataTypes.createCharType(1) === CharType(1)) assert(DataTypes.createVarcharType(100) === VarcharType(100))
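With the escape hatch removed, schema deserialization has exactly one behavior for unknown collations: fail fast. Below is a minimal sketch (not part of this patch) of the post-change behavior, mirroring the JSON shape used by the removed tests; it assumes it runs somewhere DataType.COLLATIONS_METADATA_KEY is visible (e.g. inside the org.apache.spark.sql package, as in DataTypeSuite):

import org.apache.spark.SparkException
import org.apache.spark.sql.types.DataType

val json =
  s"""
     |{
     |  "type": "struct",
     |  "fields": [
     |    {
     |      "name": "c1",
     |      "type": "string",
     |      "nullable": false,
     |      "metadata": {
     |        "${DataType.COLLATIONS_METADATA_KEY}": {
     |          "c1": "icu.INVALID"
     |        }
     |      }
     |    }
     |  ]
     |}
     |""".stripMargin

// With spark.sql.collation.allowReadingUnknownCollations gone, an unknown
// collation name always raises COLLATION_INVALID_NAME; there is no longer a
// config under which parsing silently falls back to UTF8_BINARY.
try {
  DataType.fromJson(json)
  assert(false, "expected COLLATION_INVALID_NAME")
} catch {
  case e: SparkException => assert(e.getCondition == "COLLATION_INVALID_NAME")
}

An unknown provider prefix (e.g. "INVALID.INVALID" instead of "icu.INVALID") likewise now always fails, with COLLATION_INVALID_PROVIDER, matching the error paths asserted by the tests this patch deletes.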