Skip to content

Commit

Permalink
#65 Fix default record id prefix being null
Browse files Browse the repository at this point in the history
  • Loading branch information
Zejnilovic authored Oct 31, 2024
1 parent f9fbb49 commit 0ee16c8
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@ package za.co.absa.standardization.config
import za.co.absa.standardization.RecordIdGeneration

/**
 * Configuration of the metadata columns, with all column names derived from a common [[prefix]].
 *
 * Every member is a `def` on purpose. A concrete `val` declared in a trait body is evaluated while
 * the trait itself is being initialized, which happens before an implementing class has assigned
 * its own `prefix`. With `val`s the derived names were therefore built from a `null` prefix
 * (e.g. `"null_record_id"`). As `def`s they are computed on access, after construction has finished.
 * Implementations remain free to override any of these members with a `val`.
 */
trait MetadataColumnsConfig {
  /** Flag controlling the metadata columns (presumably whether they get added at all; confirm against the caller). */
  def addColumns: Boolean

  /** Common prefix that every derived column name below starts with. */
  def prefix: String

  /** Strategy selected for record id generation. */
  def recordIdStrategy: RecordIdGeneration.IdType

  /** Date pattern used for the report date. */
  def reportDateFormat: String = "yyyy-MM-dd"

  /** `<prefix>_info_date` */
  def infoDateColumn: String = prefix + "_info_date"

  /** `<prefix>_info_date_string`, built on top of [[infoDateColumn]]. */
  def infoDateColumnString: String = s"${infoDateColumn}_string"

  /** `<prefix>_info_version` */
  def infoVersionColumn: String = prefix + "_info_version"

  /** `<prefix>_record_id` */
  def recordId: String = prefix + "_record_id"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* Copyright 2021 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package za.co.absa.standardization.config

import org.scalatest.funsuite.AnyFunSuiteLike
import za.co.absa.standardization.RecordIdGeneration
import za.co.absa.standardization.types.CommonTypeDefaults

/**
 * Verifies the values exposed by `DefaultStandardizationConfig`, in particular that the metadata
 * column names are built from the configured prefix.
 */
class MetadataColumnsConfigTest extends AnyFunSuiteLike {

  test("Test DefaultStandardizationConfig") {
    val defaults = DefaultStandardizationConfig

    // General settings
    assert(defaults.errorColumn == "errCol")
    assert(!defaults.failOnInputNotPerSchema)
    assert(defaults.timezone == "UTC")

    // Error codes
    val codes = defaults.errorCodes
    assert(codes.castError == "E00000")
    assert(codes.nullError == "E00002")
    assert(codes.typeError == "E00006")
    assert(codes.schemaError == "E00007")

    // Metadata columns: every derived name must start with the prefix
    val metadata = defaults.metadataColumns
    assert(metadata.addColumns)
    assert(metadata.prefix == "standardization")
    assert(metadata.recordIdStrategy == RecordIdGeneration.IdType.TrueUuids)
    assert(metadata.reportDateFormat == "yyyy-MM-dd")
    assert(metadata.infoDateColumn == "standardization_info_date")
    assert(metadata.infoDateColumnString == "standardization_info_date_string")
    assert(metadata.infoVersionColumn == "standardization_info_version")
    assert(metadata.recordId == "standardization_record_id")

    // Type defaults
    assert(defaults.typeDefaults == CommonTypeDefaults)
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import za.co.absa.spark.commons.implicits.DataFrameImplicits.DataFrameEnhancemen
import za.co.absa.spark.commons.test.SparkTestBase
import za.co.absa.spark.commons.utils.JsonUtils
import za.co.absa.standardization.RecordIdGeneration.IdType.NoId
import za.co.absa.standardization.config.{BasicMetadataColumnsConfig, BasicStandardizationConfig, ErrorCodesConfig}
import za.co.absa.standardization.config.{BasicMetadataColumnsConfig, BasicStandardizationConfig, DefaultStandardizationConfig, ErrorCodesConfig}
import za.co.absa.standardization.types.{CommonTypeDefaults, TypeDefaults}
import za.co.absa.standardization.udf.UDFLibrary
import za.co.absa.standardization._
Expand Down Expand Up @@ -133,8 +133,9 @@ class StandardizationInterpreterSuite extends AnyFunSuite with SparkTestBase wit
Seq(
StructField("yourRef", StringType, nullable = false))), nullable = false)))

val standardizedDF = Standardization.standardize(orig, schema, stdConfig)
val standardizedDF = Standardization.standardize(orig, schema)

assert(standardizedDF.schema.treeString.contains("standardization_record_id"))
assertResult(exp)(standardizedDF.as[MyWrapperStd].collect().toList)
}

Expand Down Expand Up @@ -170,8 +171,9 @@ class StandardizationInterpreterSuite extends AnyFunSuite with SparkTestBase wit
StructField("errCol",
ArrayType(
ErrorMessage.errorColSchema, containsNull = false)))
.add(StructField("standardization_record_id", StringType, nullable = false))

val standardizedDF = Standardization.standardize(sourceDF, stdExpectedSchema, stdConfig)
val standardizedDF = Standardization.standardize(sourceDF, stdExpectedSchema)

logger.debug(standardizedDF.schema.treeString)
logger.debug(expectedSchema.treeString)
Expand Down

0 comments on commit 0ee16c8

Please sign in to comment.