diff --git a/app/src/main/scala/io/github/datacatering/datacaterer/core/generator/BatchDataProcessor.scala b/app/src/main/scala/io/github/datacatering/datacaterer/core/generator/BatchDataProcessor.scala index f2f1e7c..85d14ca 100644 --- a/app/src/main/scala/io/github/datacatering/datacaterer/core/generator/BatchDataProcessor.scala +++ b/app/src/main/scala/io/github/datacatering/datacaterer/core/generator/BatchDataProcessor.scala @@ -68,14 +68,17 @@ class BatchDataProcessor(connectionConfigsByName: Map[String, Map[String, String s"new-num-records=${additionalDf.count()}, actual-num-records=$dfRecordCount") } - while (targetNumRecords != dfRecordCount && retries < maxRetries) { - retries += 1 - generateAdditionalRecords() - } - if (targetNumRecords != dfRecordCount && retries == maxRetries) { - LOGGER.warn("Unable to reach expected number of records due to reaching max retries. " + - s"Can be due to limited number of potential unique records, " + - s"target-num-records=$targetNumRecords, actual-num-records=${dfRecordCount}") + //if random amount of records, don't try to regenerate more records + if (s.count.generator.isEmpty && s.count.perColumn.exists(_.generator.isEmpty)) { + while (targetNumRecords != dfRecordCount && retries < maxRetries) { + retries += 1 + generateAdditionalRecords() + } + if (targetNumRecords != dfRecordCount && retries == maxRetries) { + LOGGER.warn("Unable to reach expected number of records due to reaching max retries. " + + s"Can be due to limited number of potential unique records, " + + s"target-num-records=$targetNumRecords, actual-num-records=${dfRecordCount}") + } } trackRecordsPerStep = trackRecordsPerStep ++ Map(recordStepName -> stepRecords.copy(currentNumRecords = dfRecordCount)) diff --git a/app/src/main/scala/io/github/datacatering/datacaterer/core/util/UniqueFieldsUtil.scala b/app/src/main/scala/io/github/datacatering/datacaterer/core/util/UniqueFieldsUtil.scala index 32be44d..35e2202 100644 --- a/app/src/main/scala/io/github/datacatering/datacaterer/core/util/UniqueFieldsUtil.scala +++ b/app/src/main/scala/io/github/datacatering/datacaterer/core/util/UniqueFieldsUtil.scala @@ -100,8 +100,10 @@ class UniqueFieldsUtil(plan: Plan, executableTasks: List[(TaskSummary, Task)])(i val uniqueKeys = step.gatherUniqueFields val uniqueKeyUf = if (uniqueKeys.nonEmpty) uniqueKeys.map(u => UniqueFields(t._1.dataSourceName, step.name, List(u))) else List() val allKeys = primaryKeyUf ++ uniqueKeyUf - LOGGER.debug(s"Found unique fields that require unique values, " + - s"data-source-name=${t._1.dataSourceName}, step-name=${step.name}, columns=${allKeys.map(_.columns).mkString(",")}") + if (allKeys.nonEmpty) { + LOGGER.debug(s"Found unique fields that require unique values, " + + s"data-source-name=${t._1.dataSourceName}, step-name=${step.name}, columns=${allKeys.map(_.columns).mkString(",")}") + } allKeys }) }) diff --git a/app/src/test/resources/sample/task/postgres/postgres-balance-transaction-task.yaml b/app/src/test/resources/sample/task/postgres/postgres-balance-transaction-task.yaml index 7243efe..1697365 100644 --- a/app/src/test/resources/sample/task/postgres/postgres-balance-transaction-task.yaml +++ b/app/src/test/resources/sample/task/postgres/postgres-balance-transaction-task.yaml @@ -13,7 +13,6 @@ steps: type: "regex" options: regex: "ACC1[0-9]{5,10}" - isUnique: true - name: "create_time" type: "timestamp" - name: "account_status" @@ -39,9 +38,6 @@ steps: schema: 
fields: - name: "account_number" - generator: - options: - isUnique: true - name: "create_time" type: "timestamp" - name: "transaction_id"