Skip to content

Commit

Permalink
Add in method for API to generate all combinations at step level
Browse files Browse the repository at this point in the history
  • Loading branch information
pflooky committed Sep 12, 2024
1 parent 8d1973f commit de87220
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 8 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package io.github.datacatering.datacaterer.api.connection

import io.github.datacatering.datacaterer.api.model.Constants.{ENABLE_DATA_VALIDATION, FORMAT}
import io.github.datacatering.datacaterer.api.model.Constants.{ALL_COMBINATIONS, ENABLE_DATA_VALIDATION, FORMAT}
import io.github.datacatering.datacaterer.api.{ConnectionConfigWithTaskBuilder, CountBuilder, FieldBuilder, GeneratorBuilder, MetadataSourceBuilder, SchemaBuilder, StepBuilder, TaskBuilder, TasksBuilder, ValidationBuilder, WaitConditionBuilder}
import io.github.datacatering.datacaterer.api.model.{Step, Task}

Expand Down Expand Up @@ -56,6 +56,11 @@ trait ConnectionTaskBuilder[T] {
this
}

/**
 * Toggle generation of every possible combination of `oneOf` field values for this step.
 *
 * Stores the flag as the step option [[ALL_COMBINATIONS]] ("true"/"false") on the
 * current step, creating the step first via `getStep` if needed.
 *
 * @param enable whether all combinations should be generated
 * @return this builder, for fluent chaining
 */
def allCombinations(enable: Boolean): ConnectionTaskBuilder[T] = {
  val stepWithFlag = getStep.option(ALL_COMBINATIONS, enable.toString)
  this.step = Some(stepWithFlag)
  this
}

def numPartitions(numPartitions: Int): ConnectionTaskBuilder[T] = {
this.step = Some(getStep.numPartitions(numPartitions))
this
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ object Constants {
lazy val HTTP_PARAMETER_TYPE = "httpParamType"
lazy val POST_SQL_EXPRESSION = "postSqlExpression"

//step options
lazy val ALL_COMBINATIONS = "allCombinations"

//field labels
lazy val LABEL_NAME = "name"
lazy val LABEL_USERNAME = "username"
Expand Down Expand Up @@ -209,6 +212,7 @@ object Constants {
"spark.sql.cbo.planStats.enabled" -> "true",
"spark.sql.legacy.allowUntypedScalaUDF" -> "true",
"spark.sql.legacy.allowParameterlessCount" -> "true",
"spark.sql.statistics.histogram.enabled" -> "true",
"spark.sql.shuffle.partitions" -> "10",
"spark.sql.catalog.postgres" -> "",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package io.github.datacatering.datacaterer.api

import io.github.datacatering.datacaterer.api.model.Constants.FOREIGN_KEY_DELIMITER
import io.github.datacatering.datacaterer.api.model.Constants.{ALL_COMBINATIONS, FOREIGN_KEY_DELIMITER}
import io.github.datacatering.datacaterer.api.connection.FileBuilder
import io.github.datacatering.datacaterer.api.model.{DataCatererConfiguration, ExpressionValidation, ForeignKeyRelation, PauseWaitCondition}
import org.junit.runner.RunWith
Expand Down Expand Up @@ -215,4 +215,14 @@ class PlanBuilderTest extends AnyFunSuite {
assert(fk.head._2.isEmpty)
assert(fk.head._3.size == 1)
}

// Verifies that enabling allCombinations on a file connection records the
// ALL_COMBINATIONS option (value "true") on the underlying step.
test("Can create a step that will generate records for all combinations") {
  val jsonTask = ConnectionConfigWithTaskBuilder()
    .file("my_json", "json")
    .allCombinations(true)

  val stepOptions = jsonTask.step.map(_.step.options)
  assert(stepOptions.isDefined)
  assert(stepOptions.exists(_.nonEmpty))
  assert(stepOptions.exists(_.contains(ALL_COMBINATIONS)))
  assert(stepOptions.exists(_.get(ALL_COMBINATIONS).exists(_.equalsIgnoreCase("true"))))
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package io.github.datacatering.datacaterer.core.generator

import io.github.datacatering.datacaterer.api.model.Constants.{ONE_OF_GENERATOR, SQL_GENERATOR}
import io.github.datacatering.datacaterer.api.model.Constants.{ALL_COMBINATIONS, ONE_OF_GENERATOR, SQL_GENERATOR}
import io.github.datacatering.datacaterer.api.model.{Field, PerColumnCount, Step}
import io.github.datacatering.datacaterer.core.exception.InvalidStepCountGeneratorConfigurationException
import io.github.datacatering.datacaterer.core.generator.provider.DataGenerator
Expand Down Expand Up @@ -36,7 +36,7 @@ class DataGeneratorFactory(faker: Faker)(implicit val sparkSession: SparkSession
}

private def generateDataViaSql(dataGenerators: List[DataGenerator[_]], step: Step, indexedDf: DataFrame): DataFrame = {
val allRecordsDf = if (step.options.contains("allCombinations") && step.options("allCombinations").equalsIgnoreCase("true")) {
val allRecordsDf = if (step.options.contains(ALL_COMBINATIONS) && step.options(ALL_COMBINATIONS).equalsIgnoreCase("true")) {
generateCombinationRecords(dataGenerators, indexedDf)
} else {
val genSqlExpression = dataGenerators.map(dg => s"${dg.generateSqlExpressionWrapper} AS `${dg.structField.name}`")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package io.github.datacatering.datacaterer.core.generator

import io.github.datacatering.datacaterer.api.model.Constants.{MAXIMUM_LENGTH, MINIMUM_LENGTH, ONE_OF_GENERATOR, RANDOM_GENERATOR, REGEX_GENERATOR, SQL_GENERATOR}
import io.github.datacatering.datacaterer.api.model.Constants.{ALL_COMBINATIONS, MAXIMUM_LENGTH, MINIMUM_LENGTH, ONE_OF_GENERATOR, RANDOM_GENERATOR, REGEX_GENERATOR, SQL_GENERATOR}
import io.github.datacatering.datacaterer.api.model.{Count, Field, Generator, PerColumnCount, Schema, Step}
import io.github.datacatering.datacaterer.core.util.SparkSuite
import net.datafaker.Faker
Expand Down Expand Up @@ -95,7 +95,7 @@ class DataGeneratorFactoryTest extends SparkSuite {

test("Can generate data with all possible oneOf combinations enabled in step") {
val step = Step("transaction", "parquet", Count(),
Map("path" -> "sample/output/parquet/transactions", "allCombinations" -> "true"), schema)
Map("path" -> "sample/output/parquet/transactions", ALL_COMBINATIONS -> "true"), schema)

val df = dataGeneratorFactory.generateDataForStep(step, "parquet", 0, 15)
df.cache()
Expand All @@ -111,7 +111,7 @@ class DataGeneratorFactoryTest extends SparkSuite {
Some(Generator(ONE_OF_GENERATOR, Map("oneOf" -> List("open", "closed", "suspended")))))
val fieldsWithStatus = Some(schema.fields.get ++ List(statusField))
val step = Step("transaction", "parquet", Count(),
Map("path" -> "sample/output/parquet/transactions", "allCombinations" -> "true"), schema.copy(fields = fieldsWithStatus))
Map("path" -> "sample/output/parquet/transactions", ALL_COMBINATIONS -> "true"), schema.copy(fields = fieldsWithStatus))

val df = dataGeneratorFactory.generateDataForStep(step, "parquet", 0, 15)
df.cache()
Expand Down
2 changes: 1 addition & 1 deletion gradle.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
groupId=io.github.data-catering
version=0.11.9
version=0.11.10

scalaVersion=2.12
scalaSpecificVersion=2.12.19
Expand Down

0 comments on commit de87220

Please sign in to comment.