sparkConf.properties
# Origin cluster credentials (use "host + port" OR "secure-connect-bundle" but not both)
spark.origin.host localhost
spark.origin.port 9042
#spark.origin.scb file:///aaa/bbb/secure-connect-enterprise.zip
spark.origin.username some-username
spark.origin.password some-secret-password
spark.origin.keyspaceTable test.a1
# Target cluster credentials (use "host + port" OR "secure-connect-bundle" but not both)
#spark.target.host localhost
#spark.target.port 9042
spark.target.scb file:///aaa/bbb/secure-connect-enterprise.zip
spark.target.username client-id
spark.target.password client-secret
spark.target.keyspaceTable test.a2
# Add 'missing' rows (during 'Validation') in 'Target' from 'Origin'. N/A for 'Migration'
spark.target.autocorrect.missing false
# Update 'mismatched' rows (during 'Validation') in 'Target' to match 'Origin'. N/A for 'Migration'
spark.target.autocorrect.mismatch false
# Read & write rate limits (rows/second). Higher values improve performance but put more load on the cluster
spark.readRateLimit 20000
spark.writeRateLimit 20000
# Used to split the Cassandra token range into slices; slices are migrated one at a time in random order
# 10K splits usually work for tables up to 100GB (uncompressed) with a balanced token distribution
# For larger tables, test on 1% of the volume (using the coveragePercent param) and increase the number of splits as needed
spark.numSplits 10000
# Use a value of 1 (disables batching) when the primary key and partition key are the same
# For tables with a high average number of rows per partition, use a higher value to improve performance
spark.batchSize 10
# The 'query' properties below are set based on the table schema (see the worked example after the data-type legend)
spark.query.origin comma-separated-partition-key,comma-separated-clustering-key,comma-separated-other-columns
spark.query.origin.partitionKey comma-separated-partition-key
spark.query.target.id comma-separated-partition-key,comma-separated-clustering-key
# Comma-separated numeric data-type mapping (e.g. 'text' maps to '0') for all columns listed in "spark.query.origin"
spark.query.types 9,1,4,3
#############################################################################################################
# The following are the supported type codes and the Cassandra data types they map to
# 0: ascii, text, varchar
# 1: int
# 2: bigint, counter
# 3: double
# 4: timestamp
# 5: map (separate type by %) - Example: 5%1%0 for map<int, text>
# 6: list (separate type by %) - Example: 6%0 for list<text>
# 7: blob
# 8: set (separate type by %) - Example: 8%0 for set<text>
# 9: uuid, timeuuid
# 10: boolean
# 11: tuple
# 12: float
# 13: tinyint
# 14: decimal
# 15: date
# 16: UDT [any user-defined-type created using 'CREATE TYPE']
# 17: varint
# 18: time
# 19: smallint
# Note: Ignore "Frozen" while mapping Collections (Map/List/Set) - Example: 5%1%0 for frozen<map<int, text>>
#############################################################################################################
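# Worked example (hypothetical, for illustration only - the column names below are not from this repo):
# given an origin table  CREATE TABLE test.a1 (id uuid, seq int, created timestamp, amount double, PRIMARY KEY (id, seq))
# the query properties above would be set as
#   spark.query.origin              id,seq,created,amount
#   spark.query.origin.partitionKey id
#   spark.query.target.id           id,seq
#   spark.query.types               9,1,4,3   (uuid, int, timestamp, double per the legend above)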
# ENABLE ONLY IF COLUMN NAMES ON TARGET ARE DIFFERENT FROM ORIGIN (SCHEMA & DATA-TYPES MUST BE THE SAME)
#spark.query.target comma-separated-partition-key,comma-separated-clustering-key,comma-separated-other-columns
# The tool adds TTL & Writetime at row-level (not field-level).
# The largest TTL & Writetime values are used if multiple indexes are listed (comma separated)
# Comma-separated column indexes from "spark.query.origin" used to find the largest TTL or Writetime
spark.query.ttl.cols 2,3
spark.query.writetime.cols 2,3
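# For the hypothetical test.a1 example above, indexes 2,3 would (assuming 0-based positions in
# "spark.query.origin") point at the regular columns 'created' and 'amount'; primary-key columns
# carry no TTL or Writetime in Cassandra, so only regular columns should be listed here.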
# ENABLE ONLY IF YOU WANT TO MIGRATE/VALIDATE ROWS BASED ON CQL FILTER
#spark.query.condition
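# Hypothetical example only - the exact fragment expected (a CQL clause appended to the origin
# read query) may vary by tool version, so verify against the documentation before using:
#spark.query.condition AND created > '2023-01-01 00:00:00+0000'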
# ENABLE ONLY IF IT IS A COUNTER TABLE
#spark.counterTable false
#spark.counterTable.cql
#spark.counterTable.cql.index 0
# ENABLE ONLY IF YOU WANT TO FILTER BASED ON WRITE-TIME (values must be in microseconds)
#spark.origin.writeTimeStampFilter false
#spark.origin.minWriteTimeStampFilter 0
#spark.origin.maxWriteTimeStampFilter 4102444800000000
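# Writetime values are microseconds since epoch: the max above (4102444800000000) is
# 2100-01-01 00:00:00 UTC. As a hypothetical example, keeping only rows written on or after
# 2023-01-01 00:00:00 UTC (epoch 1672531200 seconds) would use:
#spark.origin.writeTimeStampFilter true
#spark.origin.minWriteTimeStampFilter 1672531200000000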
# ENABLE ONLY IF retries are needed (retries a token-range slice if an exception occurs)
#spark.maxRetries 0
# ENABLE ONLY IF YOU WANT TO MIGRATE/VALIDATE SOME % OF ROWS (NOT 100%)
#spark.coveragePercent 100
# ENABLE ONLY IF YOU WANT TO LOG STATS MORE OR LESS FREQUENTLY THAN THE DEFAULT
#spark.printStatsAfter 100000
# ENABLE ONLY IF YOU WANT TO USE READ AND/OR WRITE CONSISTENCY OTHER THAN LOCAL_QUORUM
#spark.consistency.read LOCAL_QUORUM
#spark.consistency.write LOCAL_QUORUM
# ENABLE ONLY IF YOU WANT TO REDUCE FETCH-SIZE TO AVOID FrameTooLongException
#spark.read.fetch.sizeInRows 1000
# ENABLE ONLY IF YOU WANT TO USE CUSTOM FIXED WRITETIME VALUE ON TARGET
#spark.target.custom.writeTime 0
# ENABLE ONLY TO SKIP records larger than 10MB from Origin (to avoid the Astra guardrail error)
#spark.fieldGuardraillimitMB 10
# ENABLE ONLY IF a count of records larger than 10MB in Origin is needed
#spark.origin.checkTableforColSize false
#spark.origin.checkTableforColSize.cols partition-key,clustering-key
#spark.origin.checkTableforColSize.cols.types 9,1
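# If enabled, the two properties above list the key columns and their type codes from the legend;
# e.g. for the hypothetical test.a1 schema sketched earlier: cols 'id,seq' with types '9,1' (uuid, int).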
# ENABLE ONLY IF you want to filter data from Origin
#spark.origin.FilterData false
#spark.origin.FilterColumn test
#spark.origin.FilterColumnIndex 2
#spark.origin.FilterColumnType 6%16
#spark.origin.FilterColumnValue test
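# The sample values above are placeholders: FilterColumnIndex is the filter column's position
# (its index in "spark.query.origin"), and FilterColumnType uses the numeric codes from the
# legend, e.g. 6%16 denotes a list of a user-defined type. Verify against your tool version.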
# ONLY USE if SSL is enabled on origin Cassandra/DSE (e.g. Azure Cosmos Cassandra DB)
#spark.origin.ssl.enabled true
# ONLY USE if SSL clientAuth is enabled on origin Cassandra/DSE
#spark.origin.trustStore.path
#spark.origin.trustStore.password
#spark.origin.trustStore.type JKS
#spark.origin.keyStore.path
#spark.origin.keyStore.password
#spark.origin.enabledAlgorithms TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA
# ONLY USE if SSL is enabled on target Cassandra/DSE
#spark.target.ssl.enabled true
# ONLY USE if SSL clientAuth is enabled on target Cassandra/DSE
#spark.target.trustStore.path
#spark.target.trustStore.password
#spark.target.trustStore.type JKS
#spark.target.keyStore.path
#spark.target.keyStore.password
#spark.target.enabledAlgorithms TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA
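# Hypothetical filled-in example (paths and passwords below are placeholders, not defaults):
#spark.target.trustStore.path /path/to/client.truststore.jks
#spark.target.trustStore.password some-truststore-password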