From 913b9487179884849449f6403794a4ad30839684 Mon Sep 17 00:00:00 2001 From: beajohnson Date: Tue, 23 Apr 2024 12:12:31 -0700 Subject: [PATCH] dsbulk set up --- modules/ROOT/nav.adoc | 5 + modules/ROOT/pages/bulk-command-line.adoc | 468 ++++++++++++++++++ modules/ROOT/pages/bulk-migrate.adoc | 64 +++ modules/ROOT/pages/dsbulk-examples.adoc | 89 ++++ modules/ROOT/pages/dsbulk-migrator.adoc | 20 +- modules/ROOT/pages/dsbulk-prereqs.adoc | 10 + .../ROOT/pages/migrate-and-validate-data.adoc | 4 +- 7 files changed, 648 insertions(+), 12 deletions(-) create mode 100644 modules/ROOT/pages/bulk-command-line.adoc create mode 100644 modules/ROOT/pages/bulk-migrate.adoc create mode 100644 modules/ROOT/pages/dsbulk-examples.adoc create mode 100644 modules/ROOT/pages/dsbulk-prereqs.adoc diff --git a/modules/ROOT/nav.adoc b/modules/ROOT/nav.adoc index f6cb01fd..e99ee75e 100644 --- a/modules/ROOT/nav.adoc +++ b/modules/ROOT/nav.adoc @@ -24,6 +24,11 @@ * xref:change-read-routing.adoc[] //phase 5 * xref:connect-clients-to-target.adoc[] +* DSBulk Migrator +** xref:dsbulk-prereqs.adoc[] +** xref:bulk-migrate.adoc[] +** xref:bulk-command-line.adoc[] +** xref:dsbulk-examples.adoc[] * Troubleshooting ** xref:troubleshooting.adoc[] ** xref:troubleshooting-tips.adoc[] diff --git a/modules/ROOT/pages/bulk-command-line.adoc b/modules/ROOT/pages/bulk-command-line.adoc new file mode 100644 index 00000000..4858efe4 --- /dev/null +++ b/modules/ROOT/pages/bulk-command-line.adoc @@ -0,0 +1,468 @@ += Command-line options + +* xref:#dsbulk-live[Live migration command-line options] +* xref:#dsbulk-script[Script generation command-line options] +* xref:#dsbulk-ddl[DDL generation command-line options] +* xref:#getting-help-with-dsbulk-migrator[Getting {dsbulk-migrator} help] +* xref:#dsbulk-examples[{dsbulk-migrator} examples] + + +[[dsbulk-live]] +== Live migration command-line options + +The following options are available for the `migrate-live` command. 
+Most options have sensible default values and do not need to be specified, unless you want to override the default value. + +[cols="2,8,14"] +|=== + +| `-c` +| `--dsbulk-cmd=CMD` +| The external DSBulk command to use. +Ignored if the embedded DSBulk is being used. +The default is simply `dsbulk`, assuming that the command is available through the `PATH` variable contents. + +| `-d` +| `--data-dir=PATH` +| The directory where data will be exported to and imported from. +The default is a `data` subdirectory in the current working directory. +The data directory will be created if it does not exist. +Tables will be exported and imported in subdirectories of the data directory specified here. +There will be one subdirectory per keyspace in the data directory, then one subdirectory per table in each keyspace directory. + +| `-e` +| `--dsbulk-use-embedded` +| Use the embedded DSBulk version instead of an external one. +The default is to use an external DSBulk command. + +| +| `--export-bundle=PATH` +| The path to a secure connect bundle to connect to the Origin cluster, if that cluster is a {company} {astra_db} cluster. +Options `--export-host` and `--export-bundle` are mutually exclusive. + +| +| `--export-consistency=CONSISTENCY` +| The consistency level to use when exporting data. +The default is `LOCAL_QUORUM`. + +| +| `--export-dsbulk-option=OPT=VALUE` +| An extra DSBulk option to use when exporting. +Any valid DSBulk option can be specified here, and it will passed as is to the DSBulk process. +DSBulk options, including driver options, must be passed as `--long.option.name=`. +Short options are not supported. + +| +| `--export-host=HOST[:PORT]` +| The host name or IP and, optionally, the port of a node from the Origin cluster. +If the port is not specified, it will default to `9042`. +This option can be specified multiple times. +Options `--export-host` and `--export-bundle` are mutually exclusive. 
+ +| +| `--export-max-concurrent-files=NUM\|AUTO` +| The maximum number of concurrent files to write to. +Must be a positive number or the special value `AUTO`. +The default is `AUTO`. + +| +| `--export-max-concurrent-queries=NUM\|AUTO` +| The maximum number of concurrent queries to execute. +Must be a positive number or the special value `AUTO`. +The default is `AUTO`. + +| +| `--export-max-records=NUM` +| The maximum number of records to export for each table. +Must be a positive number or `-1`. +The default is `-1` (export the entire table). + +| +| `--export-password` +| The password to use to authenticate against the Origin cluster. +Options `--export-username` and `--export-password` must be provided together, or not at all. +Omit the parameter value to be prompted for the password interactively. + +| +| `--export-splits=NUM\|NC` +| The maximum number of token range queries to generate. +Use the `NC` syntax to specify a multiple of the number of available cores. +For example, `8C` = 8 times the number of available cores. +The default is `8C`. +This is an advanced setting; you should rarely need to modify the default value. + +| +| `--export-username=STRING` +| The username to use to authenticate against the Origin cluster. +Options `--export-username` and `--export-password` must be provided together, or not at all. + +| `-h` +| `--help` +| Displays this help text. + +| +| `--import-bundle=PATH` +| The path to a secure connect bundle to connect to the Target cluster, if it's a {company} {astra_db} cluster. +Options `--import-host` and `--import-bundle` are mutually exclusive. + +| +| `--import-consistency=CONSISTENCY` +| The consistency level to use when importing data. +The default is `LOCAL_QUORUM`. + +| +| `--import-default-timestamp=` +| The default timestamp to use when importing data. +Must be a valid instant in ISO-8601 syntax. +The default is `1970-01-01T00:00:00Z`. 
+ +| +| `--import-dsbulk-option=OPT=VALUE` +| An extra DSBulk option to use when importing. +Any valid DSBulk option can be specified here, and it will passed as is to the DSBulk process. +DSBulk options, including driver options, must be passed as `--long.option.name=`. +Short options are not supported. + +| +| `--import-host=HOST[:PORT]` +| The host name or IP and, optionally, the port of a node from the Target cluster. +If the port is not specified, it will default to `9042`. +This option can be specified multiple times. +Options `--import-host` and `--import-bundle` are mutually exclusive. + +| +| `--import-max-concurrent-files=NUM\|AUTO` +| The maximum number of concurrent files to read from. +Must be a positive number or the special value `AUTO`. +The default is `AUTO`. + +| +| `--import-max-concurrent-queries=NUM\|AUTO` +| The maximum number of concurrent queries to execute. +Must be a positive number or the special value `AUTO`. +The default is `AUTO`. + +| +| `--import-max-errors=NUM` +| The maximum number of failed records to tolerate when importing data. +The default is `1000`. +Failed records will appear in a `load.bad` file in the DSBulk operation directory. + +| +| `--import-password` +| The password to use to authenticate against the Target cluster. +Options `--import-username` and `--import-password` must be provided together, or not at all. +Omit the parameter value to be prompted for the password interactively. + +| +| `--import-username=STRING` +| The username to use to authenticate against the Target cluster. Options `--import-username` and `--import-password` must be provided together, or not at all. + +| `-k` +| `--keyspaces=REGEX` +| A regular expression to select keyspaces to migrate. +The default is to migrate all keyspaces except system keyspaces, DSE-specific keyspaces, and the OpsCenter keyspace. +Case-sensitive keyspace names must be entered in their exact case. 
+ +| `-l` +| `--dsbulk-log-dir=PATH` +| The directory where DSBulk should store its logs. +The default is a `logs` subdirectory in the current working directory. +This subdirectory will be created if it does not exist. +Each DSBulk operation will create a subdirectory in the log directory specified here. + +| +| `--max-concurrent-ops=NUM` +| The maximum number of concurrent operations (exports and imports) to carry. +The default is `1`. +Set this to higher values to allow exports and imports to occur concurrently. +For example, with a value of `2`, each table will be imported as soon as it is exported, while the next table is being exported. + +| +| `--skip-truncate-confirmation` +| Skip truncate confirmation before actually truncating tables. +Only applicable when migrating counter tables, ignored otherwise. + +| `-t` +| `--tables=REGEX` +| A regular expression to select tables to migrate. +The default is to migrate all tables in the keyspaces that were selected for migration with `--keyspaces`. +Case-sensitive table names must be entered in their exact case. + +| +| `--table-types=regular\|counter\|all` +| The table types to migrate. +The default is `all`. + +| +| `--truncate-before-export` +| Truncate tables before the export instead of after. +The default is to truncate after the export. +Only applicable when migrating counter tables, ignored otherwise. + +| `-w` +| `--dsbulk-working-dir=PATH` +| The directory where DSBulk should be executed. +Ignored if the embedded DSBulk is being used. +If unspecified, it defaults to the current working directory. + +|=== + + +[[dsbulk-script]] +== Script generation command-line options + +The following options are available for the `generate-script` command. +Most options have sensible default values and do not need to be specified, unless you want to override the default value. + + +[cols="2,8,14"] +|=== + +| `-c` +| `--dsbulk-cmd=CMD` +| The DSBulk command to use. 
+The default is simply `dsbulk`, assuming that the command is available through the `PATH` variable contents. + +| `-d` +| `--data-dir=PATH` +| The directory where data will be exported to and imported from. +The default is a `data` subdirectory in the current working directory. +The data directory will be created if it does not exist. + +| +| `--export-bundle=PATH` +| The path to a secure connect bundle to connect to the Origin cluster, if that cluster is a {company} {astra_db} cluster. +Options `--export-host` and `--export-bundle` are mutually exclusive. + +| +| `--export-consistency=CONSISTENCY` +| The consistency level to use when exporting data. +The default is `LOCAL_QUORUM`. + +| +| `--export-dsbulk-option=OPT=VALUE` +| An extra DSBulk option to use when exporting. +Any valid DSBulk option can be specified here, and it will passed as is to the DSBulk process. +DSBulk options, including driver options, must be passed as `--long.option.name=`. +Short options are not supported. + +| +| `--export-host=HOST[:PORT]` +| The host name or IP and, optionally, the port of a node from the Origin cluster. +If the port is not specified, it will default to `9042`. +This option can be specified multiple times. +Options `--export-host` and `--export-bundle` are mutually exclusive. + +| +| `--export-max-concurrent-files=NUM\|AUTO` +| The maximum number of concurrent files to write to. +Must be a positive number or the special value `AUTO`. +The default is `AUTO`. + +| +| `--export-max-concurrent-queries=NUM\|AUTO` +| The maximum number of concurrent queries to execute. +Must be a positive number or the special value `AUTO`. +The default is `AUTO`. + +| +| `--export-max-records=NUM` +| The maximum number of records to export for each table. +Must be a positive number or `-1`. +The default is `-1` (export the entire table). + +| +| `--export-password` +| The password to use to authenticate against the Origin cluster. 
+Options `--export-username` and `--export-password` must be provided together, or not at all. +Omit the parameter value to be prompted for the password interactively. + +| +| `--export-splits=NUM\|NC` +| The maximum number of token range queries to generate. +Use the `NC` syntax to specify a multiple of the number of available cores. +For example, `8C` = 8 times the number of available cores. +The default is `8C`. +This is an advanced setting. +You should rarely need to modify the default value. + +| +| `--export-username=STRING` +| The username to use to authenticate against the Origin cluster. +Options `--export-username` and `--export-password` must be provided together, or not at all. + +| `-h` +| `--help` +| Displays this help text. + +| +| `--import-bundle=PATH` +| The path to a secure connect bundle to connect to the Target cluster, if it's a {company} {astra_db} cluster. +Options `--import-host` and `--import-bundle` are mutually exclusive. + +| +| `--import-consistency=CONSISTENCY` +| The consistency level to use when importing data. +The default is `LOCAL_QUORUM`. + +| +| `--import-default-timestamp=` +| The default timestamp to use when importing data. +Must be a valid instant in ISO-8601 syntax. +The default is `1970-01-01T00:00:00Z`. + +| +| `--import-dsbulk-option=OPT=VALUE` +| An extra DSBulk option to use when importing. +Any valid DSBulk option can be specified here, and it will passed as is to the DSBulk process. +DSBulk options, including driver options, must be passed as `--long.option.name=`. +Short options are not supported. + +| +| `--import-host=HOST[:PORT]` +| The host name or IP and, optionally, the port of a node from the Target cluster. +If the port is not specified, it will default to `9042`. +This option can be specified multiple times. +Options `--import-host` and `--import-bundle` are mutually exclusive. + +| +| `--import-max-concurrent-files=NUM\|AUTO` +| The maximum number of concurrent files to read from. 
+Must be a positive number or the special value `AUTO`. +The default is `AUTO`. + +| +| `--import-max-concurrent-queries=NUM\|AUTO` +| The maximum number of concurrent queries to execute. +Must be a positive number or the special value `AUTO`. +The default is `AUTO`. + +| +| `--import-max-errors=NUM` +| The maximum number of failed records to tolerate when importing data. +The default is `1000`. +Failed records will appear in a `load.bad` file in the DSBulk operation directory. + +| +| `--import-password` +| The password to use to authenticate against the Target cluster. +Options `--import-username` and `--import-password` must be provided together, or not at all. +Omit the parameter value to be prompted for the password interactively. + +| +| `--import-username=STRING` +| The username to use to authenticate against the Target cluster. +Options `--import-username` and `--import-password` must be provided together, or not at all. + +| `-k` +| `--keyspaces=REGEX` +| A regular expression to select keyspaces to migrate. +The default is to migrate all keyspaces except system keyspaces, DSE-specific keyspaces, and the OpsCenter keyspace. +Case-sensitive keyspace names must be entered in their exact case. + +| `-l` +| `--dsbulk-log-dir=PATH` +| The directory where DSBulk should store its logs. +The default is a `logs` subdirectory in the current working directory. +This subdirectory will be created if it does not exist. +Each DSBulk operation will create a subdirectory in the log directory specified here. + +| `-t` +| `--tables=REGEX` +| A regular expression to select tables to migrate. +The default is to migrate all tables in the keyspaces that were selected for migration with `--keyspaces`. +Case-sensitive table names must be entered in their exact case. + +| +| `--table-types=regular\|counter\|all` +| The table types to migrate. The default is `all`. 
+ +|=== + + + +[[dsbulk-ddl]] +== DDL generation command-line options + +The following options are available for the `generate-ddl` command. +Most options have sensible default values and do not need to be specified, unless you want to override the default value. + +[cols="2,8,14"] +|=== + +| `-a` +| `--optimize-for-astra` +| Produce CQL scripts optimized for {company} {astra_db}. +{astra_db} does not allow some options in DDL statements. +Using this {dsbulk-migrator} command option, forbidden {astra_db} options will be omitted from the generated CQL files. + +| `-d` +| `--data-dir=PATH` +| The directory where data will be exported to and imported from. +The default is a `data` subdirectory in the current working directory. +The data directory will be created if it does not exist. + +| +| `--export-bundle=PATH` +| The path to a secure connect bundle to connect to the Origin cluster, if that cluster is a {company} {astra_db} cluster. +Options `--export-host` and `--export-bundle` are mutually exclusive. + +| +| `--export-host=HOST[:PORT]` +| The host name or IP and, optionally, the port of a node from the Origin cluster. +If the port is not specified, it will default to `9042`. +This option can be specified multiple times. +Options `--export-host` and `--export-bundle` are mutually exclusive. + +| +| `--export-password` +| The password to use to authenticate against the Origin cluster. +Options `--export-username` and `--export-password` must be provided together, or not at all. +Omit the parameter value to be prompted for the password interactively. + +| +| `--export-username=STRING` +| The username to use to authenticate against the Origin cluster. +Options `--export-username` and `--export-password` must be provided together, or not at all. + +| `-h` +| `--help` +| Displays this help text. + +| `-k` +| `--keyspaces=REGEX` +| A regular expression to select keyspaces to migrate. 
+The default is to migrate all keyspaces except system keyspaces, DSE-specific keyspaces, and the OpsCenter keyspace. +Case-sensitive keyspace names must be entered in their exact case. + +| `-t` +| `--tables=REGEX` +| A regular expression to select tables to migrate. +The default is to migrate all tables in the keyspaces that were selected for migration with `--keyspaces`. +Case-sensitive table names must be entered in their exact case. + +| +| `--table-types=regular\|counter\|all` +| The table types to migrate. +The default is `all`. + +|=== + + +[[getting-help-with-dsbulk-migrator]] +== Getting help with {dsbulk-migrator} + +Use the following command to display the available {dsbulk-migrator} commands: + +[source,bash] +---- +java -jar /path/to/dsbulk-migrator-embedded-dsbulk.jar --help +---- + +For individual command help and each one's options: + +[source,bash] +---- +java -jar /path/to/dsbulk-migrator-embedded-dsbulk.jar COMMAND --help +---- \ No newline at end of file diff --git a/modules/ROOT/pages/bulk-migrate.adoc b/modules/ROOT/pages/bulk-migrate.adoc new file mode 100644 index 00000000..75f4989f --- /dev/null +++ b/modules/ROOT/pages/bulk-migrate.adoc @@ -0,0 +1,64 @@ += Migrate data + +With xref:dsbulk-prereqs.adoc[prerequisites installed], it's time to build, test, and run the DSBulk Migrator. + +Building {dsbulk-migrator} is accomplished with Maven. First, clone the git repo to your local machine. +Example: + +[source,bash] +---- +cd ~/github +git clone git@github.com:datastax/dsbulk-migrator.git +cd dsbulk-migrator +---- + +Then run: + +[source,bash] +---- +mvn clean package +---- + +The build produces two distributable fat jars: + +* `dsbulk-migrator--embedded-driver.jar` : contains an embedded Java driver; suitable for live migrations using an external DSBulk or for script generation. +This jar is NOT suitable for live migrations using an embedded DSBulk since no DSBulk classes are present. 
* `dsbulk-migrator--embedded-dsbulk.jar`: contains an embedded DSBulk and an embedded Java driver and is suitable for all operations.
This jar is much bigger than the previous one due to the presence of DSBulk classes.

[[testing-dsbulk-migrator]]
== Testing {dsbulk-migrator}

The project contains a few integration tests.
Run them with:

[source,bash]
----
mvn clean verify
----

[NOTE]
====
The integration tests require https://github.com/datastax/simulacron[Simulacron].
Be sure to meet all the https://github.com/datastax/simulacron#prerequisites[Simulacron prerequisites] before running the
tests.
====

[[running-dsbulk-migrator]]
== Running {dsbulk-migrator}

Launch the {dsbulk-migrator} tool:

[source,bash]
----
java -jar /path/to/dsbulk-migrator.jar { migrate-live | generate-script | generate-ddl } [OPTIONS]
----

In a live migration, the options effectively configure DSBulk and connect to the clusters.

When generating a migration script, most options serve as default values in the generated scripts.
Even when generating scripts, this tool still needs to access the Origin cluster to gather metadata about the tables to migrate.

When generating a DDL file, only a few options are meaningful.
Because standard DSBulk is not used, and the import cluster is never contacted, import options and DSBulk-related options are ignored.
The tool still needs to access the Origin cluster to gather metadata about the keyspaces and tables for which to generate DDL statements. diff --git a/modules/ROOT/pages/dsbulk-examples.adoc b/modules/ROOT/pages/dsbulk-examples.adoc new file mode 100644 index 00000000..d4990b91 --- /dev/null +++ b/modules/ROOT/pages/dsbulk-examples.adoc @@ -0,0 +1,89 @@ += Case examples

[NOTE]
====
These examples show sample `username` and `password` values that are for demonstration purposes only.
Do not use these values in your environment.
+==== + +== Generate migration script + +Generate a migration script to migrate from an existing Origin cluster to a Target {astra_db} cluster: + +[source,bash] +---- + java -jar target/dsbulk-migrator--embedded-driver.jar migrate-live \ + --data-dir=/path/to/data/dir \ + --dsbulk-cmd=${DSBULK_ROOT}/bin/dsbulk \ + --dsbulk-log-dir=/path/to/log/dir \ + --export-host=my-origin-cluster.com \ + --export-username=user1 \ + --export-password=s3cr3t \ + --import-bundle=/path/to/bundle \ + --import-username=user1 \ + --import-password=s3cr3t +---- + +== Migrate live using external DSBulk install + +Migrate live from an existing Origin cluster to a Target {astra_db} cluster using an external DSBulk installation. +Passwords are prompted interactively: + +[source,bash] +---- + java -jar target/dsbulk-migrator--embedded-driver.jar migrate-live \ + --data-dir=/path/to/data/dir \ + --dsbulk-cmd=${DSBULK_ROOT}/bin/dsbulk \ + --dsbulk-log-dir=/path/to/log/dir \ + --export-host=my-origin-cluster.com \ + --export-username=user1 \ + --export-password # password will be prompted \ + --import-bundle=/path/to/bundle \ + --import-username=user1 \ + --import-password # password will be prompted +---- + +== Migrate live using embedded DSBulk install + +Migrate live from an existing Origin cluster to a Target {astra_db} cluster using the embedded DSBulk installation. +Passwords are prompted interactively. +In this example, additional DSBulk options are passed. + +[NOTE] +==== +You must use the `dsbulk-migrator--embedded-dsbulk.jar` fat jar here. +Otherwise, an error is raised because no embedded DSBulk can be found. 
+==== + +[source,bash] +---- + java -jar target/dsbulk-migrator--embedded-dsbulk.jar migrate-live \ + --data-dir=/path/to/data/dir \ + --dsbulk-use-embedded \ + --dsbulk-log-dir=/path/to/log/dir \ + --export-host=my-origin-cluster.com \ + --export-username=user1 \ + --export-password # password will be prompted \ + --export-dsbulk-option "--connector.csv.maxCharsPerColumn=65536" \ + --export-dsbulk-option "--executor.maxPerSecond=1000" \ + --import-bundle=/path/to/bundle \ + --import-username=user1 \ + --import-password # password will be prompted \ + --import-dsbulk-option "--connector.csv.maxCharsPerColumn=65536" \ + --import-dsbulk-option "--executor.maxPerSecond=1000" +---- + + +== Generate DDL to recreate Origin schema in Target + +Generate DDL files to recreate the Origin schema in a Target {astra_db} cluster: + +[source,bash] +---- + java -jar target/dsbulk-migrator--embedded-driver.jar generate-ddl \ + --data-dir=/path/to/data/dir \ + --export-host=my-origin-cluster.com \ + --export-username=user1 \ + --export-password=s3cr3t \ + --optimize-for-astra +---- diff --git a/modules/ROOT/pages/dsbulk-migrator.adoc b/modules/ROOT/pages/dsbulk-migrator.adoc index b3406605..495da716 100644 --- a/modules/ROOT/pages/dsbulk-migrator.adoc +++ b/modules/ROOT/pages/dsbulk-migrator.adoc @@ -62,16 +62,15 @@ Launch the {dsbulk-migrator} tool: java -jar /path/to/dsbulk-migrator.jar { migrate-live | generate-script | generate-ddl } [OPTIONS] ---- -When doing a live migration, the options are used to effectively configure DSBulk and to connect to +In live migration, the options effectively configure DSBulk and to connect to the clusters. When generating a migration script, most options serve as default values in the generated scripts. -Note however that, even when generating scripts, this tool still needs to access the Origin cluster -in order to gather metadata about the tables to migrate. 
+Even when generating scripts, this tool still needs to access the Origin cluster to gather metadata about the tables to migrate. When generating a DDL file, only a few options are meaningful. Because standard DSBulk is not used, and the import cluster is never contacted, import options and DSBulk-related options are ignored. -The tool still needs to access the Origin cluster in order to gather metadata about the keyspaces and tables for which to generate DDL statements. +The tool still needs to access the Origin cluster to gather metadata about the keyspaces and tables for which to generate DDL statements. [[dsbulk-migrator-reference]] == {dsbulk-migrator} reference @@ -595,6 +594,12 @@ Migrate live from an existing Origin cluster to a Target {astra_db} cluster usin Passwords will be prompted interactively. In this example, additional DSBulk options are passed. +[NOTE] +==== +In this example, you must use the `dsbulk-migrator--embedded-dsbulk.jar` fat jar. +Otherwise, an error is raised because no embedded DSBulk can be found. +==== + [source,bash] ---- java -jar target/dsbulk-migrator--embedded-dsbulk.jar migrate-live \ @@ -613,11 +618,6 @@ In this example, additional DSBulk options are passed. --import-dsbulk-option "--executor.maxPerSecond=1000" ---- -[NOTE] -==== -In the example above, you must use the `dsbulk-migrator--embedded-dsbulk.jar` fat jar. -Otherwise, an error will be raised because no embedded DSBulk can be found. 
-====

=== Generate DDL to recreate Origin schema in Target

Generate DDL files to recreate the Origin schema in a Target {astra_db} cluster:

[source,bash]
----
 java -jar target/dsbulk-migrator--embedded-driver.jar generate-ddl \
 --data-dir=/path/to/data/dir \
 --export-host=my-origin-cluster.com \
 --export-username=user1 \
 --export-password=s3cr3t \
 --optimize-for-astra
----- +---- \ No newline at end of file diff --git a/modules/ROOT/pages/dsbulk-prereqs.adoc b/modules/ROOT/pages/dsbulk-prereqs.adoc new file mode 100644 index 00000000..594a4356 --- /dev/null +++ b/modules/ROOT/pages/dsbulk-prereqs.adoc @@ -0,0 +1,10 @@ += Prerequisites + +Use {dsbulk-migrator} to perform a simple migration of smaller data quantities, where data validation (other than post-migration row counts) is not necessary. Ensure you have the prerequisites to: + +* Install or switch to Java 11. +* Install https://maven.apache.org/download.cgi[Maven] 3.9.x. +* Optionally install https://docs.datastax.com/en/dsbulk/docs/installing/install.html[DSBulk Loader, window="_blank"], if you elect to reference your own external installation of DSBulk, instead of the embedded DSBulk that's in {dsbulk-migrator}. +* Install https://github.com/datastax/simulacron#prerequisites[Simulacron] 0.12.x and its prerequisites, for integration tests. + +With prerequisites installed, you can begin to xref:bulk-migrate.adoc[migrate data], use the xref:bulk-command-line.adoc[command line options], or use the xref:dsbulk-examples.adoc[case examples]!
\ No newline at end of file diff --git a/modules/ROOT/pages/migrate-and-validate-data.adoc b/modules/ROOT/pages/migrate-and-validate-data.adoc index 558cf889..7e339965 100644 --- a/modules/ROOT/pages/migrate-and-validate-data.adoc +++ b/modules/ROOT/pages/migrate-and-validate-data.adoc @@ -8,7 +8,7 @@ This topic introduces two open-source data migration tools that you can use duri For full details, see these topics: * xref:cassandra-data-migrator.adoc[{cstar-data-migrator}] -* xref:dsbulk-migrator.adoc[{dsbulk-migrator}] +* xref:dsbulk-prereqs.adoc[{dsbulk-migrator}] These tools provide sophisticated features that help you migrate your data from any Cassandra **Origin** (Apache Cassandra®, {company} Enterprise (DSE), {company} {astra_db}) to any Cassandra **Target** (Apache Cassandra, DSE, {company} {astra_db}). @@ -87,4 +87,4 @@ For extensive usage and reference details, see xref:cassandra-data-migrator.adoc * `generate-ddl` reads the schema from Origin and generates CQL files to recreate it in an {astra_db} cluster used as Target. -For extensive usage and reference details, see xref:dsbulk-migrator.adoc[{dsbulk-migrator}]. +For extensive usage and reference details, see xref:bulk-command-line.adoc[{dsbulk-migrator} command-line options].