From 2e14b6fe04b3f54fdf8160bc6c6cfe475c661433 Mon Sep 17 00:00:00 2001 From: Fan Yang Date: Thu, 5 Dec 2024 16:23:26 +0800 Subject: [PATCH] ci: add test workflow for docker-based replication (#254) * ci: add test workflow for docker-based replication * doc: add notes to README --- .github/workflows/replication-test.yml | 140 ++++++++++++++++++ README.md | 13 +- devtools/replica-setup-mysql/checker.sh | 6 +- devtools/replica-setup-mysql/replica_setup.sh | 15 +- devtools/replica-setup-mysql/snapshot.sh | 2 +- .../replica-setup-mysql/start_replication.sh | 7 - docker/Dockerfile | 17 ++- docker/entrypoint.sh | 55 +++++-- 8 files changed, 211 insertions(+), 44 deletions(-) create mode 100644 .github/workflows/replication-test.yml diff --git a/.github/workflows/replication-test.yml b/.github/workflows/replication-test.yml new file mode 100644 index 00000000..2caba56a --- /dev/null +++ b/.github/workflows/replication-test.yml @@ -0,0 +1,140 @@ +name: Docker Replica Mode Test + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +jobs: + test-replication: + runs-on: ubuntu-latest + strategy: + matrix: + source: ['mysql', 'postgres'] + steps: + - uses: actions/checkout@v4 + + - name: Install dependencies + run: | + # Only install DuckDB for data comparison + curl -LJO https://github.com/duckdb/duckdb/releases/latest/download/duckdb_cli-linux-amd64.zip + unzip duckdb_cli-linux-amd64.zip + chmod +x duckdb + sudo mv duckdb /usr/local/bin + + - name: Start source ${{ matrix.source }} database + run: | + if [ "${{ matrix.source }}" = "mysql" ]; then + docker run -d --name source-db -p 3306:3306 \ + -e MYSQL_ROOT_PASSWORD=root \ + -e MYSQL_DATABASE=test \ + mysql:lts + + # Wait for MySQL to be ready + until docker exec source-db mysql -uroot -proot -e "SELECT 1"; do + sleep 1 + done + + # Create test data + docker exec source-db mysql -uroot -proot test -e " + CREATE TABLE items (id INT PRIMARY KEY, name VARCHAR(50)); + INSERT INTO items VALUES (1, 
'test1'), (2, 'test2');" + + else + docker run -d --name source-db -p 5432:5432 \ + -e POSTGRES_PASSWORD=postgres \ + -e POSTGRES_DB=test \ + postgres:latest \ + -c wal_level=logical + + # Wait for PostgreSQL to be ready + until docker exec source-db pg_isready; do + sleep 1 + done + + # Create test data + docker exec source-db psql -U postgres test -c " + CREATE TABLE items (id INT PRIMARY KEY, name VARCHAR(50)); + INSERT INTO items VALUES (1, 'test1'), (2, 'test2');" + fi + + - name: Start MyDuck Server in replica mode + run: | + if [ "${{ matrix.source }}" = "mysql" ]; then + SOURCE_DSN="mysql://root:root@host.docker.internal:3306" + else + SOURCE_DSN="postgres://postgres:postgres@host.docker.internal:5432/test" + fi + + docker run -d --name myduck \ + --add-host=host.docker.internal:host-gateway \ + -p 13306:3306 \ + -p 15432:5432 \ + --env=SETUP_MODE=REPLICA \ + --env=SOURCE_DSN="$SOURCE_DSN" \ + apecloud/myduckserver:latest + + # Wait for MyDuck to be ready + sleep 10 + + - name: Verify initial replication + run: | + # Query source data + if [ "${{ matrix.source }}" = "mysql" ]; then + docker exec source-db mysql -uroot -proot test \ + -e "SELECT * FROM items ORDER BY id;" > source_data.csv + else + docker exec source-db psql -U postgres -h 127.0.0.1 test \ + -c "\COPY (SELECT * FROM items ORDER BY id) TO STDOUT WITH CSV;" | tee source_data.csv + fi + + # Query MyDuck data through Postgres interface + docker exec myduck psql -U postgres -h 127.0.0.1 \ + -c "\COPY (SELECT * FROM items ORDER BY id) TO STDOUT WITH CSV;" | tee myduck_data.csv + + # Compare data using DuckDB + duckdb --csv -c " + CREATE TABLE source AS FROM 'source_data.csv'; + CREATE TABLE myduck AS FROM 'myduck_data.csv'; + SELECT COUNT(*) FROM ( + SELECT * FROM source EXCEPT SELECT * FROM myduck + ) diff;" | tail -n 1 | tee diff_count.txt + + # Verify no differences + if grep -q '^0$' diff_count.txt; then + echo 'Initial replication verification successful' + else + echo 'Initial replication 
verification failed' + exit 1 + fi + + - name: Test replication of new data + run: | + # Insert new data in source + if [ "${{ matrix.source }}" = "mysql" ]; then + docker exec source-db mysql -uroot -proot test \ + -e "INSERT INTO items VALUES (3, 'test3');" + else + docker exec source-db psql -U postgres test \ + -c "INSERT INTO items VALUES (3, 'test3');" + fi + + # Wait for replication + sleep 5 + + # Verify new data was replicated + docker exec myduck psql -t -U postgres -h 127.0.0.1 -c \ + "SELECT COUNT(*) FROM items WHERE id = 3;" | tr -d ' ' | tee count.txt + + if grep -q '^1$' count.txt; then + echo 'Replication of new data verified successfully' + else + echo 'Replication of new data verification failed' + exit 1 + fi + + - name: Cleanup + if: always() + run: | + docker rm -f source-db myduck || true \ No newline at end of file diff --git a/README.md b/README.md index 039a64bb..3ee564c4 100644 --- a/README.md +++ b/README.md @@ -122,15 +122,17 @@ psql -h 127.0.0.1 -p 15432 -U postgres We have integrated a setup tool in the Docker image that helps replicate data from your primary (MySQL|Postgres) server to MyDuck Server. The tool is available via the `SETUP_MODE` environment variable. In `REPLICA` mode, the container will start MyDuck Server, dump a snapshot of your primary (MySQL|Postgres) server, and start replicating data in real-time. +> [!NOTE] +> Supported primary database versions: MySQL>=8.0 and PostgreSQL>=13. In addition to the default settings, +logical replication must be enabled for PostgreSQL by setting `wal_level=logical`. +> For MySQL, GTID-based replication (`gtid_mode=ON` and `enforce_gtid_consistency=ON`) is recommended but not required. 
+ ```bash -docker run \ +docker run -d --name myduck \ -p 13306:3306 \ -p 15432:5432 \ - --privileged \ - --workdir=/home/admin \ --env=SETUP_MODE=REPLICA \ --env=SOURCE_DSN="<postgres|mysql>://<user>:<password>@<host>:<port>/<dbname>" - --detach=true \ apecloud/myduckserver:latest ``` `SOURCE_DSN` specifies the connection string to the primary database server, which can be either MySQL or PostgreSQL. @@ -141,6 +143,9 @@ docker run \ - **PostgreSQL Primary:** Use the `postgres` URI scheme, e.g., `--env=SOURCE_DSN=postgres://postgres:password@example.com:5432` +> [!NOTE] +> To replicate from a server running on the host machine, use `host.docker.internal` as the hostname instead of `localhost` or `127.0.0.1`. On Linux, you must also add `--add-host=host.docker.internal:host-gateway` to the `docker run` command. + ### Connecting to Cloud MySQL & Postgres MyDuck Server supports setting up replicas from common cloud-based MySQL & Postgres offerings. For more information, please refer to the [replica setup guide](docs/tutorial/replica-setup-rds.md). diff --git a/devtools/replica-setup-mysql/checker.sh b/devtools/replica-setup-mysql/checker.sh index 26f8379f..4a17f1fb 100644 --- a/devtools/replica-setup-mysql/checker.sh +++ b/devtools/replica-setup-mysql/checker.sh @@ -13,7 +13,7 @@ check_server_params() { echo "Checking MySQL server parameters..." # Retrieve the required MySQL server variables using mysqlsh - result=$(mysqlsh --uri="$SOURCE_DSN" $NO_PASSWORD_OPTION --sql -e " + result=$(mysqlsh --uri="$SOURCE_DSN" $SOURCE_NO_PASSWORD_OPTION --sql -e " SHOW VARIABLES WHERE variable_name IN ('binlog_format', 'enforce_gtid_consistency', 'gtid_mode', 'gtid_strict_mode', 'log_bin'); ") @@ -65,7 +65,7 @@ check_user_privileges() { echo "Checking privileges for the current user '$SOURCE_USER'..." 
# Check the user grants for the currently authenticated user using mysqlsh - result=$(mysqlsh --uri "$SOURCE_DSN" $NO_PASSWORD_OPTION --sql -e " + result=$(mysqlsh --uri "$SOURCE_DSN" $SOURCE_NO_PASSWORD_OPTION --sql -e " SHOW GRANTS FOR CURRENT_USER(); ") @@ -98,7 +98,7 @@ check_mysql_config() { # Function to check if source MySQL server is empty check_if_source_mysql_is_empty() { # Run the query using mysqlsh and capture the output - OUTPUT=$(mysqlsh --uri "$SOURCE_DSN" $NO_PASSWORD_OPTION --sql -e "SHOW DATABASES;" 2>/dev/null) + OUTPUT=$(mysqlsh --uri "$SOURCE_DSN" $SOURCE_NO_PASSWORD_OPTION --sql -e "SHOW DATABASES;" 2>/dev/null) check_command "retrieving database list" diff --git a/devtools/replica-setup-mysql/replica_setup.sh b/devtools/replica-setup-mysql/replica_setup.sh index 503e5670..db055649 100644 --- a/devtools/replica-setup-mysql/replica_setup.sh +++ b/devtools/replica-setup-mysql/replica_setup.sh @@ -1,7 +1,7 @@ #!/bin/bash usage() { - echo "Usage: $0 --mysql_host <host> --mysql_port <port> --mysql_user <user> --mysql_password <password> [--myduck_host <host>] [--myduck_port <port>] [--myduck_user <user>] [--myduck_password <password>] [--myduck_in_docker <true|false>]" + echo "Usage: $0 --mysql_host <host> --mysql_port <port> --mysql_user <user> --mysql_password <password> [--myduck_host <host>] [--myduck_port <port>] [--myduck_user <user>] [--myduck_password <password>]" exit 1 } @@ -10,7 +10,6 @@ MYDUCK_PORT=${MYDUCK_PORT:-3306} MYDUCK_USER=${MYDUCK_USER:-root} MYDUCK_PASSWORD=${MYDUCK_PASSWORD:-} MYDUCK_SERVER_ID=${MYDUCK_SERVER_ID:-2} -MYDUCK_IN_DOCKER=${MYDUCK_IN_DOCKER:-false} GTID_MODE="ON" while [[ $# -gt 0 ]]; do @@ -51,10 +50,6 @@ while [[ $# -gt 0 ]]; do MYDUCK_SERVER_ID="$2" shift 2 ;; - --myduck_in_docker) - MYDUCK_IN_DOCKER="$2" - shift 2 - ;; *) echo "Unknown parameter: $1" usage @@ -62,11 +57,11 @@ while [[ $# -gt 0 ]]; do esac done -# if MYDUCK_PASSWORD is empty, set NO_PASSWORD_OPTION to "--no-password" -if [[ -z "$MYDUCK_PASSWORD" ]]; then - NO_PASSWORD_OPTION="--no-password" +# if SOURCE_PASSWORD is empty, set SOURCE_NO_PASSWORD_OPTION to "--no-password" 
+if [[ -z "$SOURCE_PASSWORD" ]]; then + SOURCE_NO_PASSWORD_OPTION="--no-password" else - NO_PASSWORD_OPTION="" + SOURCE_NO_PASSWORD_OPTION="" fi # Check if all parameters are set diff --git a/devtools/replica-setup-mysql/snapshot.sh b/devtools/replica-setup-mysql/snapshot.sh index 4856b9b4..192e4aa1 100644 --- a/devtools/replica-setup-mysql/snapshot.sh +++ b/devtools/replica-setup-mysql/snapshot.sh @@ -36,7 +36,7 @@ echo "Thread count set to: $THREAD_COUNT" echo "Copying data from MySQL to MyDuck..." # Run mysqlsh command and capture the output -output=$(mysqlsh --uri "$SOURCE_DSN" $NO_PASSWORD_OPTION -- util copy-instance "mysql://${MYDUCK_USER}:${MYDUCK_PASSWORD}@${MYDUCK_HOST}:${MYDUCK_PORT}" --users false --consistent false --ignore-existing-objects true --handle-grant-errors ignore --threads $THREAD_COUNT --bytesPerChunk 256M --ignore-version true) +output=$(mysqlsh --uri "$SOURCE_DSN" $SOURCE_NO_PASSWORD_OPTION -- util copy-instance "mysql://${MYDUCK_USER}:${MYDUCK_PASSWORD}@${MYDUCK_HOST}:${MYDUCK_PORT}" --users false --consistent false --ignore-existing-objects true --handle-grant-errors ignore --threads $THREAD_COUNT --bytesPerChunk 256M --ignore-version true) if [[ $GTID_MODE == "ON" ]]; then # Extract the EXECUTED_GTID_SET from this output: diff --git a/devtools/replica-setup-mysql/start_replication.sh b/devtools/replica-setup-mysql/start_replication.sh index cfe650af..389deab3 100644 --- a/devtools/replica-setup-mysql/start_replication.sh +++ b/devtools/replica-setup-mysql/start_replication.sh @@ -11,13 +11,6 @@ OS=$(uname -s) # fi # fi -if [[ "${MYDUCK_IN_DOCKER}" == "true" && "$OS" == "Darwin" && - ("${SOURCE_HOST}" == "127.0.0.1" || "${SOURCE_HOST}" == "localhost" || "${SOURCE_HOST}" == "0.0.0.0") ]]; then - SOURCE_HOST_FOR_REPLICA="host.docker.internal" -else - SOURCE_HOST_FOR_REPLICA="${SOURCE_HOST}" -fi - # Use the EXECUTED_GTID_SET variable from the previous steps if [ $GTID_MODE == "ON" ] && [ ! 
-z "$EXECUTED_GTID_SET" ]; then mysqlsh --sql --host=${MYDUCK_HOST} --port=${MYDUCK_PORT} --user=root --no-password </dev/null + rm -f "${PID_FILE}" + fi } # Function to run replica setup run_replica_setup() { case "$SOURCE_TYPE" in MYSQL) - echo "Creating replica with MySQL server at $SOURCE_DSN..." + echo "Replicating MySQL primary server: DSN=$SOURCE_DSN ..." cd "$MYSQL_REPLICA_SETUP_PATH" || { echo "Error: Could not change directory to ${MYSQL_REPLICA_SETUP_PATH}"; exit 1; } ;; POSTGRES) - echo "Creating replica with Postgres server at $SOURCE_DSN..." + echo "Replicating PostgreSQL primary server: DSN=$SOURCE_DSN ..." cd "$POSTGRES_REPLICA_SETUP_PATH" || { echo "Error: Could not change directory to ${POSTGRES_REPLICA_SETUP_PATH}"; exit 1; @@ -151,6 +182,9 @@ check_process_alive() { # Handle the setup_mode setup() { + # Setup signal handlers + trap cleanup SIGTERM SIGINT SIGQUIT + if [ -n "$LOG_LEVEL" ]; then export LOG_LEVEL="-loglevel $LOG_LEVEL" fi @@ -164,7 +198,7 @@ setup() { run_server_in_foreground ;; "REPLICA") - echo "Starting MyDuck Server and running replica setup in REPLICA mode..." + echo "Starting MyDuck Server in REPLICA mode..." run_server_in_background wait_for_my_duck_server_ready run_replica_setup @@ -180,10 +214,9 @@ setup while [[ "$SETUP_MODE" == "REPLICA" ]]; do # Check if the processes have started - check_process_alive "$PID_FILE" "MyDuck Server" - MY_DUCK_SERVER_STATUS=$? - if (( MY_DUCK_SERVER_STATUS != 0 )); then - echo "MyDuck Server is not running. Exiting..." + if ! check_process_alive "$PID_FILE" "MyDuck Server"; then + echo "CRITICAL: MyDuck Server process died unexpectedly." + cleanup exit 1 fi