From 06ba5fe990710e5700299740bcd8aafbfd2cd00d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Thu, 20 Feb 2025 12:39:27 +0000 Subject: [PATCH] [DOP-22425] Do not run tests as admin DB users --- .env.docker | 10 ++- .env.local | 10 ++- .github/workflows/test-clickhouse.yml | 29 +++--- .github/workflows/test-ftp.yml | 22 ++--- .github/workflows/test-ftps.yml | 18 ++-- .github/workflows/test-greenplum.yml | 43 ++++----- .github/workflows/test-hdfs.yml | 20 ++--- .github/workflows/test-hive.yml | 3 +- .github/workflows/test-kafka.yml | 52 +++-------- .github/workflows/test-local-fs.yml | 3 +- .github/workflows/test-mongodb.yml | 24 ++--- .github/workflows/test-mssql.yml | 21 ++--- .github/workflows/test-mysql.yml | 26 +++--- .github/workflows/test-oracle.yml | 25 +++--- .github/workflows/test-postgres.yml | 41 +++------ .github/workflows/test-s3.yml | 28 +++--- .github/workflows/test-samba.yml | 20 ++--- .github/workflows/test-sftp.yml | 25 +++--- .github/workflows/test-teradata.yml | 3 +- .github/workflows/test-webdav.yml | 22 ++--- CONTRIBUTING.rst | 7 -- docker-compose.yml | 89 ++++++++++++++++--- docker/ftp/on_post_init.sh | 2 +- docker/ftps/on_post_init.sh | 9 ++ docker/greenplum/custom_entrypoint.sh | 86 ++++++++++++++++++ docker/greenplum/initdb.d/create_user.sh | 23 +++++ docker/greenplum/initdb.d/fix_pg_hba.sh | 8 ++ docker/mongodb/create_user.sh | 8 ++ docker/oracle/processes.sql | 5 ++ docker/oracle/user_permissions.sql | 6 ++ .../db_connection/mongodb/connection.py | 17 ++-- onetl/connection/file_connection/ftps.py | 30 ++++--- tests/fixtures/processing/oracle.py | 27 +----- .../test_mongodb_unit.py | 12 +-- 34 files changed, 448 insertions(+), 326 deletions(-) create mode 100755 docker/ftps/on_post_init.sh create mode 100755 docker/greenplum/custom_entrypoint.sh create mode 100755 
docker/greenplum/initdb.d/create_user.sh create mode 100755 docker/greenplum/initdb.d/fix_pg_hba.sh create mode 100755 docker/mongodb/create_user.sh create mode 100644 docker/oracle/processes.sql create mode 100644 docker/oracle/user_permissions.sql diff --git a/.env.docker b/.env.docker index 5fbfdc977..54eb19c77 100644 --- a/.env.docker +++ b/.env.docker @@ -1,10 +1,11 @@ TZ=UTC +SPARK_EXTERNAL_IP=onetl # Greenplum ONETL_GP_HOST=greenplum ONETL_GP_PORT=5432 -ONETL_GP_DATABASE=postgres -ONETL_GP_USER=gpadmin +ONETL_GP_DATABASE=onetl +ONETL_GP_USER=onetl ONETL_GP_PASSWORD=123UsedForTestOnly@! # ClickHouse @@ -25,9 +26,12 @@ ONETL_KAFKA_PASSWORD=123UsedForTestOnly@! # Mongo ONETL_MONGO_HOST=mongodb ONETL_MONGO_PORT=27017 -ONETL_MONGO_DB=admin +ONETL_MONGO_DB=onetl ONETL_MONGO_USER=onetl ONETL_MONGO_PASSWORD=123UsedForTestOnly@! +ONETL_MONGO_ROOT_DB=admin +ONETL_MONGO_ROOT_USER=admin +ONETL_MONGO_ROOT_PASSWORD=123AdminUserForTests@! # MSSQL ONETL_MSSQL_HOST=mssql diff --git a/.env.local b/.env.local index e4e1193c1..da208d138 100644 --- a/.env.local +++ b/.env.local @@ -1,10 +1,11 @@ export TZ=UTC +export SPARK_EXTERNAL_IP=$(docker network inspect onetl_onetl --format '{{ (index .IPAM.Config 0).Gateway }}') # Greenplum export ONETL_GP_HOST=localhost export ONETL_GP_PORT=5433 -export ONETL_GP_DATABASE=postgres -export ONETL_GP_USER=gpadmin +export ONETL_GP_DATABASE=onetl +export ONETL_GP_USER=onetl export ONETL_GP_PASSWORD=123UsedForTestOnly@! # ClickHouse @@ -25,9 +26,12 @@ export ONETL_KAFKA_PASSWORD=123UsedForTestOnly@! # Mongo export ONETL_MONGO_HOST=localhost export ONETL_MONGO_PORT=27017 -export ONETL_MONGO_DB=admin +export ONETL_MONGO_DB=onetl export ONETL_MONGO_USER=onetl export ONETL_MONGO_PASSWORD=123UsedForTestOnly@! +export ONETL_MONGO_ROOT_DB=admin +export ONETL_MONGO_ROOT_USER=admin +export ONETL_MONGO_ROOT_PASSWORD=123AdminUserForTests@! 
# MSSQL export ONETL_MSSQL_HOST=localhost diff --git a/.github/workflows/test-clickhouse.yml b/.github/workflows/test-clickhouse.yml index 4a210cc93..db4493cfd 100644 --- a/.github/workflows/test-clickhouse.yml +++ b/.github/workflows/test-clickhouse.yml @@ -32,25 +32,18 @@ jobs: test-clickhouse: name: Run Clickhouse tests (server=${{ inputs.clickhouse-version }}, spark=${{ inputs.spark-version }}, pydantic=${{ inputs.pydantic-version }}, java=${{ inputs.java-version }}, python=${{ inputs.python-version }}, os=${{ inputs.os }}) runs-on: ${{ inputs.os }} - services: - clickhouse: - image: ${{ inputs.clickhouse-image }}:${{ inputs.clickhouse-version }} - env: - TZ: UTC - # yandex/clickhouse-server doesn't read these options at all, so we using default values - CLICKHOUSE_DB: default - CLICKHOUSE_USER: default - CLICKHOUSE_PASSWORD: 123UsedForTestOnly@! - CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: 1 - ports: - - 8123:8123 - - 9001:9000 - options: --sysctl net.ipv6.conf.all.disable_ipv6=1 steps: - name: Checkout code uses: actions/checkout@v4 + - name: Start Clickhouse + run: | + docker compose --profile all down -v --remove-orphans + docker compose --profile clickhouse up -d --wait --wait-timeout 200 + env: + CLICKHOUSE_IMAGE: ${{ inputs.clickhouse-image }}:${{ inputs.clickhouse-version }} + - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v4 with: @@ -92,8 +85,7 @@ jobs: - name: Run tests run: | mkdir reports/ || echo "Directory exists" - sed '/^$/d' ./.env.local | sed '/^#/d' | sed 's/^/export /' > ./env - source ./env + source .env.local ./pytest_runner.sh -m clickhouse - name: Dump Clickhouse logs on failure @@ -110,6 +102,11 @@ jobs: name: container-logs-clickhouse-${{ inputs.clickhouse-version }}-spark-${{ inputs.spark-version }} path: logs/* + - name: Shutdown Clickhouse + if: always() + run: | + docker compose --profile all down -v --remove-orphans + - name: Upload coverage results uses: actions/upload-artifact@v4 with: diff --git 
a/.github/workflows/test-ftp.yml b/.github/workflows/test-ftp.yml index e91c31e34..0be0c079d 100644 --- a/.github/workflows/test-ftp.yml +++ b/.github/workflows/test-ftp.yml @@ -28,6 +28,13 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Start FTP + run: | + docker compose --profile all down -v --remove-orphans + docker compose --profile ftp up -d --wait --wait-timeout 200 + env: + FTP_IMAGE: chonjay21/ftps:${{ inputs.ftp-version }} + - name: Set up Python ${{ inputs.python-version }} uses: actions/setup-python@v5 with: @@ -50,21 +57,10 @@ jobs: run: | pip install -I -r requirements/core.txt -r requirements/ftp.txt -r requirements/tests/base.txt -r requirements/tests/pydantic-${{ inputs.pydantic-version }}.txt - # Replace with Github Actions' services after https://github.com/chonjay21/docker-ftp/pull/3 - # Cannot use services because we need to mount config file from the repo, but services start before checkout. - # See https://github.com/orgs/community/discussions/25792 - - name: Start FTP - run: | - docker compose down -v --remove-orphans - docker compose up -d ftp --wait --wait --wait-timeout 200 - env: - FTP_IMAGE: chonjay21/ftps:${{ inputs.ftp-version }} - - name: Run tests run: | mkdir reports/ || echo "Directory exists" - sed '/^$/d' ./.env.local | sed '/^#/d' | sed 's/^/export /' > ./env - source ./env + source .env.local ./pytest_runner.sh -m ftp - name: Dump FTP logs on failure @@ -86,7 +82,7 @@ jobs: - name: Shutdown FTP if: always() run: | - docker compose down -v --remove-orphans + docker compose --profile all down -v --remove-orphans - name: Upload coverage results uses: actions/upload-artifact@v4 diff --git a/.github/workflows/test-ftps.yml b/.github/workflows/test-ftps.yml index 89eff2e95..3bfd5e54f 100644 --- a/.github/workflows/test-ftps.yml +++ b/.github/workflows/test-ftps.yml @@ -28,6 +28,11 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Start FTPS + run: | + docker compose --profile all down -v 
--remove-orphans + docker compose --profile ftps up -d --wait --wait-timeout 200 + - name: Set up Python ${{ inputs.python-version }} uses: actions/setup-python@v5 with: @@ -49,22 +54,13 @@ jobs: - name: Install dependencies run: | pip install -I -r requirements/core.txt -r requirements/ftp.txt -r requirements/tests/base.txt -r requirements/tests/pydantic-${{ inputs.pydantic-version }}.txt - - # Replace with Github Actions' services after https://github.com/chonjay21/docker-ftps/pull/3 - # Cannot use services because we need to mount config file from the repo, but services start before checkout. - # See https://github.com/orgs/community/discussions/25792 - - name: Start FTPS - run: | - docker compose down -v --remove-orphans - docker compose up -d ftps --wait --wait --wait-timeout 200 env: FTPS_IMAGE: chonjay21/ftps:${{ inputs.ftps-version }} - name: Run tests run: | mkdir reports/ || echo "Directory exists" - sed '/^$/d' ./.env.local | sed '/^#/d' | sed 's/^/export /' > ./env - source ./env + source .env.local ./pytest_runner.sh -m ftps - name: Dump FTPS logs on failure @@ -84,7 +80,7 @@ jobs: - name: Shutdown FTPS if: always() run: | - docker compose down -v --remove-orphans + docker compose --profile all down -v --remove-orphans - name: Upload coverage results uses: actions/upload-artifact@v4 diff --git a/.github/workflows/test-greenplum.yml b/.github/workflows/test-greenplum.yml index 6bdccf19f..0f8720583 100644 --- a/.github/workflows/test-greenplum.yml +++ b/.github/workflows/test-greenplum.yml @@ -38,33 +38,18 @@ jobs: if: github.repository == 'MobileTeleSystems/onetl' # prevent running on forks name: Run Greenplum tests (server=${{ inputs.greenplum-version }}, spark=${{ inputs.spark-version }}, pydantic=${{ inputs.pydantic-version }}, java=${{ inputs.java-version }}, python=${{ inputs.python-version }}, os=${{ inputs.os }}) runs-on: ${{ inputs.os }} - services: - greenplum: - image: andruche/greenplum:${{ inputs.greenplum-version }} - env: - TZ: UTC - 
ports: - - 5433:5432 - # TODO: remove after https://github.com/andruche/docker-greenplum/pull/2 - options: --sysctl net.ipv6.conf.all.disable_ipv6=1 - pgbouncer-transaction-gp: - image: bitnami/pgbouncer:latest - env: - TZ: UTC - PGBOUNCER_DATABASE: postgres - PGBOUNCER_POOL_MODE: transaction - POSTGRESQL_HOST: greenplum - POSTGRESQL_PORT: 5432 - POSTGRESQL_DATABASE: postgres - POSTGRESQL_USERNAME: gpadmin - POSTGRESQL_PASSWORD: 123UsedForTestOnly@! - ports: - - 6433:6432 steps: - name: Checkout code uses: actions/checkout@v4 + - name: Start Greenplum + run: | + docker compose --profile all down -v --remove-orphans + docker compose --profile greenplum up -d --wait --wait-timeout 200 + env: + GREENPLUM_IMAGE: andruche/greenplum:${{ inputs.greenplum-version }} + - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v4 with: @@ -111,8 +96,7 @@ jobs: - name: Wait for Greenplum to be ready run: | - sed '/^$/d' ./.env.local | sed '/^#/d' | sed 's/^/export /' > ./env - source ./env + source .env.local # Greenplum init script is running very late sleep 30 @@ -123,8 +107,7 @@ jobs: - name: Run tests run: | mkdir reports/ || echo "Directory exists" - sed '/^$/d' ./.env.local | sed '/^#/d' | sed 's/^/export /' > ./env - source ./env + source .env.local export ONETL_GP_PACKAGE_VERSION=${{ inputs.package-version }} ./pytest_runner.sh -m greenplum env: @@ -134,8 +117,7 @@ jobs: - name: Run tests with PGBouncer run: | mkdir reports/ || echo "Directory exists" - sed '/^$/d' ./.env.local | sed '/^#/d' | sed 's/^/export /' > ./env - source ./env + source .env.local export ONETL_GP_PACKAGE_VERSION=${{ inputs.package-version }} export ONETL_GP_PORT=6433 # Run only some basic tests @@ -158,6 +140,11 @@ jobs: name: container-logs-greenplum-${{ inputs.greenplum-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: logs/* + - name: Shutdown Greenplum + if: always() + run: | + docker compose --profile all down -v 
--remove-orphans + - name: Upload coverage results uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/test-hdfs.yml b/.github/workflows/test-hdfs.yml index 98398f520..3e45ae6da 100644 --- a/.github/workflows/test-hdfs.yml +++ b/.github/workflows/test-hdfs.yml @@ -34,6 +34,11 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Start HDFS + run: | + docker compose --profile all down -v --remove-orphans + docker compose --profile hdfs up -d --wait --wait-timeout 200 + - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v4 with: @@ -69,24 +74,13 @@ jobs: - name: Install dependencies run: | pip install -I -r requirements/core.txt -r requirements/kerberos.txt -r requirements/hdfs.txt -r requirements/tests/base.txt -r requirements/tests/spark-${{ inputs.spark-version }}.txt -r requirements/tests/pydantic-${{ inputs.pydantic-version }}.txt - - # Cannot use services because we need to mount config file from the repo, but services start before checkout. - # See https://github.com/orgs/community/discussions/25792 - - name: Start HDFS - run: | - docker compose down -v --remove-orphans - docker compose up -d hdfs --wait --wait-timeout 200 & - wait_pid=$! 
- docker compose logs -f hdfs & - wait $wait_pid env: HDFS_IMAGE: mtsrus/hadoop:${{ inputs.hadoop-version }} - name: Run tests run: | mkdir reports/ || echo "Directory exists" - sed '/^$/d' ./.env.local | sed '/^#/d' | sed 's/^/export /' > ./env - source ./env + source .env.local echo "127.0.0.1 hdfs" | sudo tee -a /etc/hosts ./pytest_runner.sh -m hdfs @@ -107,7 +101,7 @@ jobs: - name: Shutdown HDFS if: always() run: | - docker compose down -v --remove-orphans + docker compose --profile all down -v --remove-orphans - name: Upload coverage results uses: actions/upload-artifact@v4 diff --git a/.github/workflows/test-hive.yml b/.github/workflows/test-hive.yml index 7c74ed7ae..1bd768ca3 100644 --- a/.github/workflows/test-hive.yml +++ b/.github/workflows/test-hive.yml @@ -72,8 +72,7 @@ jobs: - name: Run tests run: | mkdir reports/ || echo "Directory exists" - sed '/^$/d' ./.env.local | sed '/^#/d' | sed 's/^/export /' > ./env - source ./env + source .env.local ./pytest_runner.sh -m hive - name: Upload coverage results diff --git a/.github/workflows/test-kafka.yml b/.github/workflows/test-kafka.yml index bdfcae46e..d8bd865b1 100644 --- a/.github/workflows/test-kafka.yml +++ b/.github/workflows/test-kafka.yml @@ -30,47 +30,17 @@ jobs: name: Run Kafka tests (server=${{ inputs.kafka-version }}, spark=${{ inputs.spark-version }}, pydantic=${{ inputs.pydantic-version }}, java=${{ inputs.java-version }}, python=${{ inputs.python-version }}, os=${{ inputs.os }}) runs-on: ${{ inputs.os }} - services: - zookeeper: - image: bitnami/zookeeper:3.8 - env: - TZ: UTC - ALLOW_ANONYMOUS_LOGIN: 'yes' - options: >- - --health-cmd "nc -z localhost 2181 || exit" - --health-interval 10s - --health-timeout 5s - --health-retries 5 - - kafka: - image: bitnami/kafka:${{ inputs.kafka-version }} - env: - TZ: UTC - ALLOW_PLAINTEXT_LISTENER: 'yes' - KAFKA_ENABLE_KRAFT: 'no' - KAFKA_CLIENT_USERS: onetl - KAFKA_CLIENT_PASSWORDS: 123UsedForTestOnly@! 
- KAFKA_CFG_AUTO_CREATE_TOPICS_ENABLE: 'true' - KAFKA_CFG_ZOOKEEPER_CONNECT: zookeeper:2181 - KAFKA_ZOOKEEPER_BOOTSTRAP_SCRAM_USERS: 'true' - KAFKA_CFG_INTER_BROKER_LISTENER_NAME: INTERNAL_PLAINTEXT_ANONYMOUS - KAFKA_CFG_LISTENERS: INTERNAL_PLAINTEXT_ANONYMOUS://:9092,EXTERNAL_PLAINTEXT_ANONYMOUS://:9093,INTERNAL_PLAINTEXT_SASL://:9094,EXTERNAL_PLAINTEXT_SASL://:9095 - KAFKA_CFG_ADVERTISED_LISTENERS: INTERNAL_PLAINTEXT_ANONYMOUS://kafka:9092,EXTERNAL_PLAINTEXT_ANONYMOUS://localhost:9093,INTERNAL_PLAINTEXT_SASL://kafka:9094,EXTERNAL_PLAINTEXT_SASL://localhost:9095 - KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP: INTERNAL_PLAINTEXT_ANONYMOUS:PLAINTEXT,EXTERNAL_PLAINTEXT_ANONYMOUS:PLAINTEXT,INTERNAL_PLAINTEXT_SASL:SASL_PLAINTEXT,EXTERNAL_PLAINTEXT_SASL:SASL_PLAINTEXT - KAFKA_CFG_SASL_ENABLED_MECHANISMS: PLAIN,SCRAM-SHA-256,SCRAM-SHA-512 - ports: - - 9093:9093 - - 9095:9095 - options: >- - --health-cmd "kafka-topics.sh --bootstrap-server 127.0.0.1:9092 --list" - --health-interval 10s - --health-timeout 5s - --health-retries 5 - steps: - name: Checkout code uses: actions/checkout@v4 + - name: Start Kafka + run: | + docker compose --profile all down -v --remove-orphans + docker compose --profile kafka up -d --wait --wait-timeout 200 + env: + KAFKA_IMAGE: bitnami/kafka:${{ inputs.kafka-version }} + - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v4 with: @@ -112,8 +82,7 @@ jobs: - name: Run tests run: | mkdir reports/ || echo "Directory exists" - sed '/^$/d' ./.env.local | sed '/^#/d' | sed 's/^/export /' > ./env - source ./env + source .env.local ./pytest_runner.sh -m kafka - name: Dump Kafka logs on failure @@ -130,6 +99,11 @@ jobs: name: container-logs-kafka-${{ inputs.kafka-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: logs/* + - name: Shutdown Kafka + if: always() + run: | + docker compose --profile all down -v --remove-orphans + - name: Upload coverage results uses: 
actions/upload-artifact@v4 with: diff --git a/.github/workflows/test-local-fs.yml b/.github/workflows/test-local-fs.yml index 27e2fc03d..7b5535785 100644 --- a/.github/workflows/test-local-fs.yml +++ b/.github/workflows/test-local-fs.yml @@ -72,8 +72,7 @@ jobs: - name: Run tests run: | mkdir reports/ || echo "Directory exists" - sed '/^$/d' ./.env.local | sed '/^#/d' | sed 's/^/export /' > ./env - source ./env + source .env.local ./pytest_runner.sh -m local_fs - name: Upload coverage results diff --git a/.github/workflows/test-mongodb.yml b/.github/workflows/test-mongodb.yml index 8f11b3091..e90cb7328 100644 --- a/.github/workflows/test-mongodb.yml +++ b/.github/workflows/test-mongodb.yml @@ -29,20 +29,18 @@ jobs: test-mongodb: name: Run MongoDB tests (server=${{ inputs.mongodb-version }}, spark=${{ inputs.spark-version }}, pydantic=${{ inputs.pydantic-version }}, java=${{ inputs.java-version }}, python=${{ inputs.python-version }}, os=${{ inputs.os }}) runs-on: ${{ inputs.os }} - services: - mongodb: - image: mongo:${{ inputs.mongodb-version }} - env: - TZ: UTC - MONGO_INITDB_ROOT_USERNAME: onetl - MONGO_INITDB_ROOT_PASSWORD: 123UsedForTestOnly@! 
- ports: - - 27017:27017 steps: - name: Checkout code uses: actions/checkout@v4 + - name: Start MongoDB + run: | + docker compose --profile all down -v --remove-orphans + docker compose --profile mongodb up -d --wait --wait-timeout 200 + env: + MONGODB_IMAGE: mongo:${{ inputs.mongodb-version }} + - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v4 with: @@ -84,8 +82,7 @@ jobs: - name: Run tests run: | mkdir reports/ || echo "Directory exists" - sed '/^$/d' ./.env.local | sed '/^#/d' | sed 's/^/export /' > ./env - source ./env + source .env.local ./pytest_runner.sh -m mongodb - name: Dump MongoDB logs on failure @@ -102,6 +99,11 @@ jobs: name: container-logs-mongodb-${{ inputs.mongodb-version }} path: logs/* + - name: Shutdown MongoDB + if: always() + run: | + docker compose --profile all down -v --remove-orphans + - name: Upload coverage results uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/test-mssql.yml b/.github/workflows/test-mssql.yml index 2980ece05..5606e5a7f 100644 --- a/.github/workflows/test-mssql.yml +++ b/.github/workflows/test-mssql.yml @@ -34,6 +34,13 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Start MSSQL + run: | + docker compose --profile all down -v --remove-orphans + docker compose --profile mssql up -d --wait --wait-timeout 200 + env: + MSSQL_IMAGE: mcr.microsoft.com/mssql/server:${{ inputs.mssql-version }} + - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v4 with: @@ -79,20 +86,10 @@ jobs: run: | pip install -I -r requirements/core.txt -r requirements/tests/base.txt -r requirements/tests/mssql.txt -r requirements/tests/spark-${{ inputs.spark-version }}.txt -r requirements/tests/pydantic-${{ inputs.pydantic-version }}.txt - # Cannot use services because we need to mount config file from the repo, but services start before checkout. 
- # See https://github.com/orgs/community/discussions/25792 - - name: Start MSSQL - run: | - docker compose down -v --remove-orphans - docker compose up -d mssql --wait --wait --wait-timeout 200 - env: - MSSQL_IMAGE: mcr.microsoft.com/mssql/server:${{ inputs.mssql-version }} - - name: Run tests run: | mkdir reports/ || echo "Directory exists" - sed '/^$/d' ./.env.local | sed '/^#/d' | sed 's/^/export /' > ./env - source ./env + source .env.local ./pytest_runner.sh -m mssql - name: Dump MSSQL logs on failure @@ -112,7 +109,7 @@ jobs: - name: Shutdown MSSQL if: always() run: | - docker compose down -v --remove-orphans + docker compose --profile all down -v --remove-orphans - name: Upload coverage results uses: actions/upload-artifact@v4 diff --git a/.github/workflows/test-mysql.yml b/.github/workflows/test-mysql.yml index 616ba5c70..b45d03ea3 100644 --- a/.github/workflows/test-mysql.yml +++ b/.github/workflows/test-mysql.yml @@ -29,22 +29,18 @@ jobs: test-mysql: name: Run MySQL tests (server=${{ inputs.mysql-version }}, spark=${{ inputs.spark-version }}, pydantic=${{ inputs.pydantic-version }}, java=${{ inputs.java-version }}, python=${{ inputs.python-version }}, os=${{ inputs.os }}) runs-on: ${{ inputs.os }} - services: - mysql: - image: mysql:${{ inputs.mysql-version }} - env: - TZ: UTC - MYSQL_ROOT_PASSWORD: 123RootUserForTestsOnly@! - MYSQL_DATABASE: onetl - MYSQL_USER: onetl - MYSQL_PASSWORD: 123UsedForTestOnly@! 
- ports: - - 3306:3306 steps: - name: Checkout code uses: actions/checkout@v4 + - name: Start MySQL + run: | + docker compose --profile all down -v --remove-orphans + docker compose --profile mysql up -d --wait --wait-timeout 200 + env: + MYSQL_IMAGE: mysql:${{ inputs.mysql-version }} + - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v4 with: @@ -86,8 +82,7 @@ jobs: - name: Run tests run: | mkdir reports/ || echo "Directory exists" - sed '/^$/d' ./.env.local | sed '/^#/d' | sed 's/^/export /' > ./env - source ./env + source .env.local ./pytest_runner.sh -m mysql - name: Dump MySQL logs on failure @@ -104,6 +99,11 @@ jobs: name: container-logs-mysql-${{ inputs.mysql-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: logs/* + - name: Shutdown MySQL + if: always() + run: | + docker compose --profile all down -v --remove-orphans + - name: Upload coverage results uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/test-oracle.yml b/.github/workflows/test-oracle.yml index 43feb1fa8..d42fcee27 100644 --- a/.github/workflows/test-oracle.yml +++ b/.github/workflows/test-oracle.yml @@ -36,21 +36,18 @@ jobs: test-oracle: name: Run Oracle tests (server=${{ inputs.oracle-version }}, spark=${{ inputs.spark-version }}, pydantic=${{ inputs.pydantic-version }}, java=${{ inputs.java-version }}, python=${{ inputs.python-version }}, os=${{ inputs.os }}) runs-on: ${{ inputs.os }} - services: - oracle: - image: ${{ inputs.oracle-image }}:${{ inputs.oracle-version }} - env: - TZ: UTC - ORACLE_PASSWORD: 123AdminUserForTests@! - APP_USER: onetl - APP_USER_PASSWORD: 123UsedForTestOnly@! 
- ports: - - 1522:1521 steps: - name: Checkout code uses: actions/checkout@v4 + - name: Start Oracle + run: | + docker compose --profile all down -v --remove-orphans + docker compose --profile oracle up -d --wait --wait-timeout 200 + env: + ORACLE_IMAGE: ${{ inputs.oracle-image }}:${{ inputs.oracle-version }} + - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v4 with: @@ -108,8 +105,7 @@ jobs: export PATH=${ONETL_ORA_CLIENT_PATH}:${PATH} mkdir reports/ || echo "Directory exists" - sed '/^$/d' ./.env.local | sed '/^#/d' | sed 's/^/export /' > ./env - source ./env + source .env.local export "ONETL_ORA_SERVICE_NAME=${{ inputs.db-name }}" ./pytest_runner.sh -m oracle @@ -127,6 +123,11 @@ jobs: name: container-logs-oracle-${{ inputs.oracle-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: logs/* + - name: Shutdown Oracle + if: always() + run: | + docker compose --profile all down -v --remove-orphans + - name: Upload coverage results uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/test-postgres.yml b/.github/workflows/test-postgres.yml index ec45edd64..1c6705cb5 100644 --- a/.github/workflows/test-postgres.yml +++ b/.github/workflows/test-postgres.yml @@ -29,34 +29,18 @@ jobs: test-postgres: name: Run Postgres tests (server=${{ inputs.postgres-version }}, spark=${{ inputs.spark-version }}, pydantic=${{ inputs.pydantic-version }}, java=${{ inputs.java-version }}, python=${{ inputs.python-version }}, os=${{ inputs.os }}) runs-on: ${{ inputs.os }} - services: - postgres: - image: postgres:${{ inputs.postgres-version }} - env: - TZ: UTC - POSTGRES_USER: onetl - POSTGRES_DB: onetl - POSTGRES_PASSWORD: 123UsedForTestOnly@! 
- ports: - - 5432:5432 - pgbouncer-transaction-pg: - image: bitnami/pgbouncer:latest - env: - TZ: UTC - PGBOUNCER_DATABASE: onetl - PGBOUNCER_POOL_MODE: transaction - POSTGRESQL_HOST: postgres - POSTGRESQL_PORT: 5432 - POSTGRESQL_DATABASE: onetl - POSTGRESQL_USERNAME: onetl - POSTGRESQL_PASSWORD: 123UsedForTestOnly@! - ports: - - 6432:6432 steps: - name: Checkout code uses: actions/checkout@v4 + - name: Start Postgres + run: | + docker compose --profile all down -v --remove-orphans + docker compose --profile postgres up -d --wait --wait-timeout 200 + env: + POSTGRES_IMAGE: postgres:${{ inputs.postgres-version }} + - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v4 with: @@ -98,15 +82,13 @@ jobs: - name: Run tests run: | mkdir reports/ || echo "Directory exists" - sed '/^$/d' ./.env.local | sed '/^#/d' | sed 's/^/export /' > ./env - source ./env + source .env.local ./pytest_runner.sh -m postgres - name: Run tests with PGBouncer run: | mkdir reports/ || echo "Directory exists" - sed '/^$/d' ./.env.local | sed '/^#/d' | sed 's/^/export /' > ./env - source ./env + source .env.local export ONETL_PG_PORT=6432 # Run only some basic tests ./pytest_runner.sh -m postgres -k "tests_core_integration or tests_db_connection_integration" @@ -125,6 +107,11 @@ jobs: name: container-logs-postgres-${{ inputs.postgres-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: logs/* + - name: Shutdown Postgres + if: always() + run: | + docker compose --profile all down -v --remove-orphans + - name: Upload coverage results uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/test-s3.yml b/.github/workflows/test-s3.yml index 6568e19cf..c27daebf3 100644 --- a/.github/workflows/test-s3.yml +++ b/.github/workflows/test-s3.yml @@ -29,24 +29,18 @@ jobs: test-s3: name: Run S3 tests (server=${{ inputs.minio-version }}, spark=${{ inputs.spark-version }}, pydantic=${{ inputs.pydantic-version }}, java=${{ 
inputs.java-version }}, python=${{ inputs.python-version }}, os=${{ inputs.os }}) runs-on: ${{ inputs.os }} - services: - s3: - image: bitnami/minio:${{ inputs.minio-version }} - env: - TZ: UTC - # MINIO_ACCESS_KEY and MINIO_SECRET_KEY are deprecated by new Minio version, but used by old version. - # so we keeping both - MINIO_ACCESS_KEY: onetl - MINIO_ROOT_USER: onetl - MINIO_SECRET_KEY: 123UsedForTestOnly@! - MINIO_ROOT_PASSWORD: 123UsedForTestOnly@! - ports: - - 9010:9000 steps: - name: Checkout code uses: actions/checkout@v4 + - name: Start S3 + run: | + docker compose --profile all down -v --remove-orphans + docker compose --profile s3 up -d --wait --wait-timeout 200 + env: + S3_IMAGE: bitnami/minio:${{ inputs.minio-version }} + - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v4 with: @@ -88,8 +82,7 @@ jobs: - name: Run tests run: | mkdir reports/ || echo "Directory exists" - sed '/^$/d' ./.env.local | sed '/^#/d' | sed 's/^/export /' > ./env - source ./env + source .env.local ./pytest_runner.sh -m s3 - name: Dump S3 logs on failure @@ -106,6 +99,11 @@ jobs: name: container-logs-s3-${{ inputs.minio-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: logs/* + - name: Shutdown S3 + if: always() + run: | + docker compose --profile all down -v --remove-orphans + - name: Upload coverage results uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/test-samba.yml b/.github/workflows/test-samba.yml index f493b0a55..ed55ffc01 100644 --- a/.github/workflows/test-samba.yml +++ b/.github/workflows/test-samba.yml @@ -28,6 +28,13 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Start Samba + run: | + docker compose --profile all down -v --remove-orphans + docker compose --profile samba up -d --wait --wait-timeout 200 + env: + SAMBA_IMAGE: elswork/samba:${{ inputs.server-version }} + - name: Set up Python ${{ inputs.python-version }} uses: actions/setup-python@v5 
with: @@ -50,19 +57,10 @@ jobs: run: | pip install -I -r requirements/core.txt -r requirements/samba.txt -r requirements/tests/base.txt -r requirements/tests/pydantic-${{ inputs.pydantic-version }}.txt - # Replace with Github Actions' because of custom parameter for samba container start - - name: Start Samba - run: | - docker compose down -v --remove-orphans - docker compose up -d samba - env: - SAMBA_IMAGE: elswork/samba:${{ inputs.server-version }} - - name: Run tests run: | mkdir reports/ || echo "Directory exists" - sed '/^$/d' ./.env.local | sed '/^#/d' | sed 's/^/export /' > ./env - source ./env + source .env.local ./pytest_runner.sh -m samba - name: Dump Samba logs on failure @@ -82,7 +80,7 @@ jobs: - name: Shutdown Samba if: always() run: | - docker compose down -v --remove-orphans + docker compose --profile all down -v --remove-orphans - name: Upload coverage results uses: actions/upload-artifact@v4 diff --git a/.github/workflows/test-sftp.yml b/.github/workflows/test-sftp.yml index c3b905318..8cd333eb6 100644 --- a/.github/workflows/test-sftp.yml +++ b/.github/workflows/test-sftp.yml @@ -23,21 +23,18 @@ jobs: test-sftp: name: Run SFTP tests (server=${{ inputs.openssh-version }}, pydantic=${{ inputs.pydantic-version }}, python=${{ inputs.python-version }}, os=${{ inputs.os }}) runs-on: ${{ inputs.os }} - services: - sftp: - image: linuxserver/openssh-server:${{ inputs.openssh-version }} - env: - TZ: UTC - USER_NAME: onetl - PASSWORD_ACCESS: 'true' - USER_PASSWORD: 123UsedForTestOnly@! 
- ports: - - 2222:2222 steps: - name: Checkout code uses: actions/checkout@v4 + - name: Start SFTP + run: | + docker compose --profile all down -v --remove-orphans + docker compose --profile sftp up -d --wait --wait-timeout 200 + env: + SFTP_IMAGE: linuxserver/openssh-server:${{ inputs.openssh-version }} + - name: Set up Python ${{ inputs.python-version }} uses: actions/setup-python@v5 with: @@ -63,8 +60,7 @@ jobs: - name: Run tests run: | mkdir reports/ || echo "Directory exists" - sed '/^$/d' ./.env.local | sed '/^#/d' | sed 's/^/export /' > ./env - source ./env + source .env.local ./pytest_runner.sh -m sftp - name: Dump SFTP logs on failure @@ -81,6 +77,11 @@ jobs: name: container-logs-sftp-${{ inputs.openssh-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: logs/* + - name: Shutdown SFTP + if: always() + run: | + docker compose --profile all down -v --remove-orphans + - name: Upload coverage results uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/test-teradata.yml b/.github/workflows/test-teradata.yml index 482e5fbb9..e4eea89dd 100644 --- a/.github/workflows/test-teradata.yml +++ b/.github/workflows/test-teradata.yml @@ -72,8 +72,7 @@ jobs: - name: Run tests run: | mkdir reports/ || echo "Directory exists" - sed '/^$/d' ./.env.local | sed '/^#/d' | sed 's/^/export /' > ./env - source ./env + source .env.local ./pytest_runner.sh -m teradata - name: Upload coverage results diff --git a/.github/workflows/test-webdav.yml b/.github/workflows/test-webdav.yml index dcc5cc61b..0e21802ed 100644 --- a/.github/workflows/test-webdav.yml +++ b/.github/workflows/test-webdav.yml @@ -28,6 +28,13 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Start WebDAV + run: | + docker compose --profile all down -v --remove-orphans + docker compose --profile webdav up -d --wait --wait-timeout 200 + env: + WEBDAV_IMAGE: chonjay21/webdav:${{ inputs.webdav-version }} + - name: Set up Python ${{ inputs.python-version }} uses: 
actions/setup-python@v5 with: @@ -56,21 +63,10 @@ jobs: run: | pip install -I -r requirements/core.txt -r requirements/webdav.txt -r requirements/tests/base.txt -r requirements/tests/pydantic-${{ inputs.pydantic-version }}.txt - # Replace with Github Actions' services after https://github.com/chonjay21/docker-webdav/pull/3 - # Cannot use services because we need to mount config file from the repo, but services start before checkout. - # See https://github.com/orgs/community/discussions/25792 - - name: Start WebDAV - run: | - docker compose down -v --remove-orphans - docker compose up -d webdav - env: - WEBDAV_IMAGE: chonjay21/webdav:${{ inputs.webdav-version }} - - name: Run tests run: | mkdir reports/ || echo "Directory exists" - sed '/^$/d' ./.env.local | sed '/^#/d' | sed 's/^/export /' > ./env - source ./env + source .env.local ./pytest_runner.sh -m webdav - name: Dump WebDAV logs on failure @@ -90,7 +86,7 @@ jobs: - name: Shutdown WebDAV if: always() run: | - docker compose down -v --remove-orphans + docker compose --profile all down -v --remove-orphans - name: Upload coverage results uses: actions/upload-artifact@v4 diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index c820f7408..eedba1f3e 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -177,13 +177,6 @@ Without docker-compose * Download `VMware Greenplum connector for Spark `_ * Either move it to ``~/.ivy2/jars/``, or pass file path to ``CLASSPATH`` * Set environment variable ``ONETL_GP_PACKAGE_VERSION=local``. - * On Linux, you may have to set environment variable ``SPARK_EXTERNAL_IP`` to IP of ``onetl_onetl`` network gateway: - - .. code:: bash - - export SPARK_EXTERNAL_IP=$(docker network inspect onetl_onetl --format '{{ (index .IPAM.Config 0).Gateway }}') - - This is because in some cases Spark does not properly detect hsot machine IP address, so Greenplum segments cannot connect to Spark executors. 
Start all containers with dependencies: diff --git a/docker-compose.yml b/docker-compose.yml index 22cf5611b..2b6830972 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3.9' - # NOTE: local development only services: onetl: @@ -29,15 +27,23 @@ services: restart: unless-stopped environment: TZ: UTC + POSTGRESQL_DATABASE: onetl + POSTGRESQL_USERNAME: onetl + POSTGRESQL_PASSWORD: 123UsedForTestOnly@! ports: - 5433:5432 + volumes: + - ./docker/greenplum/custom_entrypoint.sh:/entrypoint.sh + - ./docker/greenplum/initdb.d/:/container-entrypoint-initdb.d/ networks: - onetl extra_hosts: - host.docker.internal:host-gateway - # TODO: remove after https://github.com/andruche/docker-greenplum/pull/2 - sysctls: - - net.ipv6.conf.all.disable_ipv6=1 + healthcheck: + test: ["CMD", "/usr/local/gpdb/bin/pg_isready"] + interval: 10s + timeout: 5s + retries: 10 profiles: [greenplum, db, all] pgbouncer-transaction-gp: @@ -49,13 +55,18 @@ services: PGBOUNCER_POOL_MODE: transaction POSTGRESQL_HOST: greenplum POSTGRESQL_PORT: 5432 - POSTGRESQL_DATABASE: postgres - POSTGRESQL_USERNAME: gpadmin + POSTGRESQL_DATABASE: onetl + POSTGRESQL_USERNAME: onetl POSTGRESQL_PASSWORD: 123UsedForTestOnly@! 
ports: - 6433:6432 networks: - onetl + healthcheck: + test: ["CMD-SHELL", "PGPASSWORD=$$POSTGRESQL_PASSWORD psql -h $$POSTGRESQL_HOST -p $$POSTGRESQL_PORT -U $$POSTGRESQL_USERNAME -d $$POSTGRESQL_DATABASE -tc 'SELECT 1;' | grep -q 1"] + interval: 10s + timeout: 5s + retries: 10 profiles: [greenplum, db, all] clickhouse: @@ -75,10 +86,16 @@ services: - onetl sysctls: - net.ipv6.conf.all.disable_ipv6=1 + healthcheck: + test: ["CMD-SHELL", 'wget --quiet --no-verbose --tries=1 --spider --header "X-ClickHouse-User: $$CLICKHOUSE_USER" --header "X-ClickHouse-Key: $$CLICKHOUSE_PASSWORD" "http://localhost:8123/?query=SELECT%201&database=$$CLICKHOUSE_DB" || exit 1'] + interval: 10s + timeout: 5s + retries: 10 profiles: [clickhouse, jdbc, db, all] zookeeper: image: ${ZOOKEEPER_IMAGE:-bitnami/zookeeper:3.8} + restart: unless-stopped ports: - 2181:2181 networks: @@ -132,8 +149,16 @@ services: - 27017:27017 environment: TZ: UTC - MONGO_INITDB_ROOT_USERNAME: onetl - MONGO_INITDB_ROOT_PASSWORD: 123UsedForTestOnly@! + MONGO_INITDB_ROOT_DB: admin + MONGO_INITDB_ROOT_USERNAME: admin + MONGO_INITDB_ROOT_PASSWORD: 123AdminUserForTests@! 
+ volumes: + - ./docker/mongodb/:/docker-entrypoint-initdb.d/ + healthcheck: + test: ["CMD-SHELL", "echo 'db.runCommand(\"ping\").ok' | mongosh \"$$MONGO_INITDB_ROOT_DB\" --username \"$$MONGO_INITDB_ROOT_USERNAME\" --password \"$$MONGO_INITDB_ROOT_PASSWORD\" --quiet"] + interval: 10s + timeout: 5s + retries: 5 networks: - onetl profiles: [mongodb, db, all] @@ -176,6 +201,11 @@ services: networks: - onetl platform: linux/amd64 + healthcheck: + test: ["CMD-SHELL", "/usr/bin/mysql --user=root --password=\"$$MYSQL_ROOT_PASSWORD\" --execute \"SHOW DATABASES;\""] + interval: 2s + timeout: 20s + retries: 10 profiles: [mysql, jdbc, db, all] postgres: @@ -190,6 +220,11 @@ services: - 5432:5432 networks: - onetl + healthcheck: + test: ["CMD", "pg_isready"] + interval: 10s + timeout: 5s + retries: 10 profiles: [postgres, jdbc, db, all] pgbouncer-transaction-pg: @@ -208,6 +243,11 @@ services: - 6432:6432 networks: - onetl + healthcheck: + test: ["CMD-SHELL", "PGPASSWORD=$$POSTGRESQL_PASSWORD psql -h $$POSTGRESQL_HOST -p $$POSTGRESQL_PORT -U $$POSTGRESQL_USERNAME -d $$POSTGRESQL_DATABASE -tc 'SELECT 1;' | grep -q 1"] + interval: 10s + timeout: 5s + retries: 10 profiles: [postgres, jdbc, db, all] hdfs: @@ -224,10 +264,11 @@ services: - ./docker/hdfs/conf/hadoop/:/var/hadoop/conf/ networks: - onetl + # healthcheck is defined in image profiles: [hdfs, file_df, file, all] oracle: - image: ${ORACLE_IMAGE:-gvenzl/oracle-free:23.3-slim-faststart} + image: ${ORACLE_IMAGE:-gvenzl/oracle-free:slim-faststart} restart: unless-stopped environment: TZ: UTC @@ -236,6 +277,8 @@ APP_USER_PASSWORD: 123UsedForTestOnly@! 
ports: - 1522:1521 + volumes: + - ./docker/oracle/:/container-entrypoint-initdb.d networks: - onetl healthcheck: @@ -265,6 +308,11 @@ services: - ./docker/ftp/on_post_init.sh:/sources/ftps/eventscripts/on_post_init.sh networks: - onetl + healthcheck: + test: ["CMD-SHELL", "curl --silent --show-error --fail -u \"$$APP_USER_NAME:$$APP_USER_PASSWD\" ftp://localhost:21"] + interval: 10s + timeout: 5s + retries: 10 profiles: [ftp, file, all] ftps: @@ -283,10 +331,15 @@ services: - 2122:21 - 30020-30030:30020-30030 volumes: - - ./docker/ftp/on_post_init.sh:/sources/ftps/eventscripts/on_post_init.sh + - ./docker/ftps/on_post_init.sh:/sources/ftps/eventscripts/on_post_init.sh networks: - onetl - profiles: [fts, file, all] + healthcheck: + test: ["CMD-SHELL", "curl --silent --show-error --fail -u \"$$APP_USER_NAME:$$APP_USER_PASSWD\" -k ftps://localhost:21"] + interval: 10s + timeout: 5s + retries: 10 + profiles: [ftps, file, all] samba: image: ${SAMBA_IMAGE:-elswork/samba} @@ -301,6 +354,7 @@ services: entrypoint: ["/custom_entrypoint.sh"] networks: - onetl + # healthcheck is defined in image profiles: [samba, file, all] s3: @@ -319,6 +373,11 @@ services: - 9011:9001 networks: - onetl + healthcheck: + test: ["CMD-SHELL", "curl --silent --show-error --fail http://localhost:9000/minio/health/live"] + interval: 10s + timeout: 5s + retries: 10 profiles: [s3, file_df, file, all] sftp: @@ -336,6 +395,7 @@ services: - 2222:2222 networks: - onetl + # no sshpass in the image, so healthcheck is not available profiles: [sftp, file, all] webdav: @@ -354,6 +414,11 @@ services: - ./docker/webdav/on_post_init.sh:/sources/webdav/eventscripts/on_post_init.sh networks: - onetl + healthcheck: + test: ["CMD-SHELL", "curl --silent --show-error --fail -u \"$$APP_USER_NAME:$$APP_USER_PASSWD\" -k http://localhost:80"] + interval: 10s + timeout: 5s + retries: 10 profiles: [webdav, file, all] networks: diff --git a/docker/ftp/on_post_init.sh b/docker/ftp/on_post_init.sh index ca0254e01..50e55a9e0 
100755 --- a/docker/ftp/on_post_init.sh +++ b/docker/ftp/on_post_init.sh @@ -7,6 +7,6 @@ echo "max_per_ip=0" >> /etc/vsftpd/vsftpd.conf # https://serverfault.com/questions/65002/vsftpd-and-implicit-ssl echo "implicit_ssl=NO" >> /etc/vsftpd/vsftpd.conf -# enable anonymous login for both FTP and FTPS +# enable anonymous login echo "anonymous_enable=YES" >> /etc/vsftpd/vsftpd.conf echo "allow_anon_ssl=YES" >> /etc/vsftpd/vsftpd.conf diff --git a/docker/ftps/on_post_init.sh b/docker/ftps/on_post_init.sh new file mode 100755 index 000000000..bdef322d8 --- /dev/null +++ b/docker/ftps/on_post_init.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -e + +# https://serverfault.com/questions/157159/too-many-ftp-connection-causing-error-421 +echo "max_per_ip=0" >> /etc/vsftpd/vsftpd.conf + +# enable anonymous login +echo "anonymous_enable=YES" >> /etc/vsftpd/vsftpd.conf +echo "allow_anon_ssl=YES" >> /etc/vsftpd/vsftpd.conf diff --git a/docker/greenplum/custom_entrypoint.sh b/docker/greenplum/custom_entrypoint.sh new file mode 100755 index 000000000..1628e763d --- /dev/null +++ b/docker/greenplum/custom_entrypoint.sh @@ -0,0 +1,86 @@ +#!/bin/bash + +set -e + +function run_custom_scripts { + SCRIPTS_ROOT="${1}"; + + # Check whether parameter has been passed on + if [ -z "${SCRIPTS_ROOT}" ]; then + echo "No SCRIPTS_ROOT passed on, no scripts will be run."; + return; + fi; + + # Execute custom provided files (only if directory exists and has files in it) + if [ -d "${SCRIPTS_ROOT}" ] && [ -n "$(ls -A "${SCRIPTS_ROOT}")" ]; then + echo -e "\nENTRYPOINT: Executing user-defined scripts..." 
+ run_custom_scripts_recursive "${SCRIPTS_ROOT}" + echo -e "ENTRYPOINT: DONE: Executing user-defined scripts.\n" + fi; +} + +function run_custom_scripts_recursive { + local f; + for f in "${1}"/*; do + case "${f}" in + *.sh) + if [ -x "${f}" ]; then + echo -e "\nENTRYPOINT: running ${f} ..."; run_script_as_gpadmin "${f}"; echo "ENTRYPOINT: DONE: running ${f}" + else + echo -e "\nENTRYPOINT: sourcing ${f} ..."; run_command_as_gpadmin "${f}"; echo "ENTRYPOINT: DONE: sourcing ${f}" + fi; + ;; + + *.sql) + echo -e "\nENTRYPOINT: running ${f} ..."; run_command_as_gpadmin "psql -f ${f}"; echo "ENTRYPOINT: DONE: running ${f}" + ;; + + *) + if [ -d "${f}" ]; then + echo -e "\nENTRYPOINT: descending into ${f} ..."; run_custom_scripts_recursive "${f}"; echo "ENTRYPOINT: DONE: descending into ${f}" + else + echo -e "\nENTRYPOINT: ignoring ${f}" + fi; + ;; + esac + echo ""; + done +} + +function run_script_as_gpadmin() { + su -w POSTGRESQL_DATABASE -w POSTGRESQL_USERNAME -w POSTGRESQL_PASSWORD - gpadmin "${1}" +} + +function run_command_as_gpadmin() { + su -w POSTGRESQL_DATABASE -w POSTGRESQL_USERNAME -w POSTGRESQL_PASSWORD - gpadmin bash -c "${1}" +} + + +function start_gpadmin() { + /etc/init.d/ssh start; + sleep 1; + run_command_as_gpadmin "gpstart -a" +} + +function stop_gpadmin() { + run_command_as_gpadmin "gpstop -a -M fast" +} + +function output_logs() { + tail -f `ls /data/{master,coordinator}/gpsne-1/{pg_log,log}/gpdb-* | tail -n1` & +} + +function main() { + start_gpadmin + trap "stop_gpadmin" INT TERM + output_logs + + run_custom_scripts "/container-entrypoint-initdb.d" + + # required by trap + while [ "$END" == '' ]; do + sleep 1 + done +} + +main diff --git a/docker/greenplum/initdb.d/create_user.sh b/docker/greenplum/initdb.d/create_user.sh new file mode 100755 index 000000000..cfa3f1ce2 --- /dev/null +++ b/docker/greenplum/initdb.d/create_user.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +psql -d postgres -c "CREATE DATABASE $POSTGRESQL_DATABASE WITH owner = gpadmin;" + 
+psql -d onetl <<EOF +CREATE USER $POSTGRESQL_USERNAME PASSWORD '$POSTGRESQL_PASSWORD'; +GRANT ALL PRIVILEGES ON DATABASE $POSTGRESQL_DATABASE TO $POSTGRESQL_USERNAME; +EOF diff --git a/docker/greenplum/initdb.d/fix_pg_hba.sh b/docker/greenplum/initdb.d/fix_pg_hba.sh new file mode 100755 --- /dev/null +++ b/docker/greenplum/initdb.d/fix_pg_hba.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +echo "host all all 0.0.0.0/0 md5" >> /data/master/gpsne-1/pg_hba.conf +echo "host all all ::0/0 md5" >> /data/master/gpsne-1/pg_hba.conf + +psql -d postgres -c "SELECT pg_reload_conf();" diff --git a/docker/mongodb/create_user.sh b/docker/mongodb/create_user.sh new file mode 100755 index 000000000..e5e3cb8fc --- /dev/null +++ b/docker/mongodb/create_user.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +mongosh "${MONGO_INITDB_ROOT_DB}" \ + --host localhost \ + --port 27017 \ + -u "${MONGO_INITDB_ROOT_USERNAME}" \ + -p "${MONGO_INITDB_ROOT_PASSWORD}" \ + --eval "db.createUser({user: 'onetl', pwd: '123UsedForTestOnly@!', roles:[{role:'dbOwner', db: 'onetl'}]});" diff --git a/docker/oracle/processes.sql b/docker/oracle/processes.sql new file mode 100644 index 000000000..b50d4e6d3 --- /dev/null +++ b/docker/oracle/processes.sql @@ -0,0 +1,5 @@ +-- increase number of processes on XE: +-- see https://dba.stackexchange.com/questions/110819/oracle-intermittently-throws-ora-12516-tnslistener-could-not-find-available-h +ALTER SYSTEM SET processes=150 scope=spfile; +SHUTDOWN IMMEDIATE; +STARTUP; diff --git a/docker/oracle/user_permissions.sql b/docker/oracle/user_permissions.sql new file mode 100644 index 000000000..fd18a51b9 --- /dev/null +++ b/docker/oracle/user_permissions.sql @@ -0,0 +1,6 @@ +-- important to select from v$session used by one of tests +GRANT SELECT ANY DICTIONARY TO onetl; + +-- same, but for Oracle 12.x plus +ALTER SESSION SET container=FREEPDB1; +GRANT SELECT ANY DICTIONARY TO onetl; diff --git a/onetl/connection/db_connection/mongodb/connection.py b/onetl/connection/db_connection/mongodb/connection.py index a5631a26d..4f455cec9 100644 --- a/onetl/connection/db_connection/mongodb/connection.py +++ b/onetl/connection/db_connection/mongodb/connection.py @@ -343,10 +343,11 @@ def pipeline( log_options(log, read_options) + read_options["connection.uri"] = self.connection_url + read_options["database"] = self.database read_options["collection"] = collection if pipeline:
read_options["aggregation.pipeline"] = json.dumps(pipeline) - read_options["connection.uri"] = self.connection_url with override_job_description(self.spark, f"{self}.pipeline()"): spark_reader = self.spark.read.format("mongodb").options(**read_options) @@ -414,6 +415,7 @@ def get_min_max_values( log_json(log, hint, "hint") read_options["connection.uri"] = self.connection_url + read_options["database"] = self.database read_options["collection"] = source read_options["aggregation.pipeline"] = json.dumps(pipeline) if hint: @@ -456,6 +458,7 @@ def read_source_as_df( read_options["hint"] = json.dumps(hint) read_options["connection.uri"] = self.connection_url + read_options["database"] = self.database read_options["collection"] = source log.info("|%s| Executing aggregation pipeline:", self.__class__.__name__) @@ -488,6 +491,7 @@ def write_df_to_target( write_options = self.WriteOptions.parse(options) write_options_dict = write_options.dict(by_alias=True, exclude_none=True, exclude={"if_exists"}) write_options_dict["connection.uri"] = self.connection_url + write_options_dict["database"] = self.database write_options_dict["collection"] = target mode = ( "overwrite" @@ -512,11 +516,14 @@ def write_df_to_target( @property def connection_url(self) -> str: - prop = self.extra.dict(by_alias=True) - parameters = "&".join(f"{k}={v}" for k, v in sorted(prop.items())) - parameters = "?" 
+ parameters if parameters else "" + params = self.extra.dict(by_alias=True) + sorted_params = [(k, v) for k, v in sorted(params.items(), key=lambda x: x[0].lower())] + query = parser.urlencode(sorted_params, quote_via=parser.quote) + password = parser.quote(self.password.get_secret_value()) - return f"mongodb://{self.user}:{password}@{self.host}:{self.port}/{self.database}{parameters}" + parsed_url = parser.urlparse(f"mongodb://{self.user}:{password}@{self.host}:{self.port}/") + # do not include /database as it would be used as authSource + return parser.urlunparse(parsed_url._replace(query=query)) @validator("spark") def _check_java_class_imported(cls, spark): diff --git a/onetl/connection/file_connection/ftps.py b/onetl/connection/file_connection/ftps.py index 89165a0ec..bede3a010 100644 --- a/onetl/connection/file_connection/ftps.py +++ b/onetl/connection/file_connection/ftps.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 import ftplib # noqa: S402 # nosec +import ssl import textwrap from ftputil import FTPHost @@ -25,20 +26,23 @@ class TLSfix(ftplib.FTP_TLS): # noqa: N801 - """ - Fix for python 3.6+ - https://stackoverflow.com/questions/14659154/ftpes-session-reuse-required - """ + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._sock = None + + @property + def sock(self): + return self._sock - def ntransfercmd(self, cmd, rest=None): - conn, size = ftplib.FTP.ntransfercmd(self, cmd, rest) # noqa: S321 # nosec - if self._prot_p: - conn = self.context.wrap_socket( - conn, - server_hostname=self.host, - session=self.sock.session, - ) # this is the fix - return conn, size + @sock.setter + def sock(self, value): + """ + Explicitly wrap socket with SSL before sending any command + https://stackoverflow.com/a/36049814/23601543 + """ + if value is not None and not isinstance(value, ssl.SSLSocket): + value = self.context.wrap_socket(value) + self._sock = value class FTPS(FTP): diff --git 
a/tests/fixtures/processing/oracle.py b/tests/fixtures/processing/oracle.py index a3452c819..0af8ecf39 100644 --- a/tests/fixtures/processing/oracle.py +++ b/tests/fixtures/processing/oracle.py @@ -47,14 +47,6 @@ def user(self) -> str: def password(self) -> str: return os.environ["ONETL_ORA_PASSWORD"] - @property - def root_user(self) -> str: - return os.environ["ONETL_ORA_ROOT_USER"] - - @property - def root_password(self) -> str: - return os.environ["ONETL_ORA_ROOT_PASSWORD"] - @property def host(self) -> str: return os.environ["ONETL_ORA_HOST"] @@ -72,18 +64,13 @@ def url(self) -> str: dsn = cx_Oracle.makedsn(self.host, self.port, sid=self.sid, service_name=self.service_name) return f"oracle://{self.user}:{quote(self.password)}@{dsn}" - def get_dsn(self) -> cx_Oracle.Dsn: + def get_conn(self) -> cx_Oracle.Connection: try: cx_Oracle.init_oracle_client(lib_dir=os.getenv("ONETL_ORA_CLIENT_PATH")) except Exception: logger.debug("cx_Oracle client is already initialized.", exc_info=True) - return cx_Oracle.makedsn(self.host, self.port, sid=self.sid, service_name=self.service_name) - - def get_conn(self) -> cx_Oracle.Connection: - return cx_Oracle.connect(user=self.user, password=self.password, dsn=self.get_dsn()) - - def get_root_conn(self) -> cx_Oracle.Connection: - return cx_Oracle.connect(user=self.root_user, password=self.root_password, dsn=self.get_dsn()) + dsn = cx_Oracle.makedsn(self.host, self.port, sid=self.sid, service_name=self.service_name) + return cx_Oracle.connect(user=self.user, password=self.password, dsn=dsn) def create_schema_ddl( self, @@ -91,18 +78,10 @@ def create_schema_ddl( ) -> str: return f"CREATE SCHEMA AUTHORIZATION {schema}" - def grant_dictionary_ddl(self): - return f"GRANT SELECT ANY DICTIONARY TO {self.user}" - def create_schema( self, schema: str, ) -> None: - with self.get_root_conn().cursor() as cursor: - # this is requires to make queries to v$session - cursor.execute(self.grant_dictionary_ddl()) - self.connection.commit() - with 
self.connection.cursor() as cursor: cursor.execute(self.create_schema_ddl(schema)) self.connection.commit() diff --git a/tests/tests_unit/tests_db_connection_unit/test_mongodb_unit.py b/tests/tests_unit/tests_db_connection_unit/test_mongodb_unit.py index be7b9f930..5cbe22598 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_mongodb_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_mongodb_unit.py @@ -113,7 +113,7 @@ def test_mongodb(spark_mock): conn = MongoDB( host="host", user="user", - password="password", + password="some@password", database="database", spark=spark_mock, ) @@ -122,10 +122,10 @@ def test_mongodb(spark_mock): assert conn.port == 27017 assert conn.user == "user" assert conn.password != "password" - assert conn.password.get_secret_value() == "password" + assert conn.password.get_secret_value() == "some@password" assert conn.database == "database" - assert conn.connection_url == "mongodb://user:password@host:27017/database" + assert conn.connection_url == "mongodb://user:some%40password@host:27017/" assert conn.instance_url == "mongodb://host:27017/database" assert str(conn) == "MongoDB[host:27017/database]" @@ -167,7 +167,7 @@ def test_mongodb_with_port(spark_mock): assert conn.password.get_secret_value() == "password" assert conn.database == "database" - assert conn.connection_url == "mongodb://user:password@host:12345/database" + assert conn.connection_url == "mongodb://user:password@host:12345/" assert conn.instance_url == "mongodb://host:12345/database" @@ -215,11 +215,11 @@ def test_mongodb_with_extra(spark_mock): user="user", password="password", database="database", - extra={"tls": "true", "opt1": "value1"}, + extra={"tls": "true", "opt1": "value1", "opt2": "value with spaces"}, spark=spark_mock, ) - assert mongo.connection_url == "mongodb://user:password@host:27017/database?opt1=value1&tls=true" + assert mongo.connection_url == "mongodb://user:password@host:27017/?opt1=value1&opt2=value%20with%20spaces&tls=true" def 
test_mongodb_convert_list_to_str(spark_mock):