diff --git a/.github/actions/save_logs_and_results/action.yml b/.github/actions/save_logs_and_results/action.yml
index 0f238835d19..b344c68f2ef 100644
--- a/.github/actions/save_logs_and_results/action.yml
+++ b/.github/actions/save_logs_and_results/action.yml
@@ -6,7 +6,7 @@ inputs:
 runs:
   using: composite
   steps:
-  - uses: actions/upload-artifact@v3.1.1
+  - uses: actions/upload-artifact@v4.6.0
     name: Upload logs
     with:
       name: ${{ inputs.folder }}
diff --git a/.github/actions/setup_extension/action.yml b/.github/actions/setup_extension/action.yml
index 96b408e7e43..33129f17de6 100644
--- a/.github/actions/setup_extension/action.yml
+++ b/.github/actions/setup_extension/action.yml
@@ -17,7 +17,7 @@ runs:
         echo "PG_MAJOR=${{ inputs.pg_major }}" >> $GITHUB_ENV
       fi
     shell: bash
-  - uses: actions/download-artifact@v3.0.1
+  - uses: actions/download-artifact@v4.1.8
     with:
       name: build-${{ env.PG_MAJOR }}
   - name: Install Extension
diff --git a/.github/actions/upload_coverage/action.yml b/.github/actions/upload_coverage/action.yml
index 0b5f581a6a4..ba80ba63afa 100644
--- a/.github/actions/upload_coverage/action.yml
+++ b/.github/actions/upload_coverage/action.yml
@@ -21,7 +21,7 @@ runs:
       mkdir -p /tmp/codeclimate
       cc-test-reporter format-coverage -t lcov -o /tmp/codeclimate/${{ inputs.flags }}.json lcov.info
     shell: bash
-  - uses: actions/upload-artifact@v3.1.1
+  - uses: actions/upload-artifact@v4.6.0
     with:
       path: "/tmp/codeclimate/*.json"
-      name: codeclimate
+      name: codeclimate-${{ inputs.flags }}
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 3d9b57c370a..fefcc5bcbcd 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -122,7 +122,7 @@ jobs:
     - name: Build
       run: "./ci/build-citus.sh"
       shell: bash
-    - uses: actions/upload-artifact@v3.1.1
+    - uses: actions/upload-artifact@v4.6.0
       with:
         name: build-${{ env.PG_MAJOR }}
         path: |-
@@ -303,10 +303,12 @@ jobs:
         check-arbitrary-configs parallel=4 CONFIGS=$TESTS
     - uses: "./.github/actions/save_logs_and_results"
       if: always()
+      with:
+        folder: ${{ env.PG_MAJOR }}_arbitrary_configs_${{ matrix.parallel }}
     - uses: "./.github/actions/upload_coverage"
       if: always()
       with:
-        flags: ${{ env.pg_major }}_upgrade
+        flags: ${{ env.PG_MAJOR }}_arbitrary_configs_${{ matrix.parallel }}
         codecov_token: ${{ secrets.CODECOV_TOKEN }}
   test-pg-upgrade:
     name: PG${{ matrix.old_pg_major }}-PG${{ matrix.new_pg_major }} - check-pg-upgrade
@@ -360,6 +362,8 @@ jobs:
       if: failure()
     - uses: "./.github/actions/save_logs_and_results"
       if: always()
+      with:
+        folder: ${{ env.old_pg_major }}_${{ env.new_pg_major }}_upgrade
     - uses: "./.github/actions/upload_coverage"
       if: always()
       with:
@@ -405,10 +409,12 @@ jobs:
         done;
     - uses: "./.github/actions/save_logs_and_results"
       if: always()
+      with:
+        folder: ${{ env.PG_MAJOR }}_citus_upgrade
     - uses: "./.github/actions/upload_coverage"
       if: always()
       with:
-        flags: ${{ env.pg_major }}_upgrade
+        flags: ${{ env.PG_MAJOR }}_citus_upgrade
         codecov_token: ${{ secrets.CODECOV_TOKEN }}
   upload-coverage:
     if: always()
@@ -424,10 +430,11 @@ jobs:
     - test-citus-upgrade
     - test-pg-upgrade
     steps:
-    - uses: actions/download-artifact@v3.0.1
+    - uses: actions/download-artifact@v4.1.8
      with:
-        name: "codeclimate"
-        path: "codeclimate"
+        pattern: codeclimate*
+        path: codeclimate
+        merge-multiple: true
    - name: Upload coverage results to Code Climate
      run: |-
        cc-test-reporter sum-coverage codeclimate/*.json -o total.json
@@ -525,7 +532,7 @@ jobs:
       matrix: ${{ fromJson(needs.prepare_parallelization_matrix_32.outputs.json) }}
     steps:
     - uses: actions/checkout@v3.5.0
-    - uses: actions/download-artifact@v3.0.1
+    - uses: actions/download-artifact@v4.1.8
     - uses: "./.github/actions/setup_extension"
     - name: Run minimal tests
       run: |-
diff --git a/.github/workflows/flaky_test_debugging.yml b/.github/workflows/flaky_test_debugging.yml
index a666c1cd5df..574cb7813ff 100644
--- a/.github/workflows/flaky_test_debugging.yml
+++ b/.github/workflows/flaky_test_debugging.yml
@@ -34,7 +34,7 @@ jobs:
         echo "PG_MAJOR=${PG_MAJOR}" >> $GITHUB_ENV
         ./ci/build-citus.sh
       shell: bash
-    - uses: actions/upload-artifact@v3.1.1
+    - uses: actions/upload-artifact@v4.6.0
      with:
        name: build-${{ env.PG_MAJOR }}
        path: |-
diff --git a/src/backend/distributed/transaction/transaction_recovery.c b/src/backend/distributed/transaction/transaction_recovery.c
index f25823b3064..c114a2ed39f 100644
--- a/src/backend/distributed/transaction/transaction_recovery.c
+++ b/src/backend/distributed/transaction/transaction_recovery.c
@@ -347,7 +347,23 @@ RecoverWorkerTransactions(WorkerNode *workerNode)
 	}
 
 	systable_endscan(scanDescriptor);
-	table_close(pgDistTransaction, NoLock);
+
+	/*
+	 * Here we release the lock on pg_dist_transaction while closing it to avoid
+	 * deadlocks that might occur because of trying to acquire a lock on
+	 * pg_dist_authinfo while holding a lock on pg_dist_transaction. Such a scenario
+	 * can only cause a deadlock if another transaction is trying to acquire a strong
+	 * lock on pg_dist_transaction while holding a lock on pg_dist_authinfo. As of
+	 * today, we (implicitly) acquire a strong lock on pg_dist_transaction only when
+	 * upgrading Citus to 11.3-1 and this happens when creating a REPLICA IDENTITY on
+	 * pg_dist_transaction.
+	 *
+	 * And regardless of the code-path we are in, it should be okay to release the
+	 * lock now because all we do after this point is to abort the prepared
+	 * transactions that are not part of an in-progress distributed transaction, and
+	 * that does not require holding the lock.
+	 */
+	table_close(pgDistTransaction, RowExclusiveLock);
 
 	if (!recoveryFailed)
 	{
diff --git a/src/test/regress/expected/isolation_create_restore_point.out b/src/test/regress/expected/isolation_create_restore_point.out
index 3b1bdf9eb52..dce15a35d5c 100644
--- a/src/test/regress/expected/isolation_create_restore_point.out
+++ b/src/test/regress/expected/isolation_create_restore_point.out
@@ -147,16 +147,15 @@ recover_prepared_transactions
 (1 row)
 
 step s2-create-restore:
  SELECT 1 FROM citus_create_restore_point('citus-test');
-
-step s1-commit:
- COMMIT;
-step s2-create-restore: <... completed>
 ?column?
 ---------------------------------------------------------------------
        1
 (1 row)
 
+step s1-commit:
+ COMMIT;
+
 
 starting permutation: s1-begin s1-drop s2-create-restore s1-commit
 create_reference_table
diff --git a/src/test/regress/expected/upgrade_pg_dist_cleanup_after_0.out b/src/test/regress/expected/upgrade_pg_dist_cleanup_after_0.out
index d71fad887c9..168c64ccaa3 100644
--- a/src/test/regress/expected/upgrade_pg_dist_cleanup_after_0.out
+++ b/src/test/regress/expected/upgrade_pg_dist_cleanup_after_0.out
@@ -28,3 +28,12 @@ SELECT * FROM pg_dist_cleanup;
 CALL citus_cleanup_orphaned_resources();
 NOTICE:  cleaned up 1 orphaned resources
 DROP TABLE table_with_orphaned_shards;
+-- Re-enable automatic shard cleanup by the maintenance daemon as
+-- we have disabled it in upgrade_pg_dist_cleanup_before.sql
+ALTER SYSTEM RESET citus.defer_shard_delete_interval;
+SELECT pg_reload_conf();
+ pg_reload_conf
+---------------------------------------------------------------------
+ t
+(1 row)
+
diff --git a/src/test/regress/expected/upgrade_pg_dist_cleanup_before_0.out b/src/test/regress/expected/upgrade_pg_dist_cleanup_before_0.out
index a0cf9ceb1ea..dd6c8868e32 100644
--- a/src/test/regress/expected/upgrade_pg_dist_cleanup_before_0.out
+++ b/src/test/regress/expected/upgrade_pg_dist_cleanup_before_0.out
@@ -30,6 +30,23 @@ SELECT COUNT(*) FROM pg_dist_placement WHERE shardstate = 1 AND shardid IN (SELE
 (1 row)
 
 -- create an orphaned placement based on an existing one
+--
+-- But before doing that, first disable automatic shard cleanup
+-- by the maintenance daemon so that we can reliably test the cleanup
+-- in upgrade_pg_dist_cleanup_after.sql.
+ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1;
+SELECT pg_reload_conf();
+ pg_reload_conf
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT pg_sleep(0.1);
+ pg_sleep
+---------------------------------------------------------------------
+
+(1 row)
+
 INSERT INTO pg_dist_placement(placementid, shardid, shardstate, shardlength, groupid)
   SELECT nextval('pg_dist_placement_placementid_seq'::regclass), shardid, 4, shardlength, 3-groupid
   FROM pg_dist_placement
diff --git a/src/test/regress/spec/isolation_create_restore_point.spec b/src/test/regress/spec/isolation_create_restore_point.spec
index 2cdc66f850a..c62a64a449c 100644
--- a/src/test/regress/spec/isolation_create_restore_point.spec
+++ b/src/test/regress/spec/isolation_create_restore_point.spec
@@ -154,7 +154,10 @@ permutation "s1-begin" "s1-ddl" "s2-create-restore" "s1-commit"
 // verify that citus_create_restore_point is not blocked by concurrent COPY (only commit)
 permutation "s1-begin" "s1-copy" "s2-create-restore" "s1-commit"
 
-// verify that citus_create_restore_point is blocked by concurrent recover_prepared_transactions
+// verify that citus_create_restore_point is partially blocked by concurrent recover_prepared_transactions.
+// In the test output, we won't be able to explicitly observe this since
+// recover_prepared_transactions unblocks citus_create_restore_point after in-progress prepared transactions
+// are recovered.
permutation "s1-begin" "s1-recover" "s2-create-restore" "s1-commit" // verify that citus_create_restore_point is blocked by concurrent DROP TABLE diff --git a/src/test/regress/sql/upgrade_pg_dist_cleanup_after.sql b/src/test/regress/sql/upgrade_pg_dist_cleanup_after.sql index e84c35b608c..333ac60ca93 100644 --- a/src/test/regress/sql/upgrade_pg_dist_cleanup_after.sql +++ b/src/test/regress/sql/upgrade_pg_dist_cleanup_after.sql @@ -13,3 +13,8 @@ SELECT COUNT(*) FROM pg_dist_placement WHERE shardid IN (SELECT shardid FROM pg_ SELECT * FROM pg_dist_cleanup; CALL citus_cleanup_orphaned_resources(); DROP TABLE table_with_orphaned_shards; + +-- Re-enable automatic shard cleanup by maintenance daemon as +-- we have disabled it in upgrade_pg_dist_cleanup_before.sql +ALTER SYSTEM RESET citus.defer_shard_delete_interval; +SELECT pg_reload_conf(); diff --git a/src/test/regress/sql/upgrade_pg_dist_cleanup_before.sql b/src/test/regress/sql/upgrade_pg_dist_cleanup_before.sql index 62ec8a1fb46..ec0eef353dc 100644 --- a/src/test/regress/sql/upgrade_pg_dist_cleanup_before.sql +++ b/src/test/regress/sql/upgrade_pg_dist_cleanup_before.sql @@ -16,6 +16,16 @@ SELECT create_distributed_table('table_with_orphaned_shards', 'a'); -- show all 32 placements are active SELECT COUNT(*) FROM pg_dist_placement WHERE shardstate = 1 AND shardid IN (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='table_with_orphaned_shards'::regclass); -- create an orphaned placement based on an existing one +-- +-- But before doing that, first disable automatic shard cleanup +-- by maintenance daemon so that we can reliably test the cleanup +-- in upgrade_pg_dist_cleanup_after.sql. + +ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1; +SELECT pg_reload_conf(); + +SELECT pg_sleep(0.1); + INSERT INTO pg_dist_placement(placementid, shardid, shardstate, shardlength, groupid) SELECT nextval('pg_dist_placement_placementid_seq'::regclass), shardid, 4, shardlength, 3-groupid FROM pg_dist_placement