From 92f290e9537478f85ff3fe3ab39945c1a49a6c1a Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 20 Mar 2024 17:08:42 +0000 Subject: [PATCH] Relax cleanup check in SnapshotStressTestsIT (#106569) We can't assert no leaked blobs here because today the first cleanup leaves the original `RepositoryData` in place so the second cleanup is not a no-op. Relates #100718 Backport of #100855 to 7.17 --- .../snapshots/SnapshotStressTestsIT.java | 30 +++++++++++++++++-- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotStressTestsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotStressTestsIT.java index b83b55f224242..6f5bcdfc8dfaf 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotStressTestsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotStressTestsIT.java @@ -809,9 +809,33 @@ private void startCleaner() { Releasables.close(releaseAll); logger.info("--> completed cleanup of [{}]", trackedRepository.repositoryName); final RepositoryCleanupResult result = cleanupRepositoryResponse.result(); - assertThat(Strings.toString(result), result.blobs(), equalTo(0L)); - assertThat(Strings.toString(result), result.bytes(), equalTo(0L)); - startCleaner(); + if (result.bytes() > 0L || result.blobs() > 0L) { + // we could legitimately run into dangling blobs as the result of a shard snapshot failing half-way + // through the snapshot because of a concurrent index-close or -delete. The second round of cleanup on + // the same repository however should always find no more dangling blobs and be a no-op since we block all + // concurrent operations on the repository. + client.admin() + .cluster() + .prepareCleanupRepository(trackedRepository.repositoryName) + .execute(mustSucceed(secondCleanupRepositoryResponse -> { + final RepositoryCleanupResult secondCleanupResult = secondCleanupRepositoryResponse.result(); + if (secondCleanupResult.blobs() == 1) { + // The previous cleanup actually leaves behind a stale index-N blob, so this cleanup removes it + // and reports it in its response. When https://github.com/elastic/elasticsearch/pull/100718 is + // fixed the second cleanup will be a proper no-op and we can remove this lenience -- TODO + } else { + assertThat(Strings.toString(secondCleanupResult), secondCleanupResult.blobs(), equalTo(0L)); + assertThat(Strings.toString(secondCleanupResult), secondCleanupResult.bytes(), equalTo(0L)); + } + Releasables.close(releaseAll); + logger.info("--> completed second cleanup of [{}]", trackedRepository.repositoryName); + startCleaner(); + })); + } else { + Releasables.close(releaseAll); + logger.info("--> completed cleanup of [{}]", trackedRepository.repositoryName); + startCleaner(); + } })); startedCleanup = true;