From 9791abb605caa00f917804ddb53f947232cb215c Mon Sep 17 00:00:00 2001 From: Roger Johansson Date: Thu, 29 Feb 2024 21:29:24 +0100 Subject: [PATCH] debug cluster tests (#2104) Rearrange cluster shutdown code --- .github/workflows/build-dev.yml | 2 -- .github/workflows/pull-request.yml | 2 -- src/Proto.Cluster/Cluster.cs | 12 ++++++------ src/Proto.Remote/GrpcNet/GrpcNetRemote.cs | 2 +- tests/Proto.Cluster.Tests/ClusterFixture.cs | 6 ++++-- tests/Proto.Cluster.Tests/ClusterTests.cs | 2 +- tests/Proto.Cluster.Tests/RetryOnDeadLetterTests.cs | 2 +- 7 files changed, 13 insertions(+), 15 deletions(-) diff --git a/.github/workflows/build-dev.yml b/.github/workflows/build-dev.yml index 74b8c28feb..0aa5bbc397 100644 --- a/.github/workflows/build-dev.yml +++ b/.github/workflows/build-dev.yml @@ -9,8 +9,6 @@ on: env: DOTNET_SYSTEM_CONSOLE_ALLOW_ANSI_COLOR_REDIRECTION: 1 TERM: xterm - OPENTELEMETRY_URL: http://otel.ornell.io:30798 - TRACEVIEW_URL: http://traceview.ornell.io jobs: test-slow: # slow tests that should run in parallel diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index d5200cc0af..855052d79c 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -5,8 +5,6 @@ on: [pull_request] env: DOTNET_SYSTEM_CONSOLE_ALLOW_ANSI_COLOR_REDIRECTION: 1 TERM: xterm - OPENTELEMETRY_URL: http://otel.ornell.io:30798 - TRACEVIEW_URL: http://traceview.ornell.io jobs: build: diff --git a/src/Proto.Cluster/Cluster.cs b/src/Proto.Cluster/Cluster.cs index 688b7185e7..23718d8c68 100644 --- a/src/Proto.Cluster/Cluster.cs +++ b/src/Proto.Cluster/Cluster.cs @@ -295,10 +295,10 @@ public async Task ShutdownAsync(bool graceful = true, string reason = "") Logger.LogInformation("Waiting for two gossip intervals to pass for {Id}", System.Id); // In case provider shutdown is quick, let's wait at least 2 gossip intervals. await Task.Delay((int)Config.GossipInterval.TotalMilliseconds * 2).ConfigureAwait(false); - + Logger.LogInformation("Stopping cluster provider for {Id}", System.Id); // Deregister from configured cluster provider. - await Provider.ShutdownAsync(graceful).ConfigureAwait(false); + await Provider.ShutdownAsync(graceful); if (_clusterKindObserver != null) { @@ -312,19 +312,19 @@ public async Task ShutdownAsync(bool graceful = true, string reason = "") _clusterMembersObserver = null; } - // Cancel the primary CancellationToken first which will shut down a number of concurrent systems simultaneously. - await System.ShutdownAsync(reason).ConfigureAwait(false); - // Shut down the rest of the dependencies in reverse order that they were started. await Gossip.ShutdownAsync().ConfigureAwait(false); if (graceful) { - await IdentityLookup.ShutdownAsync().ConfigureAwait(false); + await IdentityLookup.ShutdownAsync().ConfigureAwait(false); } await Remote.ShutdownAsync(graceful).ConfigureAwait(false); + // Cancel the primary CancellationToken first which will shut down a number of concurrent systems simultaneously. + await System.ShutdownAsync(reason).ConfigureAwait(false); + _shutdownCompletedTcs.TrySetResult(true); Logger.LogInformation("Stopped Cluster {Id}", System.Id); } diff --git a/src/Proto.Remote/GrpcNet/GrpcNetRemote.cs b/src/Proto.Remote/GrpcNet/GrpcNetRemote.cs index 038441bcd5..1c873517f5 100644 --- a/src/Proto.Remote/GrpcNet/GrpcNetRemote.cs +++ b/src/Proto.Remote/GrpcNet/GrpcNetRemote.cs @@ -161,7 +161,7 @@ public async Task ShutdownAsync(bool graceful = true) if (_host is not null) { - await _host.StopAsync().ConfigureAwait(false); + await _host.StopAsync().WaitAsync(TimeSpan.FromSeconds(5)).ConfigureAwait(false); } } } diff --git a/tests/Proto.Cluster.Tests/ClusterFixture.cs b/tests/Proto.Cluster.Tests/ClusterFixture.cs index 05b184a516..7648b027a9 100644 --- a/tests/Proto.Cluster.Tests/ClusterFixture.cs +++ b/tests/Proto.Cluster.Tests/ClusterFixture.cs @@ -182,12 +182,14 @@ public async Task RemoveNode(Cluster member, bool graceful = true) if (Members.Contains(member)) { Members.Remove(member); - await member.ShutdownAsync(graceful, "Stopped by ClusterFixture"); + var t = member.ShutdownAsync(graceful, "Stopped by ClusterFixture"); + await t.WaitAsync(TimeSpan.FromSeconds(5)); } else if (Clients.Contains(member)) { Clients.Remove(member); - await member.ShutdownAsync(graceful, "Stopped by ClusterFixture"); + var t = member.ShutdownAsync(graceful, "Stopped by ClusterFixture"); + await t.WaitAsync(TimeSpan.FromSeconds(5)); } else { diff --git a/tests/Proto.Cluster.Tests/ClusterTests.cs b/tests/Proto.Cluster.Tests/ClusterTests.cs index 02b469e8d1..c2c0811993 100644 --- a/tests/Proto.Cluster.Tests/ClusterTests.cs +++ b/tests/Proto.Cluster.Tests/ClusterTests.cs @@ -565,7 +565,7 @@ private async Task PingPong( if (response == null) { - await Task.Delay(200, token); + await Task.Delay(200); } } while (response == null && !token.IsCancellationRequested); diff --git a/tests/Proto.Cluster.Tests/RetryOnDeadLetterTests.cs b/tests/Proto.Cluster.Tests/RetryOnDeadLetterTests.cs index fb9f47dd57..1c91832a25 100644 --- a/tests/Proto.Cluster.Tests/RetryOnDeadLetterTests.cs +++ b/tests/Proto.Cluster.Tests/RetryOnDeadLetterTests.cs @@ -10,7 +10,7 @@ namespace Proto.Cluster.Tests; [Collection("ClusterTests")] public class RetryOnDeadLetterTests { - [Fact(Skip = "Flaky")] + [Fact] public async Task ShouldRetryRequestOnDeadLetterResponseRegardlessOfResponseType() { var fixture = new Fixture(1);