From a9dae872ba67f0f7f518190d410cadcdda24628a Mon Sep 17 00:00:00 2001 From: John Howard Date: Wed, 1 May 2024 15:32:06 -0700 Subject: [PATCH] Fix race condition in connection termination See https://github.com/hyperium/hyper/issues/3652. What I have found is the final reference to a stream being dropped after the `maybe_close_connection_if_no_streams` but before the `inner.poll()` completes can lead to the connection dangling forever without any forward progress. No streams/references are alive, but the connection is not complete and never wakes up again. This seems like a classic TOCTOU race condition. In this fix, I check again at the end of poll and if this state is detected, wake up the task again. Wth the test in https://github.com/hyperium/hyper/pull/3655, on my machine, it fails about 5% of the time: ``` 1876 runs so far, 100 failures (94.94% pass rate). 95.197349ms avg, 1.097347435s max, 5.398457ms min ``` With that PR, this test is 100% reliable ``` 64010 runs so far, 0 failures (100.00% pass rate). 44.484057ms avg, 121.454709ms max, 1.872657ms min ``` Note: we also have reproduced this using `h2` directly outside of `hyper`, which is what gives me confidence this issue lies in `h2` and not `hyper`. --- src/client.rs | 7 ++++++- src/proto/connection.rs | 7 +++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/client.rs b/src/client.rs index 25b151f5..8449ff8c 100644 --- a/src/client.rs +++ b/src/client.rs @@ -1428,7 +1428,12 @@ where fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { self.inner.maybe_close_connection_if_no_streams(); - self.inner.poll(cx).map_err(Into::into) + let result = self.inner.poll(cx).map_err(Into::into); + if result.is_pending() && !self.inner.has_streams_or_other_references() { + tracing::trace!("last stream closed during poll, wake again"); + cx.waker().wake_by_ref(); + } + result } } diff --git a/src/proto/connection.rs b/src/proto/connection.rs index 5969bb84..5589fabc 100644 --- a/src/proto/connection.rs +++ b/src/proto/connection.rs @@ -242,6 +242,13 @@ where } } + /// Checks if there are any streams or references left + pub fn has_streams_or_other_references(&self) -> bool { + // If we poll() and realize that there are no streams or references + // then we can close the connection by transitioning to GOAWAY + self.inner.streams.has_streams_or_other_references() + } + pub(crate) fn take_user_pings(&mut self) -> Option { self.inner.ping_pong.take_user_pings() }