From ebb226f980e48a9d15830d60075f3208d149ff71 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 17 Oct 2023 17:00:10 +0200 Subject: [PATCH] BUG/MEDIUM: quic_conn: let the scheduler kill the task when needed The two timer handlers qc_process_timer() and qc_idle_timer_task() would inadvertently return NULL when they don't want to be requeued, instead of just returning the task itself. The effect of returning NULL for the scheduler is that it considers the task as freed, so it must not touch it anymore. As such, the TASK_F_RUNNING flag is never removed from these tasks, and when quic_conn_release() later tries to release these tasks using task_destroy(), the latter sees the RUNNING flag and just sets ->process to NULL, hoping that the scheduler will kill them on return, but there's no longer being executed so this never happens and they are leaked. Interestingly, this doesn't seem to happen as much when multi-queue is set to off, but it's likely because the tasks are being replaced and the first ones have already been woken up and leaked, while the latter might only trigger on a timeout or timer renewal. This should address github issue #2310. Thanks to @hpn0t0ad for the numerous traces that helped understand this sequence. This must be backported to 2.7 at least, and adapted for 2.6 (qc_idle_timer_task must return t there). (cherry picked from commit 4070e4042a1fbe64fa820992c63af7284735e854) Signed-off-by: Willy Tarreau --- src/quic_conn.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/quic_conn.c b/src/quic_conn.c index a62beee33b25..2b57dc8ef884 100644 --- a/src/quic_conn.c +++ b/src/quic_conn.c @@ -5389,7 +5389,6 @@ struct task *qc_process_timer(struct task *task, void *ctx, unsigned int state) if (qc->flags & (QUIC_FL_CONN_DRAINING|QUIC_FL_CONN_TO_KILL)) { TRACE_PROTO("cancelled action (draining state)", QUIC_EV_CONN_PTIMER, qc); - task = NULL; goto out; } @@ -5970,6 +5969,7 @@ struct task *qc_idle_timer_task(struct task *t, void *ctx, unsigned int state) if (qc->mux_state != QC_MUX_READY) { quic_conn_release(qc); qc = NULL; + t = NULL; } /* TODO if the quic-conn cannot be freed because of the MUX, we may at @@ -5982,10 +5982,6 @@ struct task *qc_idle_timer_task(struct task *t, void *ctx, unsigned int state) HA_ATOMIC_DEC(&prx_counters->half_open_conn); } - leave: - TRACE_LEAVE(QUIC_EV_CONN_IDLE_TIMER, qc); - return NULL; - requeue: TRACE_LEAVE(QUIC_EV_CONN_IDLE_TIMER, qc); return t;