From b2d6ec398dc4a249eb508cd4488d910cccded710 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Wed, 12 Jun 2024 14:14:48 -0400 Subject: [PATCH] zebra: Modify dplane loop to allow backpressure to filter up Currently when the dplane_thread_loop is run, it moves contexts from the dg_update_list and puts the contexts on the input queue of the first provider. This provider is given a chance to run and then the items on the output queue are pulled off and placed on the input queue of the next provider. Rinse/Repeat down through the entire list of providers. Now imagine that we have a list of multiple providers and the last provider is getting backed up. Contexts will end up sticking in the input Queue of the `slow` provider. This can grow without bounds. This is a real problem when you have a situation where an interface is flapping and an upper level protocol is sending a continous stream of route updates to reflect the change in ecmp. You can end up with a very very large backlog of contexts. This is bad because zebra can easily grow to a very very large memory size and on restricted systems you can run out of memory. Fortunately for us, the MetaQ already participates with this process by not doing more route processing until the dg_update_list goes below the working limit of dg_updates_per_cycle. Thus if FRR modifies the behavior of this loop to not move more contexts onto the input queue if either the input queue or output queue of the next provider has reached this limit. FRR will naturaly start auto handling backpressure for the dplane context system and memory will not go out of control. Signed-off-by: Donald Sharp --- zebra/zebra_dplane.c | 114 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 94 insertions(+), 20 deletions(-) diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index 80f3ae285a01..0a49537f6c69 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -6706,10 +6706,10 @@ static void dplane_thread_loop(struct thread *event) { struct dplane_ctx_list_head work_list; struct dplane_ctx_list_head error_list; - struct zebra_dplane_provider *prov; + struct zebra_dplane_provider *prov, *next_prov; struct zebra_dplane_ctx *ctx; int limit, counter, error_counter; - uint64_t curr, high; + uint64_t curr, out_curr, high; bool reschedule = false; /* Capture work limit per cycle */ @@ -6733,18 +6733,48 @@ static void dplane_thread_loop(struct thread *event) /* Locate initial registered provider */ prov = dplane_prov_list_first(&zdplane_info.dg_providers); - /* Move new work from incoming list to temp list */ - for (counter = 0; counter < limit; counter++) { - ctx = dplane_ctx_list_pop(&zdplane_info.dg_update_list); - if (ctx) { - ctx->zd_provider = prov->dp_id; + curr = dplane_ctx_queue_count(&prov->dp_ctx_in_list); + out_curr = dplane_ctx_queue_count(&prov->dp_ctx_out_list); - dplane_ctx_list_add_tail(&work_list, ctx); - } else { - break; + if (curr >= (uint64_t)limit) { + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("%s: Current first provider(%s) Input queue is %" PRIu64 + ", holding off work", + __func__, prov->dp_name, curr); + counter = 0; + } else if (out_curr >= (uint64_t)limit) { + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("%s: Current first provider(%s) Output queue is %" PRIu64 + ", holding off work", + __func__, prov->dp_name, out_curr); + counter = 0; + } else { + int tlimit; + /* + * Let's limit the work to how what can be put on the + * in or out queue without going over + */ + tlimit = limit - MAX(curr, out_curr); + /* Move new work from incoming list to temp list */ + for (counter = 0; counter < tlimit; counter++) { + ctx = dplane_ctx_list_pop(&zdplane_info.dg_update_list); + if (ctx) { + ctx->zd_provider = prov->dp_id; + + dplane_ctx_list_add_tail(&work_list, ctx); + } else { + break; + } } } + /* + * If there is anything still on the two input queues reschedule + */ + if (dplane_ctx_queue_count(&prov->dp_ctx_in_list) > 0 || + dplane_ctx_queue_count(&zdplane_info.dg_update_list) > 0) + reschedule = true; + DPLANE_UNLOCK(); atomic_fetch_sub_explicit(&zdplane_info.dg_routes_queued, counter, @@ -6763,8 +6793,9 @@ static void dplane_thread_loop(struct thread *event) * items. */ if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) - zlog_debug("dplane enqueues %d new work to provider '%s'", - counter, dplane_provider_get_name(prov)); + zlog_debug("dplane enqueues %d new work to provider '%s' curr is %" PRIu64, + counter, dplane_provider_get_name(prov), + curr); /* Capture current provider id in each context; check for * error status. @@ -6822,18 +6853,61 @@ static void dplane_thread_loop(struct thread *event) if (!zdplane_info.dg_run) break; + /* Locate next provider */ + next_prov = dplane_prov_list_next(&zdplane_info.dg_providers, + prov); + if (next_prov) { + curr = dplane_ctx_queue_count( + &next_prov->dp_ctx_in_list); + out_curr = dplane_ctx_queue_count( + &next_prov->dp_ctx_out_list); + } else + out_curr = curr = 0; + /* Dequeue completed work from the provider */ dplane_provider_lock(prov); - while (counter < limit) { - ctx = dplane_provider_dequeue_out_ctx(prov); - if (ctx) { - dplane_ctx_list_add_tail(&work_list, ctx); - counter++; - } else - break; + if (curr >= (uint64_t)limit) { + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("%s: Next Provider(%s) Input queue is %" PRIu64 + ", holding off work", + __func__, next_prov->dp_name, curr); + counter = 0; + } else if (out_curr >= (uint64_t)limit) { + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("%s: Next Provider(%s) Output queue is %" PRIu64 + ", holding off work", + __func__, next_prov->dp_name, + out_curr); + counter = 0; + } else { + int tlimit; + + /* + * Let's limit the work to how what can be put on the + * in or out queue without going over + */ + tlimit = limit - MAX(curr, out_curr); + while (counter < tlimit) { + ctx = dplane_provider_dequeue_out_ctx(prov); + if (ctx) { + dplane_ctx_list_add_tail(&work_list, + ctx); + counter++; + } else + break; + } } + /* + * Let's check if there are still any items on the + * input or output queus of the current provider + * if so then we know we need to reschedule. + */ + if (dplane_ctx_queue_count(&prov->dp_ctx_in_list) > 0 || + dplane_ctx_queue_count(&prov->dp_ctx_out_list) > 0) + reschedule = true; + dplane_provider_unlock(prov); if (counter >= limit) @@ -6844,7 +6918,7 @@ static void dplane_thread_loop(struct thread *event) counter, dplane_provider_get_name(prov)); /* Locate next provider */ - prov = dplane_prov_list_next(&zdplane_info.dg_providers, prov); + prov = next_prov; } /*