Skip to content

Commit

Permalink
zebra: Modify dplane loop to allow backpressure to filter up
Browse files Browse the repository at this point in the history
Currently when the dplane_thread_loop is run, it moves contexts
from the dg_update_list and puts the contexts on the input queue
of the first provider.  This provider is given a chance to run
and then the items on the output queue are pulled off and placed
on the input queue of the next provider.  Rinse/Repeat down through
the entire list of providers.  Now imagine that we have a list
of multiple providers and the last provider is getting backed up.
Contexts will end up sticking in the input Queue of the `slow`
provider.  This can grow without bounds.  This is a real problem
when you have a situation where an interface is flapping and an
upper level protocol is sending a continous stream of route
updates to reflect the change in ecmp.  You can end up with
a very very large backlog of contexts.  This is bad because
zebra can easily grow to a very very large memory size and on
restricted systems you can run out of memory.  Fortunately
for us, the MetaQ already participates with this process
by not doing more route processing until the dg_update_list
goes below the working limit of dg_updates_per_cycle.  Thus
if FRR modifies the behavior of this loop to not move more
contexts onto the input queue if either the input queue
or output queue of the next provider has reached this limit.
FRR will naturaly start auto handling backpressure for the dplane
context system and memory will not go out of control.

Signed-off-by: Donald Sharp <[email protected]>
  • Loading branch information
donaldsharp committed Jun 13, 2024
1 parent de0e358 commit 00150d2
Showing 1 changed file with 82 additions and 20 deletions.
102 changes: 82 additions & 20 deletions zebra/zebra_dplane.c
Original file line number Diff line number Diff line change
Expand Up @@ -6715,10 +6715,10 @@ static void dplane_thread_loop(struct thread *event)
{
struct dplane_ctx_list_head work_list;
struct dplane_ctx_list_head error_list;
struct zebra_dplane_provider *prov;
struct zebra_dplane_provider *prov, *next_prov;
struct zebra_dplane_ctx *ctx;
int limit, counter, error_counter;
uint64_t curr, high;
uint64_t curr, out_curr, high;
bool reschedule = false;

/* Capture work limit per cycle */
Expand All @@ -6742,18 +6742,44 @@ static void dplane_thread_loop(struct thread *event)
/* Locate initial registered provider */
prov = dplane_prov_list_first(&zdplane_info.dg_providers);

/* Move new work from incoming list to temp list */
for (counter = 0; counter < limit; counter++) {
ctx = dplane_ctx_list_pop(&zdplane_info.dg_update_list);
if (ctx) {
ctx->zd_provider = prov->dp_id;
curr = atomic_load_explicit(&prov->dp_in_queued, memory_order_relaxed);
out_curr = atomic_load_explicit(&prov->dp_out_queued,
memory_order_relaxed);

dplane_ctx_list_add_tail(&work_list, ctx);
} else {
break;
if (curr >= (uint64_t)limit) {
reschedule = true;
if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
zlog_debug("%s: Current first provider(%s) Input queue is %" PRIu64
", holding off work",
__func__, prov->dp_name, curr);
counter = 0;
} else if (out_curr >= (uint64_t)limit) {
reschedule = true;
if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
zlog_debug("%s: Current first provider(%s) Output queue is %" PRIu64
", holding off work",
__func__, prov->dp_name, out_curr);
counter = 0;
} else {
/*
* Let's limit the work to how what can be put on the
* in or out queue without going over
*/
limit -= (curr > out_curr) ? curr : out_curr;
/* Move new work from incoming list to temp list */
for (counter = 0; counter < limit; counter++) {
ctx = dplane_ctx_list_pop(&zdplane_info.dg_update_list);
if (ctx) {
ctx->zd_provider = prov->dp_id;

dplane_ctx_list_add_tail(&work_list, ctx);
} else {
break;
}
}
}


DPLANE_UNLOCK();

atomic_fetch_sub_explicit(&zdplane_info.dg_routes_queued, counter,
Expand All @@ -6772,8 +6798,9 @@ static void dplane_thread_loop(struct thread *event)
* items.
*/
if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
zlog_debug("dplane enqueues %d new work to provider '%s'",
counter, dplane_provider_get_name(prov));
zlog_debug("dplane enqueues %d new work to provider '%s' curr is %" PRIu64,
counter, dplane_provider_get_name(prov),
curr);

/* Capture current provider id in each context; check for
* error status.
Expand Down Expand Up @@ -6834,16 +6861,51 @@ static void dplane_thread_loop(struct thread *event)
if (!zdplane_info.dg_run)
break;

/* Locate next provider */
next_prov = dplane_prov_list_next(&zdplane_info.dg_providers,
prov);
if (next_prov) {
curr = atomic_load_explicit(&next_prov->dp_in_queued,
memory_order_relaxed);
out_curr =
atomic_load_explicit(&next_prov->dp_out_queued,
memory_order_relaxed);
} else
out_curr = curr = 0;

/* Dequeue completed work from the provider */
dplane_provider_lock(prov);

while (counter < limit) {
ctx = dplane_provider_dequeue_out_ctx(prov);
if (ctx) {
dplane_ctx_list_add_tail(&work_list, ctx);
counter++;
} else
break;
if (curr >= (uint64_t)limit) {
reschedule = true;
if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
zlog_debug("%s: Next Provider(%s) Input queue is %" PRIu64
", holding off work",
__func__, next_prov->dp_name, curr);
counter = 0;
} else if (out_curr >= (uint64_t)limit) {
reschedule = true;
if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
zlog_debug("%s: Next Provider(%s) Output queue is %" PRIu64
", holding off work",
__func__, next_prov->dp_name,
out_curr);
counter = 0;
} else {
/*
* Let's limit the work to how what can be put on the
* in or out queue without going over
*/
limit -= (curr > out_curr) ? curr : out_curr;
while (counter < limit) {
ctx = dplane_provider_dequeue_out_ctx(prov);
if (ctx) {
dplane_ctx_list_add_tail(&work_list,
ctx);
counter++;
} else
break;
}
}

dplane_provider_unlock(prov);
Expand All @@ -6856,7 +6918,7 @@ static void dplane_thread_loop(struct thread *event)
counter, dplane_provider_get_name(prov));

/* Locate next provider */
prov = dplane_prov_list_next(&zdplane_info.dg_providers, prov);
prov = next_prov;
}

/*
Expand Down

0 comments on commit 00150d2

Please sign in to comment.