Skip to content

Commit

Permalink
zebra: Prevent possible wedged fpm write
Browse files Browse the repository at this point in the history
An operator is reporting that the dplane_fpm_nl connection has
started to accumulate contexts.  One path that could cause
this is that the obuf in use is full and stays full.  This would
imply that whatever is on the receiving end has gotten wedged
and is not reading from the stream of data being sent its way.
If there is still no response after 15 seconds, let's declare the
connection dead and reset it.

Signed-off-by: Donald Sharp <[email protected]>
  • Loading branch information
donaldsharp committed Dec 14, 2023
1 parent 5e87eae commit f132246
Showing 1 changed file with 24 additions and 1 deletion.
25 changes: 24 additions & 1 deletion zebra/dplane_fpm_nl.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@
#define SOUTHBOUND_DEFAULT_ADDR INADDR_LOOPBACK
#define SOUTHBOUND_DEFAULT_PORT 2620

/*
* Time in seconds after which, if the other end is still not
* responding, we assume something has gone terribly wrong and
* reset the connection.
*/
#define DPLANE_FPM_NL_WEDGIE_TIME 15

/**
* FPM header:
* {
Expand Down Expand Up @@ -93,6 +99,7 @@ struct fpm_nl_ctx {
struct event *t_event;
struct event *t_nhg;
struct event *t_dequeue;
struct event *t_wedged;

/* zebra events. */
struct event *t_lspreset;
Expand Down Expand Up @@ -1367,6 +1374,18 @@ static void fpm_rmac_reset(struct event *t)
&fnc->t_rmacwalk);
}

/*
 * Timer callback scheduled with a DPLANE_FPM_NL_WEDGIE_TIME second delay
 * whenever the queue processing runs out of buffer space.  If it fires,
 * count a connection error, warn the operator and invoke FPM_RECONNECT
 * on the context.
 *
 * t: the expired event; its argument is the struct fpm_nl_ctx.
 */
static void fpm_process_wedged(struct event *t)
{
	struct fpm_nl_ctx *ctx = EVENT_ARG(t);

	/* Account for the stalled connection as a connection error. */
	atomic_fetch_add_explicit(&ctx->counters.connection_errors, 1,
				  memory_order_relaxed);

	zlog_warn("%s: Connection unable to write to peer for over %u seconds, reseting",
		  __func__, DPLANE_FPM_NL_WEDGIE_TIME);

	FPM_RECONNECT(ctx);
}

static void fpm_process_queue(struct event *t)
{
struct fpm_nl_ctx *fnc = EVENT_ARG(t);
Expand Down Expand Up @@ -1411,9 +1430,13 @@ static void fpm_process_queue(struct event *t)
processed_contexts, memory_order_relaxed);

/* Re-schedule if we ran out of buffer space */
if (no_bufs)
if (no_bufs) {
event_add_timer(fnc->fthread->master, fpm_process_queue, fnc, 0,
&fnc->t_dequeue);
event_add_timer(fnc->fthread->master, fpm_process_wedged, fnc,
DPLANE_FPM_NL_WEDGIE_TIME, &fnc->t_wedged);
} else
EVENT_OFF(fnc->t_wedged);

/*
* Let the dataplane thread know if there are items in the
Expand Down

0 comments on commit f132246

Please sign in to comment.