Skip to content

Commit

Permalink
pimd: Fix PIM MLAG Update Peer Zebra Status Upon Local MLAG Connectio…
Browse files Browse the repository at this point in the history
…n Restoration

Issue:
When the local MLAG connection is down, we currently stop processing
peer MLAG messages. However, when the local connection is restored,
we fail to update the peer Zebra status accordingly.

Consider the case where the peer is up and sends FRR status messages to the local node.
If CLAGd restarts on the local node while FRR is running,
the local CLAGd assumes the peer is still down even when it's up.

Fix:
Update the peer Zebra status once the local MLAG connection is restored

Testing: UT

Ticket: #
Signed-off-by: Rajesh Varatharaj <[email protected]>
  • Loading branch information
routingrocks committed Jul 29, 2024
1 parent 6186368 commit 2eeafae
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 4 deletions.
4 changes: 3 additions & 1 deletion pimd/pim_instance.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@ enum pim_mlag_flags {
/* initial dump of data done post peerlink flap */
PIM_MLAGF_PEER_REPLAY_DONE = (1 << 3),
/* zebra is up on the peer */
PIM_MLAGF_PEER_ZEBRA_UP = (1 << 4)
PIM_MLAGF_PEER_ZEBRA_UP = (1 << 4),
/* peer zebra status was received while the local MLAGD session was down */
PIM_MLAGF_PEER_ZEBRA_UP_NOTIFY_RECEIVE_PENDING = (1 << 5)
};

struct pim_router {
Expand Down
42 changes: 39 additions & 3 deletions pimd/pim_mlag.c
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,23 @@ static inline void pim_mlag_vxlan_state_update(void)


/********************API to process PIM MLAG Data ************************/
/*
 * Apply a deferred peer zebra status notification.
 *
 * Does nothing unless PIM_MLAGF_PEER_ZEBRA_UP_NOTIFY_RECEIVE_PENDING is set
 * in router->mlag_flags. When it is set, PIM_MLAGF_PEER_ZEBRA_UP is turned
 * on (if it is not already) and the pending flag is cleared.
 *
 * The pending flag is cleared unconditionally once it has been examined:
 * leaving it set when PIM_MLAGF_PEER_ZEBRA_UP was already on would let a
 * stale notification re-assert PIM_MLAGF_PEER_ZEBRA_UP on a later call,
 * after the peer zebra flag had been cleared in the meantime.
 */
static void pim_mlag_peer_zebra_flag_set(void)
{
	if (!CHECK_FLAG(router->mlag_flags,
			PIM_MLAGF_PEER_ZEBRA_UP_NOTIFY_RECEIVE_PENDING))
		return;

	if (!CHECK_FLAG(router->mlag_flags, PIM_MLAGF_PEER_ZEBRA_UP)) {
		if (PIM_DEBUG_MLAG)
			zlog_debug(
				"%s: update Mlag flag with PIM_MLAGF_PEER_ZEBRA_UP",
				__func__);
		SET_FLAG(router->mlag_flags, PIM_MLAGF_PEER_ZEBRA_UP);
	}

	/* the deferred notification has now been consumed */
	UNSET_FLAG(router->mlag_flags,
		   PIM_MLAGF_PEER_ZEBRA_UP_NOTIFY_RECEIVE_PENDING);
}

static void pim_mlag_process_mlagd_state_change(struct mlag_status msg)
{
Expand Down Expand Up @@ -612,6 +629,7 @@ static void pim_mlag_process_mlagd_state_change(struct mlag_status msg)
router->connected_to_mlag = false;
}

pim_mlag_peer_zebra_flag_set();
/* apply the changes */
/* when connection to mlagd comes up we hold send mroutes till we have
* rxed the status and had a chance to re-valuate DF state
Expand All @@ -620,7 +638,7 @@ static void pim_mlag_process_mlagd_state_change(struct mlag_status msg)
router->mlag_flags |= PIM_MLAGF_STATUS_RXED;
pim_mlag_vxlan_state_update();
/* on session up re-eval DF status */
pim_mlag_up_local_reeval(false /*mlagd_send*/, "mlagd_up");
pim_mlag_up_local_reeval(true /*mlagd_send*/, "mlagd_up");
/* replay all the upstream entries to the local MLAG daemon */
pim_mlag_up_local_replay();
return;
Expand Down Expand Up @@ -657,9 +675,13 @@ static void pim_mlag_process_peer_frr_state_change(struct mlag_frr_status msg)
(msg.frr_state == MLAG_FRR_STATE_UP ? "UP" : "DOWN"));

if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) {
SET_FLAG(router->mlag_flags,
PIM_MLAGF_PEER_ZEBRA_UP_NOTIFY_RECEIVE_PENDING);
if (PIM_DEBUG_MLAG)
zlog_debug("%s: msg ignored mlagd process state down",
__func__);
zlog_debug(
"%s: msg ignored mlagd process state down, \
Setting Local MLAG Pending flag",
__func__);
return;
}
++router->mlag_stats.msg.peer_zebra_status_updates;
Expand All @@ -668,6 +690,10 @@ static void pim_mlag_process_peer_frr_state_change(struct mlag_frr_status msg)
if (msg.frr_state == MLAG_FRR_STATE_UP) {
if (!(router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP)) {
router->mlag_flags |= PIM_MLAGF_PEER_ZEBRA_UP;
if (PIM_DEBUG_MLAG)
zlog_debug(
"%s:%d: Mlag Peer FRR state is UP Setting PIM_MLAGF_PEER_ZEBRA_UP mlag flag %0x",
__func__, __LINE__, router->mlag_flags);
/* XXX - when peer zebra comes up we need to wait for
* for some time to let the peer setup MDTs before
* before relinquishing DF status
Expand All @@ -679,6 +705,10 @@ static void pim_mlag_process_peer_frr_state_change(struct mlag_frr_status msg)
if (router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP) {
++router->mlag_stats.peer_zebra_downs;
router->mlag_flags &= ~PIM_MLAGF_PEER_ZEBRA_UP;
if (PIM_DEBUG_MLAG)
zlog_debug(
"%s:%d: Mlag Peer FRR state is DOWN unsetting PIM_MLAGF_PEER_ZEBRA_UP mlag flag %0x",
__func__, __LINE__, router->mlag_flags);
/* when a peer zebra goes down we assume DF role */
pim_mlag_up_local_reeval(true /*mlagd_send*/,
"zebra_down");
Expand Down Expand Up @@ -881,6 +911,12 @@ int pim_zebra_mlag_process_up(ZAPI_CALLBACK_ARGS)
*/
router->connected_to_mlag = true;
router->mlag_flags |= PIM_MLAGF_LOCAL_CONN_UP;
/*
* Handle when local mlag session comes up later,
* Update the Peer Zebra status once the local MLAG connection is
* restored based on peer connection status.
*/
pim_mlag_peer_zebra_flag_set();
return 0;
}

Expand Down

0 comments on commit 2eeafae

Please sign in to comment.