Skip to content

Commit

Permalink
bgpd: backpressure - Optimize EVPN L2VNI remote routes processing
Browse files Browse the repository at this point in the history
Anytime BGP gets a L2 VNI ADD from zebra,
 - Walking the entire global routing table per L2VNI is very expensive.
 - The next read (say of another VNI ADD) from the socket does
   not proceed unless this walk is complete.

So for triggers where a bulk of L2VNI's are flapped, this results in
huge output buffer FIFO growth spiking up the memory in zebra since bgp
is slow/busy processing the first message.

To avoid this, idea is to hookup the VPN off the bgp_master struct and
maintain a VPN FIFO list which is processed later on, where we walk a
chunk of VPNs and do the remote route install.

Note: So far in the L3 backpressure cases(FRRouting#15524), we have considered
the fact that zebra is slow, and the buffer grows in the BGP.

However this is the reverse i.e. BGP is very busy processing the first
ZAPI message from zebra due to which the buffer grows huge in zebra
and memory spikes up.

Ticket :#3864372

Signed-off-by: Rajasekar Raja <[email protected]>
  • Loading branch information
raja-rajasekar committed Dec 9, 2024
1 parent 8c71360 commit 07a8070
Show file tree
Hide file tree
Showing 8 changed files with 237 additions and 59 deletions.
226 changes: 175 additions & 51 deletions bgpd/bgp_evpn.c
Original file line number Diff line number Diff line change
Expand Up @@ -3979,30 +3979,80 @@ static int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, bool install)
return 0;
}

#define BGP_PROC_L2VNI_LIMIT 10
static int install_evpn_remote_route_per_l2vni(struct bgp *bgp, struct bgp_path_info *pi,
const struct prefix_evpn *evp)
{
int ret = 0;
uint8_t vni_iter = 0;
struct bgpevpn *t_vpn = NULL;
struct bgpevpn *t_vpn_next = NULL;

for (t_vpn = zebra_l2_vni_first(&bm->zebra_l2_vni_head);
t_vpn && vni_iter < BGP_PROC_L2VNI_LIMIT; t_vpn = t_vpn_next) {
t_vpn_next = zebra_l2_vni_next(&bm->zebra_l2_vni_head, t_vpn);
vni_iter++;
/*
* Skip install/uninstall if the route entry is not needed to
* be imported into the VNI i.e. RTs dont match
*/
if (!is_route_matching_for_vni(bgp, t_vpn, pi))
continue;

ret = install_evpn_route_entry(bgp, t_vpn, evp, pi);

if (ret) {
flog_err(EC_BGP_EVPN_FAIL,
"%u: Failed to install EVPN %s route in VNI %u during BP",
bgp->vrf_id, bgp_evpn_route_type_str[evp->prefix.route_type].str,
t_vpn->vni);
zebra_l2_vni_del(&bm->zebra_l2_vni_head, t_vpn);

return ret;
}
}

return 0;
}

/*
* Install or uninstall routes of specified type that are appropriate for this
* particular VNI.
*/
static int install_uninstall_routes_for_vni(struct bgp *bgp,
struct bgpevpn *vpn, bool install)
int install_uninstall_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn, bool install)
{
afi_t afi;
safi_t safi;
struct bgp_dest *rd_dest, *dest;
struct bgp_table *table;
struct bgp_path_info *pi;
int ret;
int ret = 0;
uint8_t count = 0;
bool walk_fifo = false;

afi = AFI_L2VPN;
safi = SAFI_EVPN;

/* Walk entire global routing table and evaluate routes which could be
if (!bgp) {
walk_fifo = true;
bgp = bgp_get_evpn();
if (!bgp) {
zlog_warn("%s: No BGP EVPN instance found...", __func__);

return -1;
}
}

if (BGP_DEBUG(zebra, ZEBRA))
zlog_debug("%s: Total %u L2VNI VPNs pending to be processed for remote route installation",
__func__, (uint32_t)zebra_l2_vni_count(&bm->zebra_l2_vni_head));
/*
* Walk entire global routing table and evaluate routes which could be
* imported into this VPN. Note that we cannot just look at the routes
* for
* the VNI's RD - remote routes applicable for this VNI could have any
* RD.
* for the VNI's RD - remote routes applicable for this VNI could have
* any RD.
* Note: EVPN routes are a 2-level table.
*/
/* EVPN routes are a 2-level table. */
for (rd_dest = bgp_table_top(bgp->rib[afi][safi]); rd_dest;
rd_dest = bgp_route_next(rd_dest)) {
table = bgp_dest_get_bgp_table_info(rd_dest);
Expand All @@ -4015,54 +4065,80 @@ static int install_uninstall_routes_for_vni(struct bgp *bgp,
(const struct prefix_evpn *)bgp_dest_get_prefix(
dest);

if (evp->prefix.route_type != BGP_EVPN_IMET_ROUTE &&
evp->prefix.route_type != BGP_EVPN_AD_ROUTE &&
evp->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE)
/* Proceed only for AD, MAC_IP and IMET routes */
switch (evp->prefix.route_type) {
case BGP_EVPN_AD_ROUTE:
case BGP_EVPN_MAC_IP_ROUTE:
case BGP_EVPN_IMET_ROUTE:
break;
case BGP_EVPN_ES_ROUTE:
case BGP_EVPN_IP_PREFIX_ROUTE:
continue;
}

for (pi = bgp_dest_get_bgp_path_info(dest); pi;
pi = pi->next) {
/* Consider "valid" remote routes applicable for
* this VNI. */
if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID)
&& pi->type == ZEBRA_ROUTE_BGP
&& pi->sub_type == BGP_ROUTE_NORMAL))
continue;

if (!is_route_matching_for_vni(bgp, vpn, pi))
/*
* Skip install/uninstall if
* - Not a valid remote routes
* - Install & evpn route matchesi macvrf SOO
*/
if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID) &&
pi->type == ZEBRA_ROUTE_BGP &&
pi->sub_type == BGP_ROUTE_NORMAL) ||
(install && bgp_evpn_route_matches_macvrf_soo(pi, evp)))
continue;

if (install) {
if (bgp_evpn_route_matches_macvrf_soo(
pi, evp))
if (walk_fifo) {
ret = install_evpn_remote_route_per_l2vni(bgp, pi, evp);
if (ret) {
bgp_dest_unlock_node(rd_dest);
bgp_dest_unlock_node(dest);
return ret;
}
} else {
/*
* Skip install/uninstall if the route
* entry is not needed to be imported
* into the VNI i.e. RTs dont match
*/
if (!is_route_matching_for_vni(bgp, vpn, pi))
continue;

ret = install_evpn_route_entry(bgp, vpn,
evp, pi);
} else
ret = uninstall_evpn_route_entry(
bgp, vpn, evp, pi);

if (ret) {
flog_err(EC_BGP_EVPN_FAIL,
"%u: Failed to %s EVPN %s route in VNI %u",
bgp->vrf_id,
install ? "install"
: "uninstall",
evp->prefix.route_type ==
BGP_EVPN_MAC_IP_ROUTE
? "MACIP"
: "IMET",
vpn->vni);

bgp_dest_unlock_node(rd_dest);
bgp_dest_unlock_node(dest);
return ret;
if (install)
ret = install_evpn_route_entry(bgp, vpn, evp, pi);
else
ret = uninstall_evpn_route_entry(bgp, vpn, evp, pi);

if (ret) {
flog_err(EC_BGP_EVPN_FAIL,
"%u: Failed to %s EVPN %s route in VNI %u",
bgp->vrf_id,
install ? "install" : "uninstall",
bgp_evpn_route_type_str[evp->prefix.route_type]
.str,
vpn->vni);

bgp_dest_unlock_node(rd_dest);
bgp_dest_unlock_node(dest);
return ret;
}
}
}
}
}

if (walk_fifo) {
while (count < BGP_PROC_L2VNI_LIMIT) {
vpn = zebra_l2_vni_pop(&bm->zebra_l2_vni_head);
if (!vpn)
return 0;

UNSET_FLAG(vpn->flags, VNI_FLAG_ADD);
count++;
}
}

return 0;
}

Expand Down Expand Up @@ -7024,6 +7100,45 @@ int bgp_evpn_local_l3vni_del(vni_t l3vni, vrf_id_t vrf_id)
return 0;
}

static void bgp_evpn_l2vni_remote_route_processing(struct bgpevpn *vpn)
{
/*
* Anytime BGP gets a Bulk of L2 VNIs ADD/UPD from zebra,
* - Walking the entire global routing table per VNI is very expensive.
* - The next read (say of another VNI ADD/UPD) from the socket does
* not proceed unless this walk is complete.
* This results in huge output buffer FIFO growth spiking up the
* memory in zebra.
*
* To avoid this, idea is to hookup the VPN off the struct bgp_master
* and maintain a VPN FIFO list which is processed later on, where we
* walk a chunk of VPNs and do the remote route install.
*/
if (!CHECK_FLAG(vpn->flags, VNI_FLAG_ADD)) {
zebra_l2_vni_add_tail(&bm->zebra_l2_vni_head, vpn);
SET_FLAG(vpn->flags, VNI_FLAG_ADD);
}

if (BGP_DEBUG(zebra, ZEBRA))
zlog_debug("Scheduling L2VNI ADD to be processed later for VNI %u", vpn->vni);

/*
* If there are no VNI's in the bgp VPN FIFO list i.e. an update
* for an already processed VNI comes in, schedule the remote
* route install immediately.
*
* In all other cases, it is ok to schedule the remote route install
* after a small sleep. This is to give benefit of doubt in case more
* L2VNI ADD events come.
*/
if (zebra_l2_vni_count(&bm->zebra_l2_vni_head))
event_add_timer_msec(bm->master, bgp_zebra_process_remote_routes_for_l2vni, NULL,
20, &bm->t_bgp_zebra_l2_vni);
else
event_add_event(bm->master, bgp_zebra_process_remote_routes_for_l2vni, NULL, 0,
&bm->t_bgp_zebra_l2_vni);
}

/*
* When bgp instance goes down also clean up what might have been left over
* from evpn.
Expand All @@ -7047,6 +7162,10 @@ int bgp_evpn_local_vni_del(struct bgp *bgp, vni_t vni)
if (!vpn)
return 0;

/* Remove the VPN from the bgp VPN FIFO (if exists) */
UNSET_FLAG(vpn->flags, VNI_FLAG_ADD);
zebra_l2_vni_del(&bm->zebra_l2_vni_head, vpn);

/* Remove all local EVPN routes and schedule for processing (to
* withdraw from peers).
*/
Expand Down Expand Up @@ -7203,19 +7322,15 @@ int bgp_evpn_local_vni_add(struct bgp *bgp, vni_t vni,
}
}

/* If we have learnt and retained remote routes (VTEPs, MACs) for this
* VNI,
* install them.
*/
install_routes_for_vni(bgp, vpn);

/* If we are advertising gateway mac-ip
It needs to be conveyed again to zebra */
bgp_zebra_advertise_gw_macip(bgp, vpn->advertise_gw_macip, vpn->vni);

/* advertise svi mac-ip knob to zebra */
bgp_zebra_advertise_svi_macip(bgp, vpn->advertise_svi_macip, vpn->vni);

bgp_evpn_l2vni_remote_route_processing(vpn);

return 0;
}

Expand Down Expand Up @@ -7245,8 +7360,17 @@ void bgp_evpn_flood_control_change(struct bgp *bgp)
*/
void bgp_evpn_cleanup_on_disable(struct bgp *bgp)
{
hash_iterate(bgp->vnihash, (void (*)(struct hash_bucket *,
void *))cleanup_vni_on_disable,
struct bgpevpn *vpn = NULL;
uint32_t vni_count = zebra_l2_vni_count(&bm->zebra_l2_vni_head);

/* Cleanup VNI FIFO list from this bgp instance */
while (vni_count) {
vpn = zebra_l2_vni_pop(&bm->zebra_l2_vni_head);
UNSET_FLAG(vpn->flags, VNI_FLAG_ADD);
vni_count--;
}

hash_iterate(bgp->vnihash, (void (*)(struct hash_bucket *, void *))cleanup_vni_on_disable,
bgp);
}

Expand Down
1 change: 1 addition & 0 deletions bgpd/bgp_evpn.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,4 +200,5 @@ bool bgp_evpn_skip_vrf_import_of_local_es(struct bgp *bgp_vrf, const struct pref
int uninstall_evpn_route_entry_in_vrf(struct bgp *bgp_vrf, const struct prefix_evpn *evp,
struct bgp_path_info *parent_pi);
extern void bgp_zebra_evpn_pop_items_from_announce_fifo(struct bgpevpn *vpn);
extern int install_uninstall_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn, bool install);
#endif /* _QUAGGA_BGP_EVPN_H */
9 changes: 7 additions & 2 deletions bgpd/bgp_evpn_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,9 @@ struct bgpevpn {
#define VNI_FLAG_RD_CFGD 0x4 /* RD is user configured. */
#define VNI_FLAG_IMPRT_CFGD 0x8 /* Import RT is user configured */
#define VNI_FLAG_EXPRT_CFGD 0x10 /* Export RT is user configured */
#define VNI_FLAG_USE_TWO_LABELS 0x20 /* Attach both L2-VNI and L3-VNI if
needed for this VPN */
/* Attach both L2-VNI and L3-VNI if needed for this VPN */
#define VNI_FLAG_USE_TWO_LABELS 0x20
#define VNI_FLAG_ADD 0x40 /* L2VNI Add */

struct bgp *bgp_vrf; /* back pointer to the vrf instance */

Expand Down Expand Up @@ -115,11 +116,15 @@ struct bgpevpn {
/* List of local ESs */
struct list *local_es_evi_list;

struct zebra_l2_vni_item zl2vni;

QOBJ_FIELDS;
};

DECLARE_QOBJ_TYPE(bgpevpn);

DECLARE_LIST(zebra_l2_vni, struct bgpevpn, zl2vni);

/* Mapping of Import RT to VNIs.
* The Import RTs of all VNIs are maintained in a hash table with each
* RT linking to all VNIs that will import routes matching this RT.
Expand Down
1 change: 1 addition & 0 deletions bgpd/bgp_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ static __attribute__((__noreturn__)) void bgp_exit(int status)
bgp_nhg_finish();

zebra_announce_fini(&bm->zebra_announce_head);
zebra_l2_vni_fini(&bm->zebra_l2_vni_head);

/* reverse bgp_dump_init */
bgp_dump_finish();
Expand Down
17 changes: 17 additions & 0 deletions bgpd/bgp_zebra.c
Original file line number Diff line number Diff line change
Expand Up @@ -3029,6 +3029,23 @@ static void bgp_zebra_connected(struct zclient *zclient)
BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(bgp, bgp->peer);
}

void bgp_zebra_process_remote_routes_for_l2vni(struct event *e)
{
/*
* If we have learnt and retained remote routes (VTEPs, MACs)
* for this VNI, install them.
*/
install_uninstall_routes_for_vni(NULL, NULL, true);

/*
* If there are VNIs still pending to be processed, schedule them
* after a small sleep so that CPU can be used for other purposes.
*/
if (zebra_l2_vni_count(&bm->zebra_l2_vni_head))
event_add_timer_msec(bm->master, bgp_zebra_process_remote_routes_for_l2vni, NULL,
20, &bm->t_bgp_zebra_l2_vni);
}

static int bgp_zebra_process_local_es_add(ZAPI_CALLBACK_ARGS)
{
esi_t esi;
Expand Down
1 change: 1 addition & 0 deletions bgpd/bgp_zebra.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,4 +135,5 @@ extern void bgp_zebra_release_label_range(uint32_t start, uint32_t end);
extern enum zclient_send_status
bgp_zebra_withdraw_actual(struct bgp_dest *dest, struct bgp_path_info *info,
struct bgp *bgp);
extern void bgp_zebra_process_remote_routes_for_l2vni(struct event *e);
#endif /* _QUAGGA_BGP_ZEBRA_H */
Loading

0 comments on commit 07a8070

Please sign in to comment.