From bb18fa82f98fb23f3ef7fee1269248c282b39fb3 Mon Sep 17 00:00:00 2001
From: Rajasekar Raja
Date: Thu, 31 Oct 2024 13:16:35 -0700
Subject: [PATCH] bgpd: backpressure - Optimize EVPN L3VNI remote routes processing

Anytime BGP gets a L3 VNI ADD/DEL from zebra,
 - Walking the entire global routing table per L3VNI is very expensive.
 - The next read (say of another VNI ADD/DEL) from the socket does
   not proceed unless this walk is complete.

So for triggers where a bulk of L3VNIs are flapped, this results in
huge output buffer FIFO growth spiking up the memory in zebra since bgp
is slow/busy processing the first message.

To avoid this, the idea is to hook up the BGP-VRF off the struct bgp_master
and maintain a struct bgp FIFO list which is processed later on, where
we walk a chunk of BGP-VRFs and do the remote route install/uninstall.

Ticket :#3864372

Signed-off-by: Rajasekar Raja
---
Review notes (text in this section is ignored by git-am and is not part of
the commit message). Fixes applied to added lines only; none of them change
hunk line counts or the diffstat:
 - install_uninstall_routes_for_vrf(): the single-VRF error path logged
   through bgp_to_proc, which is still NULL on that path; it now logs
   through bgp_vrf.
 - install_uninstall_routes_for_vrf(): the FIFO error path reported the
   'install' argument (always true from the only FIFO caller) instead of
   the per-VRF is_install value.
 - zebra_l3_vni_count() is printed with %zu instead of %lu.
 - The L3VNI delete paths no longer log "ADD" when the EVPN instance is
   shutting down.
 - NOTE(review): on a FIFO-path failure the VRF is unlinked from the list
   without clearing its BGP_FLAG_L3VNI_SCHEDULE_FOR_* flags, and
   bgp_evpn_l3vni_remote_route_processing() only enqueues when both flags
   are clear. Left as-is here because the fix adds lines.

 bgpd/bgp_evpn.c  | 250 +++++++++++++++++++++++++++++++++++++----------
 bgpd/bgp_evpn.h  |   1 +
 bgpd/bgp_zebra.c |  25 +++++
 bgpd/bgp_zebra.h |   1 +
 bgpd/bgpd.c      |  23 ++++-
 bgpd/bgpd.h      |   3 +
 6 files changed, 245 insertions(+), 58 deletions(-)

diff --git a/bgpd/bgp_evpn.c b/bgpd/bgp_evpn.c
index 353372433a5a..9886fde1eb43 100644
--- a/bgpd/bgp_evpn.c
+++ b/bgpd/bgp_evpn.c
@@ -79,6 +79,8 @@ static void bgp_evpn_remote_ip_hash_unlink_nexthop(struct hash_bucket *bucket,
 						   void *args);
 static struct in_addr zero_vtep_ip;
 
+static void bgp_evpn_local_l3vni_del_post_processing(struct bgp *bgp_vrf);
+
 /*
  * Private functions.
  */
@@ -3882,14 +3884,6 @@ int bgp_evpn_route_entry_install_if_vrf_match(struct bgp *bgp_vrf,
 	const struct prefix_evpn *evp =
 		(const struct prefix_evpn *)bgp_dest_get_prefix(pi->net);
 
-	/* Consider "valid" remote routes applicable for
-	 * this VRF.
-	 */
-	if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID)
-	      && pi->type == ZEBRA_ROUTE_BGP
-	      && pi->sub_type == BGP_ROUTE_NORMAL))
-		return 0;
-
 	if (is_route_matching_for_vrf(bgp_vrf, pi)) {
 		if (bgp_evpn_route_rmac_self_check(bgp_vrf, evp, pi))
 			return 0;
@@ -3920,22 +3914,36 @@ int bgp_evpn_route_entry_install_if_vrf_match(struct bgp *bgp_vrf,
  * Install or uninstall mac-ip routes are appropriate for this
  * particular VRF.
  */
-static int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, bool install)
+#define BGP_PROC_L3VNI_LIMIT 10
+int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, bool install)
 {
 	afi_t afi;
 	safi_t safi;
 	struct bgp_dest *rd_dest, *dest;
 	struct bgp_table *table;
 	struct bgp_path_info *pi;
-	int ret;
+	int ret = 0;
 	struct bgp *bgp_evpn = NULL;
+	uint8_t count = 0;
+	uint8_t vni_iter = 0;
+	uint8_t vni_iter_max = BGP_PROC_L3VNI_LIMIT;
+	bool is_install = false;
+	struct bgp *bgp_to_proc = NULL;
+	struct bgp *bgp_to_proc_next = NULL;
 
 	afi = AFI_L2VPN;
 	safi = SAFI_EVPN;
 	bgp_evpn = bgp_get_evpn();
-	if (!bgp_evpn)
+	if (!bgp_evpn) {
+		if (BGP_DEBUG(zebra, ZEBRA))
+			zlog_debug("No BGP EVPN instance found...");
+
 		return -1;
+	}
 
+	if (BGP_DEBUG(zebra, ZEBRA))
+		zlog_debug("%s: Total %zu L3VNI BGP-VRFs pending to be processed for remote route installation",
+			   __func__, zebra_l3_vni_count(&bm->zebra_l3_vni_head));
 	/* Walk entire global routing table and evaluate routes which could be
 	 * imported into this VRF. Note that we need to loop through all global
 	 * routes to determine which route matches the import rt on vrf
@@ -3952,30 +3960,100 @@ static int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, bool install)
 				(const struct prefix_evpn *)bgp_dest_get_prefix(
 					dest);
 
-			/* if not mac-ip route skip this route */
-			if (!(evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE
-			      || evp->prefix.route_type
-					 == BGP_EVPN_IP_PREFIX_ROUTE))
-				continue;
-
-			/* if not a mac+ip route skip this route */
-			if (!(is_evpn_prefix_ipaddr_v4(evp)
-			      || is_evpn_prefix_ipaddr_v6(evp)))
+			/* Proceed only for MAC_IP/IP-Pfx routes */
+			switch (evp->prefix.route_type) {
+			case BGP_EVPN_IP_PREFIX_ROUTE:
+			case BGP_EVPN_MAC_IP_ROUTE:
+				if (!(is_evpn_prefix_ipaddr_v4(evp) ||
+				      is_evpn_prefix_ipaddr_v6(evp)))
+					continue;
+				break;
+			default:
 				continue;
+			}
 
 			for (pi = bgp_dest_get_bgp_path_info(dest); pi;
 			     pi = pi->next) {
-				ret = bgp_evpn_route_entry_install_if_vrf_match(
-					bgp_vrf, pi, install);
-				if (ret) {
-					bgp_dest_unlock_node(rd_dest);
-					bgp_dest_unlock_node(dest);
-					return ret;
+				/* Consider "valid" remote routes applicable for
+				 * this VRF */
+				if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID) &&
+				      pi->type == ZEBRA_ROUTE_BGP &&
+				      pi->sub_type == BGP_ROUTE_NORMAL))
+					continue;
+
+				if (!bgp_vrf) {
+					vni_iter = 0;
+					for (bgp_to_proc =
+						     zebra_l3_vni_first(&bm->zebra_l3_vni_head);
+					     bgp_to_proc && vni_iter < vni_iter_max;
+					     bgp_to_proc = bgp_to_proc_next) {
+						bgp_to_proc_next =
+							zebra_l3_vni_next(&bm->zebra_l3_vni_head,
+									  bgp_to_proc);
+						vni_iter++;
+						is_install = false;
+						if (CHECK_FLAG(bgp_to_proc->flags,
+							       BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL))
+							is_install = true;
+
+						ret = bgp_evpn_route_entry_install_if_vrf_match(
+							bgp_to_proc, pi, is_install);
+						if (ret) {
+							flog_err(EC_BGP_EVPN_FAIL,
+								 "%u: Failed to %s EVPN %s route in L3VNI %u during BP",
+								 bgp_to_proc->vrf_id,
+								 is_install ? "install" : "uninstall",
+								 bgp_evpn_route_type_str
+									 [evp->prefix.route_type]
+										 .str,
+								 bgp_to_proc->l3vni);
+							vni_iter_max--;
+							zebra_l3_vni_del(&bm->zebra_l3_vni_head,
+									 bgp_to_proc);
+							bgp_dest_unlock_node(rd_dest);
+							bgp_dest_unlock_node(dest);
+							if (!is_install)
+								bgp_evpn_local_l3vni_del_post_processing(
+									bgp_to_proc);
+
+							return ret;
+						}
+					}
+				} else {
+					ret = bgp_evpn_route_entry_install_if_vrf_match(bgp_vrf, pi,
+											install);
+					if (ret) {
+						flog_err(EC_BGP_EVPN_FAIL,
+							 "%u: Failed to %s EVPN %s route in L3VNI %u",
+							 bgp_vrf->vrf_id,
+							 install ? "install" : "uninstall",
+							 bgp_evpn_route_type_str[evp->prefix.route_type]
+								 .str,
+							 bgp_vrf->l3vni);
+						bgp_dest_unlock_node(rd_dest);
+						bgp_dest_unlock_node(dest);
+						return ret;
+					}
 				}
 			}
 		}
 	}
 
+	if (!bgp_vrf) {
+		while (count < vni_iter_max) {
+			bgp_to_proc = zebra_l3_vni_pop(&bm->zebra_l3_vni_head);
+			if (!bgp_to_proc)
+				return 0;
+
+			if (CHECK_FLAG(bgp_to_proc->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE))
+				bgp_evpn_local_l3vni_del_post_processing(bgp_to_proc);
+
+			UNSET_FLAG(bgp_to_proc->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL);
+			UNSET_FLAG(bgp_to_proc->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE);
+			count++;
+		}
+	}
+
 	return 0;
 }
 
@@ -6860,6 +6938,53 @@ static void link_l2vni_hash_to_l3vni(struct hash_bucket *bucket,
 		bgpevpn_link_to_l3vni(vpn);
 }
 
+static void bgp_evpn_l3vni_remote_route_processing(struct bgp *bgp, bool install)
+{
+	/*
+	 * Anytime BGP gets a Bulk of L3 VNI ADD/DEL from zebra,
+	 *  - Walking the entire global routing table per VNI is very expensive.
+	 *  - The next read (say of another VNI ADD/DEL) from the socket does
+	 *    not proceed unless this walk is complete.
+	 * This results in huge output buffer FIFO growth spiking up the
+	 * memory in zebra.
+	 *
+	 * To avoid this, idea is to hookup the BGP-VRF off the struct
+	 * bgp_master and maintain a struct bgp FIFO list which is processed
+	 * later on, where we walk a chunk of BGP-VRFs and do the remote route
+	 * install/uninstall.
+	 */
+	if (!CHECK_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL) &&
+	    !CHECK_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE))
+		zebra_l3_vni_add_tail(&bm->zebra_l3_vni_head, bgp);
+
+	if (install) {
+		SET_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL);
+		UNSET_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE);
+	} else {
+		SET_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE);
+		UNSET_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL);
+	}
+
+	if (BGP_DEBUG(zebra, ZEBRA))
+		zlog_debug("Scheduling L3VNI %s to be processed later for %s VNI %u",
+			   install ? "ADD" : "DEL", bgp->name_pretty, bgp->l3vni);
+	/*
+	 * If there are no BGP-VRFs in the bm L3VNI FIFO list i.e. an update
+	 * for an already processed L3VNI comes in, schedule the remote route
+	 * install immediately.
+	 *
+	 * In all other cases, it is ok to schedule the remote route un/install
+	 * after a small sleep. This is to give benefit of doubt in case more
+	 * L3VNI events come.
+	 */
+	if (zebra_l3_vni_count(&bm->zebra_l3_vni_head))
+		event_add_timer_msec(bm->master, bgp_zebra_process_remote_routes_for_l3vrf, NULL,
+				     20, &bm->t_bgp_zebra_l3_vni);
+	else
+		event_add_event(bm->master, bgp_zebra_process_remote_routes_for_l3vrf, NULL, 0,
+				&bm->t_bgp_zebra_l3_vni);
+}
+
 int bgp_evpn_local_l3vni_add(vni_t l3vni, vrf_id_t vrf_id,
 			     struct ethaddr *svi_rmac,
 			     struct ethaddr *vrr_rmac,
@@ -7005,52 +7130,36 @@ int bgp_evpn_local_l3vni_add(vni_t l3vni, vrf_id_t vrf_id,
 	/* advertise type-5 routes if needed */
 	update_advertise_vrf_routes(bgp_vrf);
 
-	/* install all remote routes belonging to this l3vni into correspondng
-	 * vrf */
-	install_routes_for_vrf(bgp_vrf);
+	bgp_evpn_l3vni_remote_route_processing(bgp_vrf, true);
 
 	return 0;
 }
 
-int bgp_evpn_local_l3vni_del(vni_t l3vni, vrf_id_t vrf_id)
+static void bgp_evpn_local_l3vni_del_post_processing(struct bgp *bgp_vrf)
 {
-	struct bgp *bgp_vrf = NULL; /* bgp vrf instance */
 	struct bgp *bgp_evpn = NULL; /* EVPN bgp instance */
 	struct listnode *node = NULL;
 	struct listnode *next = NULL;
 	struct bgpevpn *vpn = NULL;
 
-	bgp_vrf = bgp_lookup_by_vrf_id(vrf_id);
-	if (!bgp_vrf) {
-		flog_err(
-			EC_BGP_NO_DFLT,
-			"Cannot process L3VNI %u Del - Could not find BGP instance",
-			l3vni);
-		return -1;
-	}
-
 	bgp_evpn = bgp_get_evpn();
 	if (!bgp_evpn) {
-		flog_err(
-			EC_BGP_NO_DFLT,
-			"Cannot process L3VNI %u Del - Could not find EVPN BGP instance",
-			l3vni);
-		return -1;
+		flog_err(EC_BGP_NO_DFLT,
+			 "Cannot process L3VNI %u Del - Could not find EVPN BGP instance",
+			 bgp_vrf->l3vni);
+		return;
 	}
 
 	if (CHECK_FLAG(bgp_evpn->flags, BGP_FLAG_DELETE_IN_PROGRESS)) {
 		flog_err(EC_BGP_NO_DFLT,
-			"Cannot process L3VNI %u ADD - EVPN BGP instance is shutting down",
-			l3vni);
-		return -1;
+			 "Cannot process L3VNI %u Del - EVPN BGP instance is shutting down",
+			 bgp_vrf->l3vni);
+		return;
 	}
 
-	/* Remove remote routes from BGT VRF even if BGP_VRF_AUTO is configured,
-	 * bgp_delete would not remove/decrement bgp_path_info of the ip_prefix
-	 * routes. This will uninstalling the routes from zebra and decremnt the
-	 * bgp info count.
-	 */
-	uninstall_routes_for_vrf(bgp_vrf);
+	if (BGP_DEBUG(zebra, ZEBRA))
+		zlog_debug("In %s for L3VNI %u after remote route installation", __func__,
+			   bgp_vrf->l3vni);
 
 	/* delete/withdraw all type-5 routes */
 	delete_withdraw_vrf_routes(bgp_vrf);
@@ -7100,6 +7209,39 @@ int bgp_evpn_local_l3vni_del(vni_t l3vni, vrf_id_t vrf_id)
 	/* Delete the instance if it was autocreated */
 	if (CHECK_FLAG(bgp_vrf->vrf_flags, BGP_VRF_AUTO))
 		bgp_delete(bgp_vrf);
+}
+
+int bgp_evpn_local_l3vni_del(vni_t l3vni, vrf_id_t vrf_id)
+{
+	struct bgp *bgp_evpn = NULL; /* EVPN bgp instance */
+	struct bgp *bgp_vrf = NULL; /* bgp vrf instance */
+
+	bgp_vrf = bgp_lookup_by_vrf_id(vrf_id);
+	if (!bgp_vrf) {
+		flog_err(EC_BGP_NO_DFLT,
+			 "Cannot process L3VNI %u Del - Could not find BGP instance", l3vni);
+		return -1;
+	}
+
+	bgp_evpn = bgp_get_evpn();
+	if (!bgp_evpn) {
+		flog_err(EC_BGP_NO_DFLT,
+			 "Cannot process L3VNI %u Del - Could not find EVPN BGP instance", l3vni);
+		return -1;
+	}
+
+	if (CHECK_FLAG(bgp_evpn->flags, BGP_FLAG_DELETE_IN_PROGRESS)) {
+		flog_err(EC_BGP_NO_DFLT,
+			 "Cannot process L3VNI %u Del - EVPN BGP instance is shutting down", l3vni);
+		return -1;
+	}
+
+	/*
+	 * Move all the l3vni_delete operation post the remote route
+	 * installation processing i.e. add the L3VNI DELETE item on the
+	 * BGP-VRFs FIFO and move on.
+	 */
+	bgp_evpn_l3vni_remote_route_processing(bgp_vrf, false);
 
 	return 0;
 }
diff --git a/bgpd/bgp_evpn.h b/bgpd/bgp_evpn.h
index 75dde616ce78..8bbc5d3c37f2 100644
--- a/bgpd/bgp_evpn.h
+++ b/bgpd/bgp_evpn.h
@@ -201,4 +201,5 @@ int uninstall_evpn_route_entry_in_vrf(struct bgp *bgp_vrf, const struct prefix_e
 					     struct bgp_path_info *parent_pi);
 extern void bgp_zebra_evpn_pop_items_from_announce_fifo(struct bgpevpn *vpn);
 extern int install_uninstall_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn, bool install);
+extern int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, bool install);
 #endif /* _QUAGGA_BGP_EVPN_H */
diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c
index 1eb614385c06..783dec7802db 100644
--- a/bgpd/bgp_zebra.c
+++ b/bgpd/bgp_zebra.c
@@ -3042,6 +3042,31 @@ void bgp_zebra_process_remote_routes_for_l2vni(struct event *e)
 				     20, &bm->t_bgp_zebra_l2_vni);
 }
 
+void bgp_zebra_process_remote_routes_for_l3vrf(struct event *e)
+{
+	/*
+	 * Install/Uninstall all remote routes belonging to l3vni
+	 *
+	 * NOTE:
+	 *  - At this point it does not matter whether we call
+	 *    install_routes_for_vrf/uninstall_routes_for_vrf.
+	 *  - Since we pass struct bgp as NULL,
+	 *     * we iterate the bm FIFO list
+	 *     * the second variable (true) is ignored as well and
+	 *       calculated based on the BGP-VRFs flags for ADD/DELETE.
+	 */
+	install_uninstall_routes_for_vrf(NULL, true);
+
+	/*
+	 * If there are L3VNIs still pending to be processed, schedule them
+	 * after a small sleep so that CPU can be used for other purposes.
+	 */
+	if (zebra_l3_vni_count(&bm->zebra_l3_vni_head)) {
+		event_add_timer_msec(bm->master, bgp_zebra_process_remote_routes_for_l3vrf, NULL,
+				     20, &bm->t_bgp_zebra_l3_vni);
+	}
+}
+
 static int bgp_zebra_process_local_es_add(ZAPI_CALLBACK_ARGS)
 {
 	esi_t esi;
diff --git a/bgpd/bgp_zebra.h b/bgpd/bgp_zebra.h
index 993d002998f2..7e9d57cb8521 100644
--- a/bgpd/bgp_zebra.h
+++ b/bgpd/bgp_zebra.h
@@ -136,4 +136,5 @@ extern enum zclient_send_status bgp_zebra_withdraw_actual(struct bgp_dest *dest,
 							  struct bgp_path_info *info,
 							  struct bgp *bgp);
 extern void bgp_zebra_process_remote_routes_for_l2vni(struct event *e);
+extern void bgp_zebra_process_remote_routes_for_l3vrf(struct event *e);
 #endif /* _QUAGGA_BGP_ZEBRA_H */
diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c
index 45c4229afa3c..041c8fc611b7 100644
--- a/bgpd/bgpd.c
+++ b/bgpd/bgpd.c
@@ -3972,8 +3972,10 @@ int bgp_delete(struct bgp *bgp)
 	struct bgp_dest *dest_next = NULL;
 	struct bgp_table *dest_table = NULL;
 	struct graceful_restart_info *gr_info;
-	uint32_t b_ann_cnt = 0, b_l2_cnt = 0;
-	uint32_t a_ann_cnt = 0, a_l2_cnt = 0;
+	uint32_t b_ann_cnt = 0, b_l2_cnt = 0, b_l3_cnt = 0;
+	uint32_t a_ann_cnt = 0, a_l2_cnt = 0, a_l3_cnt = 0;
+	struct bgp *bgp_to_proc = NULL;
+	struct bgp *bgp_to_proc_next = NULL;
 
 	assert(bgp);
 
@@ -4007,13 +4009,24 @@ int bgp_delete(struct bgp *bgp)
 		}
 	}
 
+	b_l3_cnt = zebra_l3_vni_count(&bm->zebra_l3_vni_head);
+	for (bgp_to_proc = zebra_l3_vni_first(&bm->zebra_l3_vni_head); bgp_to_proc;
+	     bgp_to_proc = bgp_to_proc_next) {
+		bgp_to_proc_next = zebra_l3_vni_next(&bm->zebra_l3_vni_head, bgp_to_proc);
+		if (bgp_to_proc == bgp)
+			zebra_l3_vni_del(&bm->zebra_l3_vni_head, bgp_to_proc);
+	}
+
 	if (BGP_DEBUG(zebra, ZEBRA)) {
 		a_ann_cnt = zebra_announce_count(&bm->zebra_announce_head);
 		a_l2_cnt = zebra_l2_vni_count(&bm->zebra_l2_vni_head);
+		a_l3_cnt = zebra_l3_vni_count(&bm->zebra_l3_vni_head);
 		zlog_debug("FIFO Cleanup Count during BGP %s deletion :: "
 			   "Zebra Announce - before %u after %u :: "
-			   "BGP L2_VNI - before %u after %u",
-			   bgp->name_pretty, b_ann_cnt, a_ann_cnt, b_l2_cnt, a_l2_cnt);
+			   "BGP L2_VNI - before %u after %u :: "
+			   "BGP L3_VNI - before %u after %u",
+			   bgp->name_pretty, b_ann_cnt, a_ann_cnt, b_l2_cnt, a_l2_cnt, b_l3_cnt,
+			   a_l3_cnt);
 	}
 
 	bgp_soft_reconfig_table_task_cancel(bgp, NULL, NULL);
@@ -8539,6 +8552,7 @@ void bgp_master_init(struct event_loop *master, const int buffer_size,
 	bm->select_defer_time = BGP_DEFAULT_SELECT_DEFERRAL_TIME;
 	bm->rib_stale_time = BGP_DEFAULT_RIB_STALE_TIME;
 	bm->t_bgp_zebra_l2_vni = NULL;
+	bm->t_bgp_zebra_l3_vni = NULL;
 
 	bgp_mac_init();
 	/* init the rd id space.
@@ -8787,6 +8801,7 @@ void bgp_terminate(void)
 	EVENT_OFF(bm->t_bgp_start_label_manager);
 	EVENT_OFF(bm->t_bgp_zebra_route);
 	EVENT_OFF(bm->t_bgp_zebra_l2_vni);
+	EVENT_OFF(bm->t_bgp_zebra_l3_vni);
 
 	bgp_mac_finish();
 }
diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h
index 139871e4825c..bdfc3316febb 100644
--- a/bgpd/bgpd.h
+++ b/bgpd/bgpd.h
@@ -210,6 +210,7 @@ struct bgp_master {
 	/* To preserve ordering of processing of L2 VNIs in BGP */
 	struct zebra_l2_vni_head zebra_l2_vni_head;
 
+	struct event *t_bgp_zebra_l3_vni;
 	/* To preserve ordering of processing of BGP-VRFs for L3 VNIs */
 	struct zebra_l3_vni_head zebra_l3_vni_head;
 
@@ -563,6 +564,8 @@ struct bgp {
 #define BGP_FLAG_INSTANCE_HIDDEN (1ULL << 39)
 /* Prohibit BGP from enabling IPv6 RA on interfaces */
 #define BGP_FLAG_IPV6_NO_AUTO_RA (1ULL << 40)
+#define BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL (1ULL << 41)
+#define BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE (1ULL << 42)
 
 	/* BGP default address-families.
 	 * New peers inherit enabled afi/safis from bgp instance.