From 8c713609dda6e71890fd61556ee7654170680ccd Mon Sep 17 00:00:00 2001 From: Trey Aspelund Date: Wed, 25 Jan 2023 13:47:24 -0500 Subject: [PATCH 1/4] bgpd: add EVPN route type msg list Adds a msg list for getting strings mapping to enum bgp_evpn_route_type Ticket: #3318830 Signed-off-by: Trey Aspelund --- bgpd/bgp_evpn_private.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/bgpd/bgp_evpn_private.h b/bgpd/bgp_evpn_private.h index b05df3d82ae7..f1dee8d2bcf9 100644 --- a/bgpd/bgp_evpn_private.h +++ b/bgpd/bgp_evpn_private.h @@ -32,6 +32,13 @@ #define BGP_EVPN_TYPE4_V4_PSIZE 23 #define BGP_EVPN_TYPE4_V6_PSIZE 34 +static const struct message bgp_evpn_route_type_str[] = { { BGP_EVPN_AD_ROUTE, "AD" }, + { BGP_EVPN_MAC_IP_ROUTE, "MACIP" }, + { BGP_EVPN_IMET_ROUTE, "IMET" }, + { BGP_EVPN_ES_ROUTE, "ES" }, + { BGP_EVPN_IP_PREFIX_ROUTE, "IP-PREFIX" }, + { 0 } }; + RB_HEAD(bgp_es_evi_rb_head, bgp_evpn_es_evi); RB_PROTOTYPE(bgp_es_evi_rb_head, bgp_evpn_es_evi, rb_node, bgp_es_evi_rb_cmp); From 07a80709c728d87abc2d15393a719d4232b1f33b Mon Sep 17 00:00:00 2001 From: Rajasekar Raja Date: Wed, 28 Aug 2024 11:38:53 -0700 Subject: [PATCH 2/4] bgpd: backpressure - Optimize EVPN L2VNI remote routes processing Anytime BGP gets a L2 VNI ADD from zebra, - Walking the entire global routing table per L2VNI is very expensive. - The next read (say of another VNI ADD) from the socket does not proceed unless this walk is complete. So for triggers where a bulk of L2VNI's are flapped, this results in huge output buffer FIFO growth spiking up the memory in zebra since bgp is slow/busy processing the first message. To avoid this, idea is to hookup the VPN off the bgp_master struct and maintain a VPN FIFO list which is processed later on, where we walk a chunk of VPNs and do the remote route install. Note: So far in the L3 backpressure cases(#15524), we have considered the fact that zebra is slow, and the buffer grows in the BGP. However this is the reverse i.e. BGP is very busy processing the first ZAPI message from zebra due to which the buffer grows huge in zebra and memory spikes up. Ticket :#3864372 Signed-off-by: Rajasekar Raja --- bgpd/bgp_evpn.c | 226 +++++++++++++++++++++++++++++++--------- bgpd/bgp_evpn.h | 1 + bgpd/bgp_evpn_private.h | 9 +- bgpd/bgp_main.c | 1 + bgpd/bgp_zebra.c | 17 +++ bgpd/bgp_zebra.h | 1 + bgpd/bgpd.c | 36 +++++-- bgpd/bgpd.h | 5 + 8 files changed, 237 insertions(+), 59 deletions(-) diff --git a/bgpd/bgp_evpn.c b/bgpd/bgp_evpn.c index f173bd01f20f..f4bae58724a2 100644 --- a/bgpd/bgp_evpn.c +++ b/bgpd/bgp_evpn.c @@ -3979,30 +3979,80 @@ static int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, bool install) return 0; } +#define BGP_PROC_L2VNI_LIMIT 10 +static int install_evpn_remote_route_per_l2vni(struct bgp *bgp, struct bgp_path_info *pi, + const struct prefix_evpn *evp) +{ + int ret = 0; + uint8_t vni_iter = 0; + struct bgpevpn *t_vpn = NULL; + struct bgpevpn *t_vpn_next = NULL; + + for (t_vpn = zebra_l2_vni_first(&bm->zebra_l2_vni_head); + t_vpn && vni_iter < BGP_PROC_L2VNI_LIMIT; t_vpn = t_vpn_next) { + t_vpn_next = zebra_l2_vni_next(&bm->zebra_l2_vni_head, t_vpn); + vni_iter++; + /* + * Skip install/uninstall if the route entry is not needed to + * be imported into the VNI i.e. RTs dont match + */ + if (!is_route_matching_for_vni(bgp, t_vpn, pi)) + continue; + + ret = install_evpn_route_entry(bgp, t_vpn, evp, pi); + + if (ret) { + flog_err(EC_BGP_EVPN_FAIL, + "%u: Failed to install EVPN %s route in VNI %u during BP", + bgp->vrf_id, bgp_evpn_route_type_str[evp->prefix.route_type].str, + t_vpn->vni); + zebra_l2_vni_del(&bm->zebra_l2_vni_head, t_vpn); + + return ret; + } + } + + return 0; +} + /* * Install or uninstall routes of specified type that are appropriate for this * particular VNI. */ -static int install_uninstall_routes_for_vni(struct bgp *bgp, - struct bgpevpn *vpn, bool install) +int install_uninstall_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn, bool install) { afi_t afi; safi_t safi; struct bgp_dest *rd_dest, *dest; struct bgp_table *table; struct bgp_path_info *pi; - int ret; + int ret = 0; + uint8_t count = 0; + bool walk_fifo = false; afi = AFI_L2VPN; safi = SAFI_EVPN; - /* Walk entire global routing table and evaluate routes which could be + if (!bgp) { + walk_fifo = true; + bgp = bgp_get_evpn(); + if (!bgp) { + zlog_warn("%s: No BGP EVPN instance found...", __func__); + + return -1; + } + } + + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Total %u L2VNI VPNs pending to be processed for remote route installation", + __func__, (uint32_t)zebra_l2_vni_count(&bm->zebra_l2_vni_head)); + /* + * Walk entire global routing table and evaluate routes which could be * imported into this VPN. Note that we cannot just look at the routes - * for - * the VNI's RD - remote routes applicable for this VNI could have any - * RD. + * for the VNI's RD - remote routes applicable for this VNI could have + * any RD. + * Note: EVPN routes are a 2-level table. */ - /* EVPN routes are a 2-level table. */ for (rd_dest = bgp_table_top(bgp->rib[afi][safi]); rd_dest; rd_dest = bgp_route_next(rd_dest)) { table = bgp_dest_get_bgp_table_info(rd_dest); @@ -4015,54 +4065,80 @@ static int install_uninstall_routes_for_vni(struct bgp *bgp, (const struct prefix_evpn *)bgp_dest_get_prefix( dest); - if (evp->prefix.route_type != BGP_EVPN_IMET_ROUTE && - evp->prefix.route_type != BGP_EVPN_AD_ROUTE && - evp->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE) + /* Proceed only for AD, MAC_IP and IMET routes */ + switch (evp->prefix.route_type) { + case BGP_EVPN_AD_ROUTE: + case BGP_EVPN_MAC_IP_ROUTE: + case BGP_EVPN_IMET_ROUTE: + break; + case BGP_EVPN_ES_ROUTE: + case BGP_EVPN_IP_PREFIX_ROUTE: continue; + } for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) { - /* Consider "valid" remote routes applicable for - * this VNI. */ - if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID) - && pi->type == ZEBRA_ROUTE_BGP - && pi->sub_type == BGP_ROUTE_NORMAL)) - continue; - - if (!is_route_matching_for_vni(bgp, vpn, pi)) + /* + * Skip install/uninstall if + * - Not a valid remote routes + * - Install & evpn route matchesi macvrf SOO + */ + if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID) && + pi->type == ZEBRA_ROUTE_BGP && + pi->sub_type == BGP_ROUTE_NORMAL) || + (install && bgp_evpn_route_matches_macvrf_soo(pi, evp))) continue; - if (install) { - if (bgp_evpn_route_matches_macvrf_soo( - pi, evp)) + if (walk_fifo) { + ret = install_evpn_remote_route_per_l2vni(bgp, pi, evp); + if (ret) { + bgp_dest_unlock_node(rd_dest); + bgp_dest_unlock_node(dest); + return ret; + } + } else { + /* + * Skip install/uninstall if the route + * entry is not needed to be imported + * into the VNI i.e. RTs dont match + */ + if (!is_route_matching_for_vni(bgp, vpn, pi)) continue; - ret = install_evpn_route_entry(bgp, vpn, - evp, pi); - } else - ret = uninstall_evpn_route_entry( - bgp, vpn, evp, pi); - - if (ret) { - flog_err(EC_BGP_EVPN_FAIL, - "%u: Failed to %s EVPN %s route in VNI %u", - bgp->vrf_id, - install ? "install" - : "uninstall", - evp->prefix.route_type == - BGP_EVPN_MAC_IP_ROUTE - ? "MACIP" - : "IMET", - vpn->vni); - - bgp_dest_unlock_node(rd_dest); - bgp_dest_unlock_node(dest); - return ret; + if (install) + ret = install_evpn_route_entry(bgp, vpn, evp, pi); + else + ret = uninstall_evpn_route_entry(bgp, vpn, evp, pi); + + if (ret) { + flog_err(EC_BGP_EVPN_FAIL, + "%u: Failed to %s EVPN %s route in VNI %u", + bgp->vrf_id, + install ? "install" : "uninstall", + bgp_evpn_route_type_str[evp->prefix.route_type] + .str, + vpn->vni); + + bgp_dest_unlock_node(rd_dest); + bgp_dest_unlock_node(dest); + return ret; + } } } } } + if (walk_fifo) { + while (count < BGP_PROC_L2VNI_LIMIT) { + vpn = zebra_l2_vni_pop(&bm->zebra_l2_vni_head); + if (!vpn) + return 0; + + UNSET_FLAG(vpn->flags, VNI_FLAG_ADD); + count++; + } + } + return 0; } @@ -7024,6 +7100,45 @@ int bgp_evpn_local_l3vni_del(vni_t l3vni, vrf_id_t vrf_id) return 0; } +static void bgp_evpn_l2vni_remote_route_processing(struct bgpevpn *vpn) +{ + /* + * Anytime BGP gets a Bulk of L2 VNIs ADD/UPD from zebra, + * - Walking the entire global routing table per VNI is very expensive. + * - The next read (say of another VNI ADD/UPD) from the socket does + * not proceed unless this walk is complete. + * This results in huge output buffer FIFO growth spiking up the + * memory in zebra. + * + * To avoid this, idea is to hookup the VPN off the struct bgp_master + * and maintain a VPN FIFO list which is processed later on, where we + * walk a chunk of VPNs and do the remote route install. + */ + if (!CHECK_FLAG(vpn->flags, VNI_FLAG_ADD)) { + zebra_l2_vni_add_tail(&bm->zebra_l2_vni_head, vpn); + SET_FLAG(vpn->flags, VNI_FLAG_ADD); + } + + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("Scheduling L2VNI ADD to be processed later for VNI %u", vpn->vni); + + /* + * If there are no VNI's in the bgp VPN FIFO list i.e. an update + * for an already processed VNI comes in, schedule the remote + * route install immediately. + * + * In all other cases, it is ok to schedule the remote route install + * after a small sleep. This is to give benefit of doubt in case more + * L2VNI ADD events come. + */ + if (zebra_l2_vni_count(&bm->zebra_l2_vni_head)) + event_add_timer_msec(bm->master, bgp_zebra_process_remote_routes_for_l2vni, NULL, + 20, &bm->t_bgp_zebra_l2_vni); + else + event_add_event(bm->master, bgp_zebra_process_remote_routes_for_l2vni, NULL, 0, + &bm->t_bgp_zebra_l2_vni); +} + /* * When bgp instance goes down also clean up what might have been left over * from evpn. @@ -7047,6 +7162,10 @@ int bgp_evpn_local_vni_del(struct bgp *bgp, vni_t vni) if (!vpn) return 0; + /* Remove the VPN from the bgp VPN FIFO (if exists) */ + UNSET_FLAG(vpn->flags, VNI_FLAG_ADD); + zebra_l2_vni_del(&bm->zebra_l2_vni_head, vpn); + /* Remove all local EVPN routes and schedule for processing (to * withdraw from peers). */ @@ -7203,12 +7322,6 @@ int bgp_evpn_local_vni_add(struct bgp *bgp, vni_t vni, } } - /* If we have learnt and retained remote routes (VTEPs, MACs) for this - * VNI, - * install them. - */ - install_routes_for_vni(bgp, vpn); - /* If we are advertising gateway mac-ip It needs to be conveyed again to zebra */ bgp_zebra_advertise_gw_macip(bgp, vpn->advertise_gw_macip, vpn->vni); @@ -7216,6 +7329,8 @@ int bgp_evpn_local_vni_add(struct bgp *bgp, vni_t vni, /* advertise svi mac-ip knob to zebra */ bgp_zebra_advertise_svi_macip(bgp, vpn->advertise_svi_macip, vpn->vni); + bgp_evpn_l2vni_remote_route_processing(vpn); + return 0; } @@ -7245,8 +7360,17 @@ void bgp_evpn_flood_control_change(struct bgp *bgp) */ void bgp_evpn_cleanup_on_disable(struct bgp *bgp) { - hash_iterate(bgp->vnihash, (void (*)(struct hash_bucket *, - void *))cleanup_vni_on_disable, + struct bgpevpn *vpn = NULL; + uint32_t vni_count = zebra_l2_vni_count(&bm->zebra_l2_vni_head); + + /* Cleanup VNI FIFO list from this bgp instance */ + while (vni_count) { + vpn = zebra_l2_vni_pop(&bm->zebra_l2_vni_head); + UNSET_FLAG(vpn->flags, VNI_FLAG_ADD); + vni_count--; + } + + hash_iterate(bgp->vnihash, (void (*)(struct hash_bucket *, void *))cleanup_vni_on_disable, bgp); } diff --git a/bgpd/bgp_evpn.h b/bgpd/bgp_evpn.h index 1a333a5a09a3..75dde616ce78 100644 --- a/bgpd/bgp_evpn.h +++ b/bgpd/bgp_evpn.h @@ -200,4 +200,5 @@ bool bgp_evpn_skip_vrf_import_of_local_es(struct bgp *bgp_vrf, const struct pref int uninstall_evpn_route_entry_in_vrf(struct bgp *bgp_vrf, const struct prefix_evpn *evp, struct bgp_path_info *parent_pi); extern void bgp_zebra_evpn_pop_items_from_announce_fifo(struct bgpevpn *vpn); +extern int install_uninstall_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn, bool install); #endif /* _QUAGGA_BGP_EVPN_H */ diff --git a/bgpd/bgp_evpn_private.h b/bgpd/bgp_evpn_private.h index f1dee8d2bcf9..568d3d45eed3 100644 --- a/bgpd/bgp_evpn_private.h +++ b/bgpd/bgp_evpn_private.h @@ -60,8 +60,9 @@ struct bgpevpn { #define VNI_FLAG_RD_CFGD 0x4 /* RD is user configured. */ #define VNI_FLAG_IMPRT_CFGD 0x8 /* Import RT is user configured */ #define VNI_FLAG_EXPRT_CFGD 0x10 /* Export RT is user configured */ -#define VNI_FLAG_USE_TWO_LABELS 0x20 /* Attach both L2-VNI and L3-VNI if - needed for this VPN */ +/* Attach both L2-VNI and L3-VNI if needed for this VPN */ +#define VNI_FLAG_USE_TWO_LABELS 0x20 +#define VNI_FLAG_ADD 0x40 /* L2VNI Add */ struct bgp *bgp_vrf; /* back pointer to the vrf instance */ @@ -115,11 +116,15 @@ struct bgpevpn { /* List of local ESs */ struct list *local_es_evi_list; + struct zebra_l2_vni_item zl2vni; + QOBJ_FIELDS; }; DECLARE_QOBJ_TYPE(bgpevpn); +DECLARE_LIST(zebra_l2_vni, struct bgpevpn, zl2vni); + /* Mapping of Import RT to VNIs. * The Import RTs of all VNIs are maintained in a hash table with each * RT linking to all VNIs that will import routes matching this RT. diff --git a/bgpd/bgp_main.c b/bgpd/bgp_main.c index 535d2fc5f434..9d89fd6f96bc 100644 --- a/bgpd/bgp_main.c +++ b/bgpd/bgp_main.c @@ -207,6 +207,7 @@ static __attribute__((__noreturn__)) void bgp_exit(int status) bgp_nhg_finish(); zebra_announce_fini(&bm->zebra_announce_head); + zebra_l2_vni_fini(&bm->zebra_l2_vni_head); /* reverse bgp_dump_init */ bgp_dump_finish(); diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c index ac4a6bb03bf6..90e2d5af7bf6 100644 --- a/bgpd/bgp_zebra.c +++ b/bgpd/bgp_zebra.c @@ -3029,6 +3029,23 @@ static void bgp_zebra_connected(struct zclient *zclient) BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(bgp, bgp->peer); } +void bgp_zebra_process_remote_routes_for_l2vni(struct event *e) +{ + /* + * If we have learnt and retained remote routes (VTEPs, MACs) + * for this VNI, install them. + */ + install_uninstall_routes_for_vni(NULL, NULL, true); + + /* + * If there are VNIs still pending to be processed, schedule them + * after a small sleep so that CPU can be used for other purposes. + */ + if (zebra_l2_vni_count(&bm->zebra_l2_vni_head)) + event_add_timer_msec(bm->master, bgp_zebra_process_remote_routes_for_l2vni, NULL, + 20, &bm->t_bgp_zebra_l2_vni); +} + static int bgp_zebra_process_local_es_add(ZAPI_CALLBACK_ARGS) { esi_t esi; diff --git a/bgpd/bgp_zebra.h b/bgpd/bgp_zebra.h index 8deecba747b3..993d002998f2 100644 --- a/bgpd/bgp_zebra.h +++ b/bgpd/bgp_zebra.h @@ -135,4 +135,5 @@ extern void bgp_zebra_release_label_range(uint32_t start, uint32_t end); extern enum zclient_send_status bgp_zebra_withdraw_actual(struct bgp_dest *dest, struct bgp_path_info *info, struct bgp *bgp); +extern void bgp_zebra_process_remote_routes_for_l2vni(struct event *e); #endif /* _QUAGGA_BGP_ZEBRA_H */ diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index 7b21c29ea663..44640c84f256 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -3966,11 +3966,14 @@ int bgp_delete(struct bgp *bgp) afi_t afi; safi_t safi; int i; + uint32_t vni_count; + struct bgpevpn *vpn = NULL; struct bgp_dest *dest = NULL; struct bgp_dest *dest_next = NULL; struct bgp_table *dest_table = NULL; struct graceful_restart_info *gr_info; - uint32_t cnt_before, cnt_after; + uint32_t b_ann_cnt = 0, b_l2_cnt = 0; + uint32_t a_ann_cnt = 0, a_l2_cnt = 0; assert(bgp); @@ -3978,7 +3981,7 @@ int bgp_delete(struct bgp *bgp) * Iterate the pending dest list and remove all the dest pertaininig to * the bgp under delete. */ - cnt_before = zebra_announce_count(&bm->zebra_announce_head); + b_ann_cnt = zebra_announce_count(&bm->zebra_announce_head); for (dest = zebra_announce_first(&bm->zebra_announce_head); dest; dest = dest_next) { dest_next = zebra_announce_next(&bm->zebra_announce_head, dest); @@ -3990,10 +3993,28 @@ int bgp_delete(struct bgp *bgp) } } - cnt_after = zebra_announce_count(&bm->zebra_announce_head); - if (BGP_DEBUG(zebra, ZEBRA)) - zlog_debug("Zebra Announce Fifo cleanup count before %u and after %u during BGP %s deletion", - cnt_before, cnt_after, bgp->name_pretty); + /* + * Pop all VPNs yet to be processed for remote routes install if the + * bgp-evpn instance is getting deleted + */ + if (bgp == bgp_get_evpn()) { + b_l2_cnt = zebra_l2_vni_count(&bm->zebra_l2_vni_head); + vni_count = b_l2_cnt; + while (vni_count) { + vpn = zebra_l2_vni_pop(&bm->zebra_l2_vni_head); + UNSET_FLAG(vpn->flags, VNI_FLAG_ADD); + vni_count--; + } + } + + if (BGP_DEBUG(zebra, ZEBRA)) { + a_ann_cnt = zebra_announce_count(&bm->zebra_announce_head); + a_l2_cnt = zebra_l2_vni_count(&bm->zebra_l2_vni_head); + zlog_debug("FIFO Cleanup Count during BGP %s deletion :: " + "Zebra Announce - before %u after %u :: " + "BGP L2_VNI - before %u after %u", + bgp->name_pretty, b_ann_cnt, a_ann_cnt, b_l2_cnt, a_l2_cnt); + } bgp_soft_reconfig_table_task_cancel(bgp, NULL, NULL); @@ -8492,6 +8513,7 @@ void bgp_master_init(struct event_loop *master, const int buffer_size, bm = &bgp_master; zebra_announce_init(&bm->zebra_announce_head); + zebra_l2_vni_init(&bm->zebra_l2_vni_head); bm->bgp = list_new(); bm->listen_sockets = list_new(); bm->port = BGP_PORT_DEFAULT; @@ -8515,6 +8537,7 @@ void bgp_master_init(struct event_loop *master, const int buffer_size, bm->stalepath_time = BGP_DEFAULT_STALEPATH_TIME; bm->select_defer_time = BGP_DEFAULT_SELECT_DEFERRAL_TIME; bm->rib_stale_time = BGP_DEFAULT_RIB_STALE_TIME; + bm->t_bgp_zebra_l2_vni = NULL; bgp_mac_init(); /* init the rd id space. @@ -8762,6 +8785,7 @@ void bgp_terminate(void) EVENT_OFF(bm->t_bgp_sync_label_manager); EVENT_OFF(bm->t_bgp_start_label_manager); EVENT_OFF(bm->t_bgp_zebra_route); + EVENT_OFF(bm->t_bgp_zebra_l2_vni); bgp_mac_finish(); } diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index bb56fd355a05..fd2bd95e5f18 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -19,6 +19,7 @@ #include "asn.h" PREDECL_LIST(zebra_announce); +PREDECL_LIST(zebra_l2_vni); /* For union sockunion. */ #include "queue.h" @@ -204,6 +205,10 @@ struct bgp_master { /* To preserve ordering of installations into zebra across all Vrfs */ struct zebra_announce_head zebra_announce_head; + struct event *t_bgp_zebra_l2_vni; + /* To preserve ordering of processing of L2 VNIs in BGP */ + struct zebra_l2_vni_head zebra_l2_vni_head; + QOBJ_FIELDS; }; DECLARE_QOBJ_TYPE(bgp_master); From 0f2cb2731053a678c85115abda008dc1d012e591 Mon Sep 17 00:00:00 2001 From: Rajasekar Raja Date: Wed, 27 Nov 2024 00:04:51 -0800 Subject: [PATCH 3/4] bgpd: backpressure - Optimize EVPN L3VNI remote routes processing Anytime BGP gets a L3 VNI ADD/DEL from zebra, - Walking the entire global routing table per L3VNI is very expensive. - The next read (say of another VNI ADD/DEL) from the socket does not proceed unless this walk is complete. So for triggers where a bulk of L3VNI's are flapped, this results in huge output buffer FIFO growth spiking up the memory in zebra since bgp is slow/busy processing the first message. To avoid this, idea is to hookup the BGP-VRF off the struct bgp_master and maintain a struct bgp FIFO list which is processed later on, where we walk a chunk of BGP-VRFs and do the remote route install/uninstall. Ticket :#3864372 Signed-off-by: Rajasekar Raja --- bgpd/bgp_evpn.c | 250 ++++++++++++++---- bgpd/bgp_evpn.h | 1 + bgpd/bgp_main.c | 1 + bgpd/bgp_vty.c | 9 +- bgpd/bgp_zebra.c | 25 ++ bgpd/bgp_zebra.h | 1 + bgpd/bgpd.c | 25 +- bgpd/bgpd.h | 11 + .../test_evpn_type5_chaos_topo1.py | 12 + .../test_evpn_type5_topo1.py | 15 +- 10 files changed, 287 insertions(+), 63 deletions(-) diff --git a/bgpd/bgp_evpn.c b/bgpd/bgp_evpn.c index f4bae58724a2..c9b5640b0be9 100644 --- a/bgpd/bgp_evpn.c +++ b/bgpd/bgp_evpn.c @@ -79,6 +79,8 @@ static void bgp_evpn_remote_ip_hash_unlink_nexthop(struct hash_bucket *bucket, void *args); static struct in_addr zero_vtep_ip; +static void bgp_evpn_local_l3vni_del_post_processing(struct bgp *bgp_vrf); + /* * Private functions. */ @@ -3882,14 +3884,6 @@ int bgp_evpn_route_entry_install_if_vrf_match(struct bgp *bgp_vrf, const struct prefix_evpn *evp = (const struct prefix_evpn *)bgp_dest_get_prefix(pi->net); - /* Consider "valid" remote routes applicable for - * this VRF. - */ - if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID) - && pi->type == ZEBRA_ROUTE_BGP - && pi->sub_type == BGP_ROUTE_NORMAL)) - return 0; - if (is_route_matching_for_vrf(bgp_vrf, pi)) { if (bgp_evpn_route_rmac_self_check(bgp_vrf, evp, pi)) return 0; @@ -3916,26 +3910,66 @@ int bgp_evpn_route_entry_install_if_vrf_match(struct bgp *bgp_vrf, return ret; } +#define BGP_PROC_L3VNI_LIMIT 10 +static int install_uninstall_evpn_remote_route_per_l3vni(struct bgp_path_info *pi, + const struct prefix_evpn *evp) +{ + int ret = 0; + uint8_t vni_iter = 0; + bool is_install = false; + struct bgp *bgp_to_proc = NULL; + struct bgp *bgp_to_proc_next = NULL; + + for (bgp_to_proc = zebra_l3_vni_first(&bm->zebra_l3_vni_head); + bgp_to_proc && vni_iter < BGP_PROC_L3VNI_LIMIT; bgp_to_proc = bgp_to_proc_next) { + bgp_to_proc_next = zebra_l3_vni_next(&bm->zebra_l3_vni_head, bgp_to_proc); + vni_iter++; + is_install = !!CHECK_FLAG(bgp_to_proc->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL); + + ret = bgp_evpn_route_entry_install_if_vrf_match(bgp_to_proc, pi, is_install); + if (ret) { + flog_err(EC_BGP_EVPN_FAIL, + "%u: Failed to %s EVPN %s route in L3VNI %u during BP", + bgp_to_proc->vrf_id, is_install ? "install" : "uninstall", + bgp_evpn_route_type_str[evp->prefix.route_type].str, + bgp_to_proc->l3vni); + zebra_l3_vni_del(&bm->zebra_l3_vni_head, bgp_to_proc); + if (!is_install) + bgp_evpn_local_l3vni_del_post_processing(bgp_to_proc); + + return ret; + } + } + + return 0; +} /* * Install or uninstall mac-ip routes are appropriate for this * particular VRF. */ -static int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, bool install) +int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, bool install) { afi_t afi; safi_t safi; struct bgp_dest *rd_dest, *dest; struct bgp_table *table; struct bgp_path_info *pi; - int ret; + int ret = 0; struct bgp *bgp_evpn = NULL; + uint8_t count = 0; afi = AFI_L2VPN; safi = SAFI_EVPN; bgp_evpn = bgp_get_evpn(); - if (!bgp_evpn) + if (!bgp_evpn) { + zlog_warn("%s: No BGP EVPN instance found...", __func__); + return -1; + } + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Total %u L3VNI BGP-VRFs pending to be processed for remote route installation", + __func__, (uint32_t)zebra_l3_vni_count(&bm->zebra_l3_vni_head)); /* Walk entire global routing table and evaluate routes which could be * imported into this VRF. Note that we need to loop through all global * routes to determine which route matches the import rt on vrf @@ -3952,30 +3986,73 @@ static int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, bool install) (const struct prefix_evpn *)bgp_dest_get_prefix( dest); - /* if not mac-ip route skip this route */ - if (!(evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE - || evp->prefix.route_type - == BGP_EVPN_IP_PREFIX_ROUTE)) - continue; - - /* if not a mac+ip route skip this route */ - if (!(is_evpn_prefix_ipaddr_v4(evp) - || is_evpn_prefix_ipaddr_v6(evp))) + /* Proceed only for MAC-IP and IP-Prefix routes */ + switch (evp->prefix.route_type) { + case BGP_EVPN_MAC_IP_ROUTE: + case BGP_EVPN_IP_PREFIX_ROUTE: + if (!(is_evpn_prefix_ipaddr_v4(evp) || + is_evpn_prefix_ipaddr_v6(evp))) + continue; + break; + case BGP_EVPN_AD_ROUTE: + case BGP_EVPN_IMET_ROUTE: + case BGP_EVPN_ES_ROUTE: continue; + } for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) { - ret = bgp_evpn_route_entry_install_if_vrf_match( - bgp_vrf, pi, install); - if (ret) { - bgp_dest_unlock_node(rd_dest); - bgp_dest_unlock_node(dest); - return ret; + /* Consider "valid" remote routes applicable for + * this VRF */ + if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID) && + pi->type == ZEBRA_ROUTE_BGP && + pi->sub_type == BGP_ROUTE_NORMAL)) + continue; + + if (!bgp_vrf) { + ret = install_uninstall_evpn_remote_route_per_l3vni(pi, evp); + if (ret) { + bgp_dest_unlock_node(rd_dest); + bgp_dest_unlock_node(dest); + + return ret; + } + } else { + ret = bgp_evpn_route_entry_install_if_vrf_match(bgp_vrf, pi, + install); + if (ret) { + flog_err(EC_BGP_EVPN_FAIL, + "%u: Failed to %s EVPN %s route in L3VNI %u", + bgp_vrf->vrf_id, + install ? "install" : "uninstall", + bgp_evpn_route_type_str[evp->prefix.route_type] + .str, + bgp_vrf->l3vni); + bgp_dest_unlock_node(rd_dest); + bgp_dest_unlock_node(dest); + + return ret; + } } } } } + if (!bgp_vrf) { + while (count < BGP_PROC_L3VNI_LIMIT) { + struct bgp *bgp_to_proc = zebra_l3_vni_pop(&bm->zebra_l3_vni_head); + + if (!bgp_to_proc) + return 0; + + if (CHECK_FLAG(bgp_to_proc->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE)) + bgp_evpn_local_l3vni_del_post_processing(bgp_to_proc); + + UNSET_FLAG(bgp_to_proc->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL); + count++; + } + } + return 0; } @@ -6856,6 +6933,53 @@ static void link_l2vni_hash_to_l3vni(struct hash_bucket *bucket, bgpevpn_link_to_l3vni(vpn); } +static void bgp_evpn_l3vni_remote_route_processing(struct bgp *bgp, bool install) +{ + /* + * Anytime BGP gets a Bulk of L3 VNI ADD/DEL from zebra, + * - Walking the entire global routing table per VNI is very expensive. + * - The next read (say of another VNI ADD/DEL) from the socket does + * not proceed unless this walk is complete. + * This results in huge output buffer FIFO growth spiking up the + * memory in zebra. + * + * To avoid this, idea is to hookup the BGP-VRF off the struct + * bgp_master and maintain a struct bgp FIFO list which is processed + * later on, where we walk a chunk of BGP-VRFs and do the remote route + * install/uninstall. + */ + if (!CHECK_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL) && + !CHECK_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE)) + zebra_l3_vni_add_tail(&bm->zebra_l3_vni_head, bgp); + + if (install) { + SET_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL); + UNSET_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE); + } else { + SET_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE); + UNSET_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL); + } + + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("Scheduling L3VNI %s to be processed later for %s VNI %u", + install ? "ADD" : "DEL", bgp->name_pretty, bgp->l3vni); + /* + * If there are no BGP-VRFs's in the bm L3VNI FIFO list i.e. an update + * for an already processed L3VNI comes in, schedule the remote route + * install immediately. + * + * In all other cases, it is ok to schedule the remote route un/install + * after a small sleep. This is to give benefit of doubt in case more + * L3VNI events come. + */ + if (zebra_l3_vni_count(&bm->zebra_l3_vni_head)) + event_add_timer_msec(bm->master, bgp_zebra_process_remote_routes_for_l3vrf, NULL, + 20, &bm->t_bgp_zebra_l3_vni); + else + event_add_event(bm->master, bgp_zebra_process_remote_routes_for_l3vrf, NULL, 0, + &bm->t_bgp_zebra_l3_vni); +} + int bgp_evpn_local_l3vni_add(vni_t l3vni, vrf_id_t vrf_id, struct ethaddr *svi_rmac, struct ethaddr *vrr_rmac, @@ -7001,52 +7125,36 @@ int bgp_evpn_local_l3vni_add(vni_t l3vni, vrf_id_t vrf_id, /* advertise type-5 routes if needed */ update_advertise_vrf_routes(bgp_vrf); - /* install all remote routes belonging to this l3vni into correspondng - * vrf */ - install_routes_for_vrf(bgp_vrf); + bgp_evpn_l3vni_remote_route_processing(bgp_vrf, true); return 0; } -int bgp_evpn_local_l3vni_del(vni_t l3vni, vrf_id_t vrf_id) +static void bgp_evpn_local_l3vni_del_post_processing(struct bgp *bgp_vrf) { - struct bgp *bgp_vrf = NULL; /* bgp vrf instance */ struct bgp *bgp_evpn = NULL; /* EVPN bgp instance */ struct listnode *node = NULL; struct listnode *next = NULL; struct bgpevpn *vpn = NULL; - bgp_vrf = bgp_lookup_by_vrf_id(vrf_id); - if (!bgp_vrf) { - flog_err( - EC_BGP_NO_DFLT, - "Cannot process L3VNI %u Del - Could not find BGP instance", - l3vni); - return -1; - } - bgp_evpn = bgp_get_evpn(); if (!bgp_evpn) { - flog_err( - EC_BGP_NO_DFLT, - "Cannot process L3VNI %u Del - Could not find EVPN BGP instance", - l3vni); - return -1; + flog_err(EC_BGP_NO_DFLT, + "Cannot process L3VNI %u Del - Could not find EVPN BGP instance", + bgp_vrf->l3vni); + return; } if (CHECK_FLAG(bgp_evpn->flags, BGP_FLAG_DELETE_IN_PROGRESS)) { flog_err(EC_BGP_NO_DFLT, - "Cannot process L3VNI %u ADD - EVPN BGP instance is shutting down", - l3vni); - return -1; + "Cannot process L3VNI %u ADD - EVPN BGP instance is shutting down", + bgp_vrf->l3vni); + return; } - /* Remove remote routes from BGT VRF even if BGP_VRF_AUTO is configured, - * bgp_delete would not remove/decrement bgp_path_info of the ip_prefix - * routes. This will uninstalling the routes from zebra and decremnt the - * bgp info count. - */ - uninstall_routes_for_vrf(bgp_vrf); + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("In %s for L3VNI %u after remote route installation", __func__, + bgp_vrf->l3vni); /* delete/withdraw all type-5 routes */ delete_withdraw_vrf_routes(bgp_vrf); @@ -7092,10 +7200,44 @@ int bgp_evpn_local_l3vni_del(vni_t l3vni, vrf_id_t vrf_id) bgpevpn_unlink_from_l3vni(vpn); UNSET_FLAG(bgp_vrf->vrf_flags, BGP_VRF_L3VNI_PREFIX_ROUTES_ONLY); + UNSET_FLAG(bgp_vrf->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE); /* Delete the instance if it was autocreated */ if (CHECK_FLAG(bgp_vrf->vrf_flags, BGP_VRF_AUTO)) bgp_delete(bgp_vrf); +} + +int bgp_evpn_local_l3vni_del(vni_t l3vni, vrf_id_t vrf_id) +{ + struct bgp *bgp_evpn = NULL; /* EVPN bgp instance */ + struct bgp *bgp_vrf = NULL; /* bgp vrf instance */ + + bgp_vrf = bgp_lookup_by_vrf_id(vrf_id); + if (!bgp_vrf) { + flog_err(EC_BGP_NO_DFLT, + "Cannot process L3VNI %u Del - Could not find BGP instance", l3vni); + return -1; + } + + bgp_evpn = bgp_get_evpn(); + if (!bgp_evpn) { + flog_err(EC_BGP_NO_DFLT, + "Cannot process L3VNI %u Del - Could not find EVPN BGP instance", l3vni); + return -1; + } + + if (CHECK_FLAG(bgp_evpn->flags, BGP_FLAG_DELETE_IN_PROGRESS)) { + flog_err(EC_BGP_NO_DFLT, + "Cannot process L3VNI %u ADD - EVPN BGP instance is shutting down", l3vni); + return -1; + } + + /* + * Move all the l3vni_delete operation post the remote route + * installation processing i.e. add the L3VNI DELETE item on the + * BGP-VRFs FIFO and move on. + */ + bgp_evpn_l3vni_remote_route_processing(bgp_vrf, false); return 0; } diff --git a/bgpd/bgp_evpn.h b/bgpd/bgp_evpn.h index 75dde616ce78..8bbc5d3c37f2 100644 --- a/bgpd/bgp_evpn.h +++ b/bgpd/bgp_evpn.h @@ -201,4 +201,5 @@ int uninstall_evpn_route_entry_in_vrf(struct bgp *bgp_vrf, const struct prefix_e struct bgp_path_info *parent_pi); extern void bgp_zebra_evpn_pop_items_from_announce_fifo(struct bgpevpn *vpn); extern int install_uninstall_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn, bool install); +extern int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, bool install); #endif /* _QUAGGA_BGP_EVPN_H */ diff --git a/bgpd/bgp_main.c b/bgpd/bgp_main.c index 9d89fd6f96bc..9ca20c949a30 100644 --- a/bgpd/bgp_main.c +++ b/bgpd/bgp_main.c @@ -208,6 +208,7 @@ static __attribute__((__noreturn__)) void bgp_exit(int status) zebra_announce_fini(&bm->zebra_announce_head); zebra_l2_vni_fini(&bm->zebra_l2_vni_head); + zebra_l3_vni_fini(&bm->zebra_l3_vni_head); /* reverse bgp_dump_init */ bgp_dump_finish(); diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c index bb0c69ca56ee..2fc5dc847fe1 100644 --- a/bgpd/bgp_vty.c +++ b/bgpd/bgp_vty.c @@ -1696,8 +1696,13 @@ DEFUN (no_router_bgp, } if (bgp->l3vni) { - vty_out(vty, "%% Please unconfigure l3vni %u\n", - bgp->l3vni); + if (CHECK_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE)) + vty_out(vty, + "%% L3VNI %u is scheduled to be deleted. Please give it few secs and retry the command\n", + bgp->l3vni); + else + vty_out(vty, "%% Please unconfigure l3vni %u\n", bgp->l3vni); + return CMD_WARNING_CONFIG_FAILED; } diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c index 90e2d5af7bf6..6b7398fbc964 100644 --- a/bgpd/bgp_zebra.c +++ b/bgpd/bgp_zebra.c @@ -3046,6 +3046,31 @@ void bgp_zebra_process_remote_routes_for_l2vni(struct event *e) 20, &bm->t_bgp_zebra_l2_vni); } +void bgp_zebra_process_remote_routes_for_l3vrf(struct event *e) +{ + /* + * Install/Uninstall all remote routes belonging to l3vni + * + * NOTE: + * - At this point it does not matter whether we call + * install_routes_for_vrf/uninstall_routes_for_vrf. + * - Since we pass struct bgp as NULL, + * * we iterate the bm FIFO list + * * the second variable (true) is ignored as well and + * calculated based on the BGP-VRFs flags for ADD/DELETE. + */ + install_uninstall_routes_for_vrf(NULL, true); + + /* + * If there are L3VNIs still pending to be processed, schedule them + * after a small sleep so that CPU can be used for other purposes. + */ + if (zebra_l3_vni_count(&bm->zebra_l3_vni_head)) { + event_add_timer_msec(bm->master, bgp_zebra_process_remote_routes_for_l3vrf, NULL, + 20, &bm->t_bgp_zebra_l3_vni); + } +} + static int bgp_zebra_process_local_es_add(ZAPI_CALLBACK_ARGS) { esi_t esi; diff --git a/bgpd/bgp_zebra.h b/bgpd/bgp_zebra.h index 993d002998f2..7e9d57cb8521 100644 --- a/bgpd/bgp_zebra.h +++ b/bgpd/bgp_zebra.h @@ -136,4 +136,5 @@ extern enum zclient_send_status bgp_zebra_withdraw_actual(struct bgp_dest *dest, struct bgp_path_info *info, struct bgp *bgp); extern void bgp_zebra_process_remote_routes_for_l2vni(struct event *e); +extern void bgp_zebra_process_remote_routes_for_l3vrf(struct event *e); #endif /* _QUAGGA_BGP_ZEBRA_H */ diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index 44640c84f256..d580da4e1a38 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -3972,8 +3972,10 @@ int bgp_delete(struct bgp *bgp) struct bgp_dest *dest_next = NULL; struct bgp_table *dest_table = NULL; struct graceful_restart_info *gr_info; - uint32_t b_ann_cnt = 0, b_l2_cnt = 0; - uint32_t a_ann_cnt = 0, a_l2_cnt = 0; + uint32_t b_ann_cnt = 0, b_l2_cnt = 0, b_l3_cnt = 0; + uint32_t a_ann_cnt = 0, a_l2_cnt = 0, a_l3_cnt = 0; + struct bgp *bgp_to_proc = NULL; + struct bgp *bgp_to_proc_next = NULL; assert(bgp); @@ -4007,13 +4009,21 @@ int bgp_delete(struct bgp *bgp) } } + b_l3_cnt = zebra_l3_vni_count(&bm->zebra_l3_vni_head); + for (bgp_to_proc = zebra_l3_vni_first(&bm->zebra_l3_vni_head); bgp_to_proc; + bgp_to_proc = bgp_to_proc_next) { + bgp_to_proc_next = zebra_l3_vni_next(&bm->zebra_l3_vni_head, bgp_to_proc); + if (bgp_to_proc == bgp) + zebra_l3_vni_del(&bm->zebra_l3_vni_head, bgp_to_proc); + } + if (BGP_DEBUG(zebra, ZEBRA)) { a_ann_cnt = zebra_announce_count(&bm->zebra_announce_head); a_l2_cnt = zebra_l2_vni_count(&bm->zebra_l2_vni_head); - zlog_debug("FIFO Cleanup Count during BGP %s deletion :: " - "Zebra Announce - before %u after %u :: " - "BGP L2_VNI - before %u after %u", - bgp->name_pretty, b_ann_cnt, a_ann_cnt, b_l2_cnt, a_l2_cnt); + a_l3_cnt = zebra_l3_vni_count(&bm->zebra_l3_vni_head); + zlog_debug("BGP %s deletion FIFO cnt Zebra_Ann before %u after %u, L2_VNI before %u after, %u L3_VNI before %u after %u", + bgp->name_pretty, b_ann_cnt, a_ann_cnt, b_l2_cnt, a_l2_cnt, b_l3_cnt, + a_l3_cnt); } bgp_soft_reconfig_table_task_cancel(bgp, NULL, NULL); @@ -8514,6 +8524,7 @@ void bgp_master_init(struct event_loop *master, const int buffer_size, zebra_announce_init(&bm->zebra_announce_head); zebra_l2_vni_init(&bm->zebra_l2_vni_head); + zebra_l3_vni_init(&bm->zebra_l3_vni_head); bm->bgp = list_new(); bm->listen_sockets = list_new(); bm->port = BGP_PORT_DEFAULT; @@ -8538,6 +8549,7 @@ void bgp_master_init(struct event_loop *master, const int buffer_size, bm->select_defer_time = BGP_DEFAULT_SELECT_DEFERRAL_TIME; bm->rib_stale_time = BGP_DEFAULT_RIB_STALE_TIME; bm->t_bgp_zebra_l2_vni = NULL; + bm->t_bgp_zebra_l3_vni = NULL; bgp_mac_init(); /* init the rd id space. @@ -8786,6 +8798,7 @@ void bgp_terminate(void) EVENT_OFF(bm->t_bgp_start_label_manager); EVENT_OFF(bm->t_bgp_zebra_route); EVENT_OFF(bm->t_bgp_zebra_l2_vni); + EVENT_OFF(bm->t_bgp_zebra_l3_vni); bgp_mac_finish(); } diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index fd2bd95e5f18..f66b41abe942 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -20,6 +20,7 @@ PREDECL_LIST(zebra_announce); PREDECL_LIST(zebra_l2_vni); +PREDECL_LIST(zebra_l3_vni); /* For union sockunion. */ #include "queue.h" @@ -209,6 +210,10 @@ struct bgp_master { /* To preserve ordering of processing of L2 VNIs in BGP */ struct zebra_l2_vni_head zebra_l2_vni_head; + struct event *t_bgp_zebra_l3_vni; + /* To preserve ordering of processing of BGP-VRFs for L3 VNIs */ + struct zebra_l3_vni_head zebra_l3_vni_head; + QOBJ_FIELDS; }; DECLARE_QOBJ_TYPE(bgp_master); @@ -559,6 +564,8 @@ struct bgp { #define BGP_FLAG_INSTANCE_HIDDEN (1ULL << 39) /* Prohibit BGP from enabling IPv6 RA on interfaces */ #define BGP_FLAG_IPV6_NO_AUTO_RA (1ULL << 40) +#define BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL (1ULL << 41) +#define BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE (1ULL << 42) /* BGP default address-families. * New peers inherit enabled afi/safis from bgp instance. @@ -873,10 +880,14 @@ struct bgp { uint64_t node_already_on_queue; uint64_t node_deferred_on_queue; + struct zebra_l3_vni_item zl3vni; + QOBJ_FIELDS; }; DECLARE_QOBJ_TYPE(bgp); +DECLARE_LIST(zebra_l3_vni, struct bgp, zl3vni); + struct bgp_interface { #define BGP_INTERFACE_MPLS_BGP_FORWARDING (1 << 0) /* L3VPN multi domain switching */ diff --git a/tests/topotests/evpn_type5_test_topo1/test_evpn_type5_chaos_topo1.py b/tests/topotests/evpn_type5_test_topo1/test_evpn_type5_chaos_topo1.py index 45868663a8c6..cb3104a522e2 100644 --- a/tests/topotests/evpn_type5_test_topo1/test_evpn_type5_chaos_topo1.py +++ b/tests/topotests/evpn_type5_test_topo1/test_evpn_type5_chaos_topo1.py @@ -21,6 +21,8 @@ import time import pytest import platform +import functools +from lib import topotest from copy import deepcopy @@ -539,6 +541,16 @@ def test_RT_verification_auto_p0(request): result = create_vrf_cfg(tgen, topo, input_dict=input_dict_vni) assert result is True, "Testcase {} :Failed \n Error: {}".format(tc_name, result) + expected = {"numL3Vnis": 0} + test_func = functools.partial( + topotest.router_json_cmp, + tgen.gears["e1"], + "show bgp l2vpn evpn vni json", + expected, + ) + _, result = topotest.run_and_expect(test_func, None, count=5, wait=3) + assert result is None, "Testcase {} :Failed \n Error: {}".format(tc_name, result) + input_dict_2 = {} for dut in ["e1"]: temp = {dut: {"bgp": []}} diff --git a/tests/topotests/evpn_type5_test_topo1/test_evpn_type5_topo1.py b/tests/topotests/evpn_type5_test_topo1/test_evpn_type5_topo1.py index beb4de432e22..52181a75dcca 100644 --- a/tests/topotests/evpn_type5_test_topo1/test_evpn_type5_topo1.py +++ b/tests/topotests/evpn_type5_test_topo1/test_evpn_type5_topo1.py @@ -25,6 +25,8 @@ import time import pytest import platform +import functools +from lib import topotest from copy import deepcopy @@ -1124,7 +1126,6 @@ def test_active_standby_evpn_implementation_p1(request): ) for addr_type in ADDR_TYPES: - logger.info("Verifying only ipv4 routes") if addr_type != "ipv4": continue @@ -2050,6 +2051,18 @@ def test_bgp_attributes_for_evpn_address_family_p1(request, attribute): tc_name, result ) + expected = {"numL3Vnis": 0} + test_func = functools.partial( + topotest.router_json_cmp, + tgen.gears["d1"], + "show bgp l2vpn evpn vni json", + expected, + ) + _, result = topotest.run_and_expect(test_func, None, count=5, wait=3) + assert result is None, "Testcase {} :Failed \n Error: {}".format( + tc_name, result + ) + input_dict_2 = {} for dut in ["d1"]: temp = {dut: {"bgp": []}} From bd32706c704175872f5af23715639990775820eb Mon Sep 17 00:00:00 2001 From: Rajasekar Raja Date: Tue, 26 Nov 2024 12:02:53 -0800 Subject: [PATCH 4/4] bgpd: Suppress redundant L3VNI delete processing Consider a master bridge interface (br_l3vni) having a slave vxlan99 mapped to vlans used by 3 L3VNIs. During ifdown br_l3vni interface, the function zebra_vxlan_process_l3vni_oper_down() where zebra sends ZAPI to bgp for a delete L3VNI is sent twice. 1) if_down -> zebra_vxlan_svi_down() 2) VXLAN is unlinked from the bridge i.e. vxlan99 zebra_if_dplane_ifp_handling() --> zebra_vxlan_if_update_vni() (since ZEBRA_VXLIF_MASTER_CHANGE flag is set) During ifup br_l3vni interface, the function zebra_vxlan_process_l3vni_oper_down() is invoked because of access-vlan change - process oper down, associate with new svi_if and then process oper up again The problem here is that the redundant ZAPI message of L3VNI delete results in BGP doing a inline Global table walk for remote route installation when the L3VNI is already removed/deleted. Bigger the scale, more wastage is the CPU utilization. Given the triggers for bridge flap is not a common scenario, idea is to simply return from BGP if the L3VNI is already set to 0 i.e. if the L3VNI is already deleted, do nothing and return. NOTE/TBD: An ideal fix is to make zebra not send the second L3VNI delete ZAPI message. However it is a much involved and a day-1 code to handle corner cases. Ticket :#3864372 Signed-off-by: Rajasekar Raja --- bgpd/bgp_evpn.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/bgpd/bgp_evpn.c b/bgpd/bgp_evpn.c index c9b5640b0be9..64feeb38d1ef 100644 --- a/bgpd/bgp_evpn.c +++ b/bgpd/bgp_evpn.c @@ -7232,6 +7232,14 @@ int bgp_evpn_local_l3vni_del(vni_t l3vni, vrf_id_t vrf_id) return -1; } + if (!bgp_vrf->l3vni) { + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("Returning from %s since VNI %u is already deleted", __func__, + l3vni); + + return -1; + } + /* * Move all the l3vni_delete operation post the remote route * installation processing i.e. add the L3VNI DELETE item on the