diff --git a/bgpd/bgp_evpn.c b/bgpd/bgp_evpn.c index f173bd01f20f..64feeb38d1ef 100644 --- a/bgpd/bgp_evpn.c +++ b/bgpd/bgp_evpn.c @@ -79,6 +79,8 @@ static void bgp_evpn_remote_ip_hash_unlink_nexthop(struct hash_bucket *bucket, void *args); static struct in_addr zero_vtep_ip; +static void bgp_evpn_local_l3vni_del_post_processing(struct bgp *bgp_vrf); + /* * Private functions. */ @@ -3882,14 +3884,6 @@ int bgp_evpn_route_entry_install_if_vrf_match(struct bgp *bgp_vrf, const struct prefix_evpn *evp = (const struct prefix_evpn *)bgp_dest_get_prefix(pi->net); - /* Consider "valid" remote routes applicable for - * this VRF. - */ - if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID) - && pi->type == ZEBRA_ROUTE_BGP - && pi->sub_type == BGP_ROUTE_NORMAL)) - return 0; - if (is_route_matching_for_vrf(bgp_vrf, pi)) { if (bgp_evpn_route_rmac_self_check(bgp_vrf, evp, pi)) return 0; @@ -3916,26 +3910,66 @@ int bgp_evpn_route_entry_install_if_vrf_match(struct bgp *bgp_vrf, return ret; } +#define BGP_PROC_L3VNI_LIMIT 10 +static int install_uninstall_evpn_remote_route_per_l3vni(struct bgp_path_info *pi, + const struct prefix_evpn *evp) +{ + int ret = 0; + uint8_t vni_iter = 0; + bool is_install = false; + struct bgp *bgp_to_proc = NULL; + struct bgp *bgp_to_proc_next = NULL; + + for (bgp_to_proc = zebra_l3_vni_first(&bm->zebra_l3_vni_head); + bgp_to_proc && vni_iter < BGP_PROC_L3VNI_LIMIT; bgp_to_proc = bgp_to_proc_next) { + bgp_to_proc_next = zebra_l3_vni_next(&bm->zebra_l3_vni_head, bgp_to_proc); + vni_iter++; + is_install = !!CHECK_FLAG(bgp_to_proc->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL); + + ret = bgp_evpn_route_entry_install_if_vrf_match(bgp_to_proc, pi, is_install); + if (ret) { + flog_err(EC_BGP_EVPN_FAIL, + "%u: Failed to %s EVPN %s route in L3VNI %u during BP", + bgp_to_proc->vrf_id, is_install ? "install" : "uninstall", + bgp_evpn_route_type_str[evp->prefix.route_type].str, + bgp_to_proc->l3vni); + zebra_l3_vni_del(&bm->zebra_l3_vni_head, bgp_to_proc); + if (!is_install) + bgp_evpn_local_l3vni_del_post_processing(bgp_to_proc); + + return ret; + } + } + + return 0; +} /* * Install or uninstall mac-ip routes are appropriate for this * particular VRF. */ -static int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, bool install) +int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, bool install) { afi_t afi; safi_t safi; struct bgp_dest *rd_dest, *dest; struct bgp_table *table; struct bgp_path_info *pi; - int ret; + int ret = 0; struct bgp *bgp_evpn = NULL; + uint8_t count = 0; afi = AFI_L2VPN; safi = SAFI_EVPN; bgp_evpn = bgp_get_evpn(); - if (!bgp_evpn) + if (!bgp_evpn) { + zlog_warn("%s: No BGP EVPN instance found...", __func__); + return -1; + } + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Total %u L3VNI BGP-VRFs pending to be processed for remote route installation", + __func__, (uint32_t)zebra_l3_vni_count(&bm->zebra_l3_vni_head)); /* Walk entire global routing table and evaluate routes which could be * imported into this VRF. Note that we need to loop through all global * routes to determine which route matches the import rt on vrf @@ -3952,30 +3986,109 @@ static int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, bool install) (const struct prefix_evpn *)bgp_dest_get_prefix( dest); - /* if not mac-ip route skip this route */ - if (!(evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE - || evp->prefix.route_type - == BGP_EVPN_IP_PREFIX_ROUTE)) - continue; - - /* if not a mac+ip route skip this route */ - if (!(is_evpn_prefix_ipaddr_v4(evp) - || is_evpn_prefix_ipaddr_v6(evp))) + /* Proceed only for MAC-IP and IP-Prefix routes */ + switch (evp->prefix.route_type) { + case BGP_EVPN_MAC_IP_ROUTE: + case BGP_EVPN_IP_PREFIX_ROUTE: + if (!(is_evpn_prefix_ipaddr_v4(evp) || + is_evpn_prefix_ipaddr_v6(evp))) + continue; + break; + case BGP_EVPN_AD_ROUTE: + case BGP_EVPN_IMET_ROUTE: + case BGP_EVPN_ES_ROUTE: continue; + } for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) { - ret = bgp_evpn_route_entry_install_if_vrf_match( - bgp_vrf, pi, install); - if (ret) { - bgp_dest_unlock_node(rd_dest); - bgp_dest_unlock_node(dest); - return ret; + /* Consider "valid" remote routes applicable for + * this VRF */ + if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID) && + pi->type == ZEBRA_ROUTE_BGP && + pi->sub_type == BGP_ROUTE_NORMAL)) + continue; + + if (!bgp_vrf) { + ret = install_uninstall_evpn_remote_route_per_l3vni(pi, evp); + if (ret) { + bgp_dest_unlock_node(rd_dest); + bgp_dest_unlock_node(dest); + + return ret; + } + } else { + ret = bgp_evpn_route_entry_install_if_vrf_match(bgp_vrf, pi, + install); + if (ret) { + flog_err(EC_BGP_EVPN_FAIL, + "%u: Failed to %s EVPN %s route in L3VNI %u", + bgp_vrf->vrf_id, + install ? "install" : "uninstall", + bgp_evpn_route_type_str[evp->prefix.route_type] + .str, + bgp_vrf->l3vni); + bgp_dest_unlock_node(rd_dest); + bgp_dest_unlock_node(dest); + + return ret; + } } } } } + if (!bgp_vrf) { + while (count < BGP_PROC_L3VNI_LIMIT) { + struct bgp *bgp_to_proc = zebra_l3_vni_pop(&bm->zebra_l3_vni_head); + + if (!bgp_to_proc) + return 0; + + if (CHECK_FLAG(bgp_to_proc->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE)) + bgp_evpn_local_l3vni_del_post_processing(bgp_to_proc); + + UNSET_FLAG(bgp_to_proc->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL); + count++; + } + } + + return 0; +} + +#define BGP_PROC_L2VNI_LIMIT 10 +static int install_evpn_remote_route_per_l2vni(struct bgp *bgp, struct bgp_path_info *pi, + const struct prefix_evpn *evp) +{ + int ret = 0; + uint8_t vni_iter = 0; + struct bgpevpn *t_vpn = NULL; + struct bgpevpn *t_vpn_next = NULL; + + for (t_vpn = zebra_l2_vni_first(&bm->zebra_l2_vni_head); + t_vpn && vni_iter < BGP_PROC_L2VNI_LIMIT; t_vpn = t_vpn_next) { + t_vpn_next = zebra_l2_vni_next(&bm->zebra_l2_vni_head, t_vpn); + vni_iter++; + /* + * Skip install/uninstall if the route entry is not needed to + * be imported into the VNI i.e. RTs dont match + */ + if (!is_route_matching_for_vni(bgp, t_vpn, pi)) + continue; + + ret = install_evpn_route_entry(bgp, t_vpn, evp, pi); + + if (ret) { + flog_err(EC_BGP_EVPN_FAIL, + "%u: Failed to install EVPN %s route in VNI %u during BP", + bgp->vrf_id, bgp_evpn_route_type_str[evp->prefix.route_type].str, + t_vpn->vni); + zebra_l2_vni_del(&bm->zebra_l2_vni_head, t_vpn); + + return ret; + } + } + return 0; } @@ -3983,26 +4096,40 @@ static int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, bool install) * Install or uninstall routes of specified type that are appropriate for this * particular VNI. */ -static int install_uninstall_routes_for_vni(struct bgp *bgp, - struct bgpevpn *vpn, bool install) +int install_uninstall_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn, bool install) { afi_t afi; safi_t safi; struct bgp_dest *rd_dest, *dest; struct bgp_table *table; struct bgp_path_info *pi; - int ret; + int ret = 0; + uint8_t count = 0; + bool walk_fifo = false; afi = AFI_L2VPN; safi = SAFI_EVPN; - /* Walk entire global routing table and evaluate routes which could be + if (!bgp) { + walk_fifo = true; + bgp = bgp_get_evpn(); + if (!bgp) { + zlog_warn("%s: No BGP EVPN instance found...", __func__); + + return -1; + } + } + + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Total %u L2VNI VPNs pending to be processed for remote route installation", + __func__, (uint32_t)zebra_l2_vni_count(&bm->zebra_l2_vni_head)); + /* + * Walk entire global routing table and evaluate routes which could be * imported into this VPN. Note that we cannot just look at the routes - * for - * the VNI's RD - remote routes applicable for this VNI could have any - * RD. + * for the VNI's RD - remote routes applicable for this VNI could have + * any RD. + * Note: EVPN routes are a 2-level table. */ - /* EVPN routes are a 2-level table. */ for (rd_dest = bgp_table_top(bgp->rib[afi][safi]); rd_dest; rd_dest = bgp_route_next(rd_dest)) { table = bgp_dest_get_bgp_table_info(rd_dest); @@ -4015,54 +4142,80 @@ static int install_uninstall_routes_for_vni(struct bgp *bgp, (const struct prefix_evpn *)bgp_dest_get_prefix( dest); - if (evp->prefix.route_type != BGP_EVPN_IMET_ROUTE && - evp->prefix.route_type != BGP_EVPN_AD_ROUTE && - evp->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE) + /* Proceed only for AD, MAC_IP and IMET routes */ + switch (evp->prefix.route_type) { + case BGP_EVPN_AD_ROUTE: + case BGP_EVPN_MAC_IP_ROUTE: + case BGP_EVPN_IMET_ROUTE: + break; + case BGP_EVPN_ES_ROUTE: + case BGP_EVPN_IP_PREFIX_ROUTE: continue; + } for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) { - /* Consider "valid" remote routes applicable for - * this VNI. */ - if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID) - && pi->type == ZEBRA_ROUTE_BGP - && pi->sub_type == BGP_ROUTE_NORMAL)) - continue; - - if (!is_route_matching_for_vni(bgp, vpn, pi)) + /* + * Skip install/uninstall if + * - Not a valid remote routes + * - Install & evpn route matchesi macvrf SOO + */ + if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID) && + pi->type == ZEBRA_ROUTE_BGP && + pi->sub_type == BGP_ROUTE_NORMAL) || + (install && bgp_evpn_route_matches_macvrf_soo(pi, evp))) continue; - if (install) { - if (bgp_evpn_route_matches_macvrf_soo( - pi, evp)) + if (walk_fifo) { + ret = install_evpn_remote_route_per_l2vni(bgp, pi, evp); + if (ret) { + bgp_dest_unlock_node(rd_dest); + bgp_dest_unlock_node(dest); + return ret; + } + } else { + /* + * Skip install/uninstall if the route + * entry is not needed to be imported + * into the VNI i.e. RTs dont match + */ + if (!is_route_matching_for_vni(bgp, vpn, pi)) continue; - ret = install_evpn_route_entry(bgp, vpn, - evp, pi); - } else - ret = uninstall_evpn_route_entry( - bgp, vpn, evp, pi); - - if (ret) { - flog_err(EC_BGP_EVPN_FAIL, - "%u: Failed to %s EVPN %s route in VNI %u", - bgp->vrf_id, - install ? "install" - : "uninstall", - evp->prefix.route_type == - BGP_EVPN_MAC_IP_ROUTE - ? "MACIP" - : "IMET", - vpn->vni); - - bgp_dest_unlock_node(rd_dest); - bgp_dest_unlock_node(dest); - return ret; + if (install) + ret = install_evpn_route_entry(bgp, vpn, evp, pi); + else + ret = uninstall_evpn_route_entry(bgp, vpn, evp, pi); + + if (ret) { + flog_err(EC_BGP_EVPN_FAIL, + "%u: Failed to %s EVPN %s route in VNI %u", + bgp->vrf_id, + install ? "install" : "uninstall", + bgp_evpn_route_type_str[evp->prefix.route_type] + .str, + vpn->vni); + + bgp_dest_unlock_node(rd_dest); + bgp_dest_unlock_node(dest); + return ret; + } } } } } + if (walk_fifo) { + while (count < BGP_PROC_L2VNI_LIMIT) { + vpn = zebra_l2_vni_pop(&bm->zebra_l2_vni_head); + if (!vpn) + return 0; + + UNSET_FLAG(vpn->flags, VNI_FLAG_ADD); + count++; + } + } + return 0; } @@ -6780,6 +6933,53 @@ static void link_l2vni_hash_to_l3vni(struct hash_bucket *bucket, bgpevpn_link_to_l3vni(vpn); } +static void bgp_evpn_l3vni_remote_route_processing(struct bgp *bgp, bool install) +{ + /* + * Anytime BGP gets a Bulk of L3 VNI ADD/DEL from zebra, + * - Walking the entire global routing table per VNI is very expensive. + * - The next read (say of another VNI ADD/DEL) from the socket does + * not proceed unless this walk is complete. + * This results in huge output buffer FIFO growth spiking up the + * memory in zebra. + * + * To avoid this, idea is to hookup the BGP-VRF off the struct + * bgp_master and maintain a struct bgp FIFO list which is processed + * later on, where we walk a chunk of BGP-VRFs and do the remote route + * install/uninstall. + */ + if (!CHECK_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL) && + !CHECK_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE)) + zebra_l3_vni_add_tail(&bm->zebra_l3_vni_head, bgp); + + if (install) { + SET_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL); + UNSET_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE); + } else { + SET_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE); + UNSET_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL); + } + + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("Scheduling L3VNI %s to be processed later for %s VNI %u", + install ? "ADD" : "DEL", bgp->name_pretty, bgp->l3vni); + /* + * If there are no BGP-VRFs's in the bm L3VNI FIFO list i.e. an update + * for an already processed L3VNI comes in, schedule the remote route + * install immediately. + * + * In all other cases, it is ok to schedule the remote route un/install + * after a small sleep. This is to give benefit of doubt in case more + * L3VNI events come. + */ + if (zebra_l3_vni_count(&bm->zebra_l3_vni_head)) + event_add_timer_msec(bm->master, bgp_zebra_process_remote_routes_for_l3vrf, NULL, + 20, &bm->t_bgp_zebra_l3_vni); + else + event_add_event(bm->master, bgp_zebra_process_remote_routes_for_l3vrf, NULL, 0, + &bm->t_bgp_zebra_l3_vni); +} + int bgp_evpn_local_l3vni_add(vni_t l3vni, vrf_id_t vrf_id, struct ethaddr *svi_rmac, struct ethaddr *vrr_rmac, @@ -6925,52 +7125,36 @@ int bgp_evpn_local_l3vni_add(vni_t l3vni, vrf_id_t vrf_id, /* advertise type-5 routes if needed */ update_advertise_vrf_routes(bgp_vrf); - /* install all remote routes belonging to this l3vni into correspondng - * vrf */ - install_routes_for_vrf(bgp_vrf); + bgp_evpn_l3vni_remote_route_processing(bgp_vrf, true); return 0; } -int bgp_evpn_local_l3vni_del(vni_t l3vni, vrf_id_t vrf_id) +static void bgp_evpn_local_l3vni_del_post_processing(struct bgp *bgp_vrf) { - struct bgp *bgp_vrf = NULL; /* bgp vrf instance */ struct bgp *bgp_evpn = NULL; /* EVPN bgp instance */ struct listnode *node = NULL; struct listnode *next = NULL; struct bgpevpn *vpn = NULL; - bgp_vrf = bgp_lookup_by_vrf_id(vrf_id); - if (!bgp_vrf) { - flog_err( - EC_BGP_NO_DFLT, - "Cannot process L3VNI %u Del - Could not find BGP instance", - l3vni); - return -1; - } - bgp_evpn = bgp_get_evpn(); if (!bgp_evpn) { - flog_err( - EC_BGP_NO_DFLT, - "Cannot process L3VNI %u Del - Could not find EVPN BGP instance", - l3vni); - return -1; + flog_err(EC_BGP_NO_DFLT, + "Cannot process L3VNI %u Del - Could not find EVPN BGP instance", + bgp_vrf->l3vni); + return; } if (CHECK_FLAG(bgp_evpn->flags, BGP_FLAG_DELETE_IN_PROGRESS)) { flog_err(EC_BGP_NO_DFLT, - "Cannot process L3VNI %u ADD - EVPN BGP instance is shutting down", - l3vni); - return -1; + "Cannot process L3VNI %u ADD - EVPN BGP instance is shutting down", + bgp_vrf->l3vni); + return; } - /* Remove remote routes from BGT VRF even if BGP_VRF_AUTO is configured, - * bgp_delete would not remove/decrement bgp_path_info of the ip_prefix - * routes. This will uninstalling the routes from zebra and decremnt the - * bgp info count. - */ - uninstall_routes_for_vrf(bgp_vrf); + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("In %s for L3VNI %u after remote route installation", __func__, + bgp_vrf->l3vni); /* delete/withdraw all type-5 routes */ delete_withdraw_vrf_routes(bgp_vrf); @@ -7016,14 +7200,95 @@ int bgp_evpn_local_l3vni_del(vni_t l3vni, vrf_id_t vrf_id) bgpevpn_unlink_from_l3vni(vpn); UNSET_FLAG(bgp_vrf->vrf_flags, BGP_VRF_L3VNI_PREFIX_ROUTES_ONLY); + UNSET_FLAG(bgp_vrf->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE); /* Delete the instance if it was autocreated */ if (CHECK_FLAG(bgp_vrf->vrf_flags, BGP_VRF_AUTO)) bgp_delete(bgp_vrf); +} + +int bgp_evpn_local_l3vni_del(vni_t l3vni, vrf_id_t vrf_id) +{ + struct bgp *bgp_evpn = NULL; /* EVPN bgp instance */ + struct bgp *bgp_vrf = NULL; /* bgp vrf instance */ + + bgp_vrf = bgp_lookup_by_vrf_id(vrf_id); + if (!bgp_vrf) { + flog_err(EC_BGP_NO_DFLT, + "Cannot process L3VNI %u Del - Could not find BGP instance", l3vni); + return -1; + } + + bgp_evpn = bgp_get_evpn(); + if (!bgp_evpn) { + flog_err(EC_BGP_NO_DFLT, + "Cannot process L3VNI %u Del - Could not find EVPN BGP instance", l3vni); + return -1; + } + + if (CHECK_FLAG(bgp_evpn->flags, BGP_FLAG_DELETE_IN_PROGRESS)) { + flog_err(EC_BGP_NO_DFLT, + "Cannot process L3VNI %u ADD - EVPN BGP instance is shutting down", l3vni); + return -1; + } + + if (!bgp_vrf->l3vni) { + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("Returning from %s since VNI %u is already deleted", __func__, + l3vni); + + return -1; + } + + /* + * Move all the l3vni_delete operation post the remote route + * installation processing i.e. add the L3VNI DELETE item on the + * BGP-VRFs FIFO and move on. + */ + bgp_evpn_l3vni_remote_route_processing(bgp_vrf, false); return 0; } +static void bgp_evpn_l2vni_remote_route_processing(struct bgpevpn *vpn) +{ + /* + * Anytime BGP gets a Bulk of L2 VNIs ADD/UPD from zebra, + * - Walking the entire global routing table per VNI is very expensive. + * - The next read (say of another VNI ADD/UPD) from the socket does + * not proceed unless this walk is complete. + * This results in huge output buffer FIFO growth spiking up the + * memory in zebra. + * + * To avoid this, idea is to hookup the VPN off the struct bgp_master + * and maintain a VPN FIFO list which is processed later on, where we + * walk a chunk of VPNs and do the remote route install. + */ + if (!CHECK_FLAG(vpn->flags, VNI_FLAG_ADD)) { + zebra_l2_vni_add_tail(&bm->zebra_l2_vni_head, vpn); + SET_FLAG(vpn->flags, VNI_FLAG_ADD); + } + + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("Scheduling L2VNI ADD to be processed later for VNI %u", vpn->vni); + + /* + * If there are no VNI's in the bgp VPN FIFO list i.e. an update + * for an already processed VNI comes in, schedule the remote + * route install immediately. + * + * In all other cases, it is ok to schedule the remote route install + * after a small sleep. This is to give benefit of doubt in case more + * L2VNI ADD events come. + */ + if (zebra_l2_vni_count(&bm->zebra_l2_vni_head)) + event_add_timer_msec(bm->master, bgp_zebra_process_remote_routes_for_l2vni, NULL, + 20, &bm->t_bgp_zebra_l2_vni); + else + event_add_event(bm->master, bgp_zebra_process_remote_routes_for_l2vni, NULL, 0, + &bm->t_bgp_zebra_l2_vni); +} + /* * When bgp instance goes down also clean up what might have been left over * from evpn. @@ -7047,6 +7312,10 @@ int bgp_evpn_local_vni_del(struct bgp *bgp, vni_t vni) if (!vpn) return 0; + /* Remove the VPN from the bgp VPN FIFO (if exists) */ + UNSET_FLAG(vpn->flags, VNI_FLAG_ADD); + zebra_l2_vni_del(&bm->zebra_l2_vni_head, vpn); + /* Remove all local EVPN routes and schedule for processing (to * withdraw from peers). */ @@ -7203,12 +7472,6 @@ int bgp_evpn_local_vni_add(struct bgp *bgp, vni_t vni, } } - /* If we have learnt and retained remote routes (VTEPs, MACs) for this - * VNI, - * install them. - */ - install_routes_for_vni(bgp, vpn); - /* If we are advertising gateway mac-ip It needs to be conveyed again to zebra */ bgp_zebra_advertise_gw_macip(bgp, vpn->advertise_gw_macip, vpn->vni); @@ -7216,6 +7479,8 @@ int bgp_evpn_local_vni_add(struct bgp *bgp, vni_t vni, /* advertise svi mac-ip knob to zebra */ bgp_zebra_advertise_svi_macip(bgp, vpn->advertise_svi_macip, vpn->vni); + bgp_evpn_l2vni_remote_route_processing(vpn); + return 0; } @@ -7245,8 +7510,17 @@ void bgp_evpn_flood_control_change(struct bgp *bgp) */ void bgp_evpn_cleanup_on_disable(struct bgp *bgp) { - hash_iterate(bgp->vnihash, (void (*)(struct hash_bucket *, - void *))cleanup_vni_on_disable, + struct bgpevpn *vpn = NULL; + uint32_t vni_count = zebra_l2_vni_count(&bm->zebra_l2_vni_head); + + /* Cleanup VNI FIFO list from this bgp instance */ + while (vni_count) { + vpn = zebra_l2_vni_pop(&bm->zebra_l2_vni_head); + UNSET_FLAG(vpn->flags, VNI_FLAG_ADD); + vni_count--; + } + + hash_iterate(bgp->vnihash, (void (*)(struct hash_bucket *, void *))cleanup_vni_on_disable, bgp); } diff --git a/bgpd/bgp_evpn.h b/bgpd/bgp_evpn.h index 1a333a5a09a3..8bbc5d3c37f2 100644 --- a/bgpd/bgp_evpn.h +++ b/bgpd/bgp_evpn.h @@ -200,4 +200,6 @@ bool bgp_evpn_skip_vrf_import_of_local_es(struct bgp *bgp_vrf, const struct pref int uninstall_evpn_route_entry_in_vrf(struct bgp *bgp_vrf, const struct prefix_evpn *evp, struct bgp_path_info *parent_pi); extern void bgp_zebra_evpn_pop_items_from_announce_fifo(struct bgpevpn *vpn); +extern int install_uninstall_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn, bool install); +extern int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, bool install); #endif /* _QUAGGA_BGP_EVPN_H */ diff --git a/bgpd/bgp_evpn_private.h b/bgpd/bgp_evpn_private.h index b05df3d82ae7..568d3d45eed3 100644 --- a/bgpd/bgp_evpn_private.h +++ b/bgpd/bgp_evpn_private.h @@ -32,6 +32,13 @@ #define BGP_EVPN_TYPE4_V4_PSIZE 23 #define BGP_EVPN_TYPE4_V6_PSIZE 34 +static const struct message bgp_evpn_route_type_str[] = { { BGP_EVPN_AD_ROUTE, "AD" }, + { BGP_EVPN_MAC_IP_ROUTE, "MACIP" }, + { BGP_EVPN_IMET_ROUTE, "IMET" }, + { BGP_EVPN_ES_ROUTE, "ES" }, + { BGP_EVPN_IP_PREFIX_ROUTE, "IP-PREFIX" }, + { 0 } }; + RB_HEAD(bgp_es_evi_rb_head, bgp_evpn_es_evi); RB_PROTOTYPE(bgp_es_evi_rb_head, bgp_evpn_es_evi, rb_node, bgp_es_evi_rb_cmp); @@ -53,8 +60,9 @@ struct bgpevpn { #define VNI_FLAG_RD_CFGD 0x4 /* RD is user configured. */ #define VNI_FLAG_IMPRT_CFGD 0x8 /* Import RT is user configured */ #define VNI_FLAG_EXPRT_CFGD 0x10 /* Export RT is user configured */ -#define VNI_FLAG_USE_TWO_LABELS 0x20 /* Attach both L2-VNI and L3-VNI if - needed for this VPN */ +/* Attach both L2-VNI and L3-VNI if needed for this VPN */ +#define VNI_FLAG_USE_TWO_LABELS 0x20 +#define VNI_FLAG_ADD 0x40 /* L2VNI Add */ struct bgp *bgp_vrf; /* back pointer to the vrf instance */ @@ -108,11 +116,15 @@ struct bgpevpn { /* List of local ESs */ struct list *local_es_evi_list; + struct zebra_l2_vni_item zl2vni; + QOBJ_FIELDS; }; DECLARE_QOBJ_TYPE(bgpevpn); +DECLARE_LIST(zebra_l2_vni, struct bgpevpn, zl2vni); + /* Mapping of Import RT to VNIs. * The Import RTs of all VNIs are maintained in a hash table with each * RT linking to all VNIs that will import routes matching this RT. diff --git a/bgpd/bgp_main.c b/bgpd/bgp_main.c index 535d2fc5f434..9ca20c949a30 100644 --- a/bgpd/bgp_main.c +++ b/bgpd/bgp_main.c @@ -207,6 +207,8 @@ static __attribute__((__noreturn__)) void bgp_exit(int status) bgp_nhg_finish(); zebra_announce_fini(&bm->zebra_announce_head); + zebra_l2_vni_fini(&bm->zebra_l2_vni_head); + zebra_l3_vni_fini(&bm->zebra_l3_vni_head); /* reverse bgp_dump_init */ bgp_dump_finish(); diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c index bb0c69ca56ee..2fc5dc847fe1 100644 --- a/bgpd/bgp_vty.c +++ b/bgpd/bgp_vty.c @@ -1696,8 +1696,13 @@ DEFUN (no_router_bgp, } if (bgp->l3vni) { - vty_out(vty, "%% Please unconfigure l3vni %u\n", - bgp->l3vni); + if (CHECK_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE)) + vty_out(vty, + "%% L3VNI %u is scheduled to be deleted. Please give it few secs and retry the command\n", + bgp->l3vni); + else + vty_out(vty, "%% Please unconfigure l3vni %u\n", bgp->l3vni); + return CMD_WARNING_CONFIG_FAILED; } diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c index ac4a6bb03bf6..6b7398fbc964 100644 --- a/bgpd/bgp_zebra.c +++ b/bgpd/bgp_zebra.c @@ -3029,6 +3029,48 @@ static void bgp_zebra_connected(struct zclient *zclient) BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(bgp, bgp->peer); } +void bgp_zebra_process_remote_routes_for_l2vni(struct event *e) +{ + /* + * If we have learnt and retained remote routes (VTEPs, MACs) + * for this VNI, install them. + */ + install_uninstall_routes_for_vni(NULL, NULL, true); + + /* + * If there are VNIs still pending to be processed, schedule them + * after a small sleep so that CPU can be used for other purposes. + */ + if (zebra_l2_vni_count(&bm->zebra_l2_vni_head)) + event_add_timer_msec(bm->master, bgp_zebra_process_remote_routes_for_l2vni, NULL, + 20, &bm->t_bgp_zebra_l2_vni); +} + +void bgp_zebra_process_remote_routes_for_l3vrf(struct event *e) +{ + /* + * Install/Uninstall all remote routes belonging to l3vni + * + * NOTE: + * - At this point it does not matter whether we call + * install_routes_for_vrf/uninstall_routes_for_vrf. + * - Since we pass struct bgp as NULL, + * * we iterate the bm FIFO list + * * the second variable (true) is ignored as well and + * calculated based on the BGP-VRFs flags for ADD/DELETE. + */ + install_uninstall_routes_for_vrf(NULL, true); + + /* + * If there are L3VNIs still pending to be processed, schedule them + * after a small sleep so that CPU can be used for other purposes. + */ + if (zebra_l3_vni_count(&bm->zebra_l3_vni_head)) { + event_add_timer_msec(bm->master, bgp_zebra_process_remote_routes_for_l3vrf, NULL, + 20, &bm->t_bgp_zebra_l3_vni); + } +} + static int bgp_zebra_process_local_es_add(ZAPI_CALLBACK_ARGS) { esi_t esi; diff --git a/bgpd/bgp_zebra.h b/bgpd/bgp_zebra.h index 8deecba747b3..7e9d57cb8521 100644 --- a/bgpd/bgp_zebra.h +++ b/bgpd/bgp_zebra.h @@ -135,4 +135,6 @@ extern void bgp_zebra_release_label_range(uint32_t start, uint32_t end); extern enum zclient_send_status bgp_zebra_withdraw_actual(struct bgp_dest *dest, struct bgp_path_info *info, struct bgp *bgp); +extern void bgp_zebra_process_remote_routes_for_l2vni(struct event *e); +extern void bgp_zebra_process_remote_routes_for_l3vrf(struct event *e); #endif /* _QUAGGA_BGP_ZEBRA_H */ diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index 7b21c29ea663..d580da4e1a38 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -3966,11 +3966,16 @@ int bgp_delete(struct bgp *bgp) afi_t afi; safi_t safi; int i; + uint32_t vni_count; + struct bgpevpn *vpn = NULL; struct bgp_dest *dest = NULL; struct bgp_dest *dest_next = NULL; struct bgp_table *dest_table = NULL; struct graceful_restart_info *gr_info; - uint32_t cnt_before, cnt_after; + uint32_t b_ann_cnt = 0, b_l2_cnt = 0, b_l3_cnt = 0; + uint32_t a_ann_cnt = 0, a_l2_cnt = 0, a_l3_cnt = 0; + struct bgp *bgp_to_proc = NULL; + struct bgp *bgp_to_proc_next = NULL; assert(bgp); @@ -3978,7 +3983,7 @@ int bgp_delete(struct bgp *bgp) * Iterate the pending dest list and remove all the dest pertaininig to * the bgp under delete. */ - cnt_before = zebra_announce_count(&bm->zebra_announce_head); + b_ann_cnt = zebra_announce_count(&bm->zebra_announce_head); for (dest = zebra_announce_first(&bm->zebra_announce_head); dest; dest = dest_next) { dest_next = zebra_announce_next(&bm->zebra_announce_head, dest); @@ -3990,10 +3995,36 @@ int bgp_delete(struct bgp *bgp) } } - cnt_after = zebra_announce_count(&bm->zebra_announce_head); - if (BGP_DEBUG(zebra, ZEBRA)) - zlog_debug("Zebra Announce Fifo cleanup count before %u and after %u during BGP %s deletion", - cnt_before, cnt_after, bgp->name_pretty); + /* + * Pop all VPNs yet to be processed for remote routes install if the + * bgp-evpn instance is getting deleted + */ + if (bgp == bgp_get_evpn()) { + b_l2_cnt = zebra_l2_vni_count(&bm->zebra_l2_vni_head); + vni_count = b_l2_cnt; + while (vni_count) { + vpn = zebra_l2_vni_pop(&bm->zebra_l2_vni_head); + UNSET_FLAG(vpn->flags, VNI_FLAG_ADD); + vni_count--; + } + } + + b_l3_cnt = zebra_l3_vni_count(&bm->zebra_l3_vni_head); + for (bgp_to_proc = zebra_l3_vni_first(&bm->zebra_l3_vni_head); bgp_to_proc; + bgp_to_proc = bgp_to_proc_next) { + bgp_to_proc_next = zebra_l3_vni_next(&bm->zebra_l3_vni_head, bgp_to_proc); + if (bgp_to_proc == bgp) + zebra_l3_vni_del(&bm->zebra_l3_vni_head, bgp_to_proc); + } + + if (BGP_DEBUG(zebra, ZEBRA)) { + a_ann_cnt = zebra_announce_count(&bm->zebra_announce_head); + a_l2_cnt = zebra_l2_vni_count(&bm->zebra_l2_vni_head); + a_l3_cnt = zebra_l3_vni_count(&bm->zebra_l3_vni_head); + zlog_debug("BGP %s deletion FIFO cnt Zebra_Ann before %u after %u, L2_VNI before %u after, %u L3_VNI before %u after %u", + bgp->name_pretty, b_ann_cnt, a_ann_cnt, b_l2_cnt, a_l2_cnt, b_l3_cnt, + a_l3_cnt); + } bgp_soft_reconfig_table_task_cancel(bgp, NULL, NULL); @@ -8492,6 +8523,8 @@ void bgp_master_init(struct event_loop *master, const int buffer_size, bm = &bgp_master; zebra_announce_init(&bm->zebra_announce_head); + zebra_l2_vni_init(&bm->zebra_l2_vni_head); + zebra_l3_vni_init(&bm->zebra_l3_vni_head); bm->bgp = list_new(); bm->listen_sockets = list_new(); bm->port = BGP_PORT_DEFAULT; @@ -8515,6 +8548,8 @@ void bgp_master_init(struct event_loop *master, const int buffer_size, bm->stalepath_time = BGP_DEFAULT_STALEPATH_TIME; bm->select_defer_time = BGP_DEFAULT_SELECT_DEFERRAL_TIME; bm->rib_stale_time = BGP_DEFAULT_RIB_STALE_TIME; + bm->t_bgp_zebra_l2_vni = NULL; + bm->t_bgp_zebra_l3_vni = NULL; bgp_mac_init(); /* init the rd id space. @@ -8762,6 +8797,8 @@ void bgp_terminate(void) EVENT_OFF(bm->t_bgp_sync_label_manager); EVENT_OFF(bm->t_bgp_start_label_manager); EVENT_OFF(bm->t_bgp_zebra_route); + EVENT_OFF(bm->t_bgp_zebra_l2_vni); + EVENT_OFF(bm->t_bgp_zebra_l3_vni); bgp_mac_finish(); } diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index bb56fd355a05..f66b41abe942 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -19,6 +19,8 @@ #include "asn.h" PREDECL_LIST(zebra_announce); +PREDECL_LIST(zebra_l2_vni); +PREDECL_LIST(zebra_l3_vni); /* For union sockunion. */ #include "queue.h" @@ -204,6 +206,14 @@ struct bgp_master { /* To preserve ordering of installations into zebra across all Vrfs */ struct zebra_announce_head zebra_announce_head; + struct event *t_bgp_zebra_l2_vni; + /* To preserve ordering of processing of L2 VNIs in BGP */ + struct zebra_l2_vni_head zebra_l2_vni_head; + + struct event *t_bgp_zebra_l3_vni; + /* To preserve ordering of processing of BGP-VRFs for L3 VNIs */ + struct zebra_l3_vni_head zebra_l3_vni_head; + QOBJ_FIELDS; }; DECLARE_QOBJ_TYPE(bgp_master); @@ -554,6 +564,8 @@ struct bgp { #define BGP_FLAG_INSTANCE_HIDDEN (1ULL << 39) /* Prohibit BGP from enabling IPv6 RA on interfaces */ #define BGP_FLAG_IPV6_NO_AUTO_RA (1ULL << 40) +#define BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL (1ULL << 41) +#define BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE (1ULL << 42) /* BGP default address-families. * New peers inherit enabled afi/safis from bgp instance. @@ -868,10 +880,14 @@ struct bgp { uint64_t node_already_on_queue; uint64_t node_deferred_on_queue; + struct zebra_l3_vni_item zl3vni; + QOBJ_FIELDS; }; DECLARE_QOBJ_TYPE(bgp); +DECLARE_LIST(zebra_l3_vni, struct bgp, zl3vni); + struct bgp_interface { #define BGP_INTERFACE_MPLS_BGP_FORWARDING (1 << 0) /* L3VPN multi domain switching */ diff --git a/tests/topotests/evpn_type5_test_topo1/test_evpn_type5_chaos_topo1.py b/tests/topotests/evpn_type5_test_topo1/test_evpn_type5_chaos_topo1.py index 45868663a8c6..cb3104a522e2 100644 --- a/tests/topotests/evpn_type5_test_topo1/test_evpn_type5_chaos_topo1.py +++ b/tests/topotests/evpn_type5_test_topo1/test_evpn_type5_chaos_topo1.py @@ -21,6 +21,8 @@ import time import pytest import platform +import functools +from lib import topotest from copy import deepcopy @@ -539,6 +541,16 @@ def test_RT_verification_auto_p0(request): result = create_vrf_cfg(tgen, topo, input_dict=input_dict_vni) assert result is True, "Testcase {} :Failed \n Error: {}".format(tc_name, result) + expected = {"numL3Vnis": 0} + test_func = functools.partial( + topotest.router_json_cmp, + tgen.gears["e1"], + "show bgp l2vpn evpn vni json", + expected, + ) + _, result = topotest.run_and_expect(test_func, None, count=5, wait=3) + assert result is None, "Testcase {} :Failed \n Error: {}".format(tc_name, result) + input_dict_2 = {} for dut in ["e1"]: temp = {dut: {"bgp": []}} diff --git a/tests/topotests/evpn_type5_test_topo1/test_evpn_type5_topo1.py b/tests/topotests/evpn_type5_test_topo1/test_evpn_type5_topo1.py index beb4de432e22..52181a75dcca 100644 --- a/tests/topotests/evpn_type5_test_topo1/test_evpn_type5_topo1.py +++ b/tests/topotests/evpn_type5_test_topo1/test_evpn_type5_topo1.py @@ -25,6 +25,8 @@ import time import pytest import platform +import functools +from lib import topotest from copy import deepcopy @@ -1124,7 +1126,6 @@ def test_active_standby_evpn_implementation_p1(request): ) for addr_type in ADDR_TYPES: - logger.info("Verifying only ipv4 routes") if addr_type != "ipv4": continue @@ -2050,6 +2051,18 @@ def test_bgp_attributes_for_evpn_address_family_p1(request, attribute): tc_name, result ) + expected = {"numL3Vnis": 0} + test_func = functools.partial( + topotest.router_json_cmp, + tgen.gears["d1"], + "show bgp l2vpn evpn vni json", + expected, + ) + _, result = topotest.run_and_expect(test_func, None, count=5, wait=3) + assert result is None, "Testcase {} :Failed \n Error: {}".format( + tc_name, result + ) + input_dict_2 = {} for dut in ["d1"]: temp = {dut: {"bgp": []}}