From b0800bfdf04b4fcf48504737ebfe4ba7f05268d3 Mon Sep 17 00:00:00 2001 From: Donatas Abraitis Date: Wed, 4 Dec 2024 23:38:34 +0200 Subject: [PATCH] bgpd: Validate only affected RPKI prefixes instead of a full RIB Before this fix, if rpki_sync_socket_rtr socket returns EAGAIN, then ALL routes in the RIB are revalidated which takes lots of CPU and some unnecessary traffic, e.g. if using BMP servers. With a full feed it would waste 50-80Mbps. Instead we should try to drain an existing pipe (another end), and revalidate only affected prefixes. Signed-off-by: Donatas Abraitis --- bgpd/bgp_rpki.c | 170 +++++++++++++++--------------------------------- bgpd/bgpd.c | 4 -- bgpd/bgpd.h | 1 - 3 files changed, 53 insertions(+), 122 deletions(-) diff --git a/bgpd/bgp_rpki.c b/bgpd/bgp_rpki.c index 347c5d02a1e1..04a709b350e0 100644 --- a/bgpd/bgp_rpki.c +++ b/bgpd/bgp_rpki.c @@ -155,7 +155,6 @@ static enum route_map_cmd_result_t route_match(void *rule, void *object); static void *route_match_compile(const char *arg); static void revalidate_bgp_node(struct bgp_dest *dest, afi_t afi, safi_t safi); -static void revalidate_all_routes(struct rpki_vrf *rpki_vrf); static bool rpki_debug_conf, rpki_debug_term; @@ -586,48 +585,10 @@ static void rpki_revalidate_prefix(struct event *thread) XFREE(MTYPE_BGP_RPKI_REVALIDATE, rrp); } -static void bgpd_sync_callback(struct event *thread) +static void revalidate_single_prefix(struct vrf *vrf, struct prefix prefix, afi_t afi) { struct bgp *bgp; struct listnode *node; - struct prefix prefix; - struct pfx_record rec; - struct rpki_vrf *rpki_vrf = EVENT_ARG(thread); - struct vrf *vrf = NULL; - - event_add_read(bm->master, bgpd_sync_callback, rpki_vrf, - rpki_vrf->rpki_sync_socket_bgpd, NULL); - - if (atomic_load_explicit(&rpki_vrf->rtr_update_overflow, - memory_order_seq_cst)) { - while (read(rpki_vrf->rpki_sync_socket_bgpd, &rec, - sizeof(struct pfx_record)) != -1) - ; - - atomic_store_explicit(&rpki_vrf->rtr_update_overflow, 0, - memory_order_seq_cst); - revalidate_all_routes(rpki_vrf); - return; - } - - int retval = read(rpki_vrf->rpki_sync_socket_bgpd, &rec, - sizeof(struct pfx_record)); - if (retval != sizeof(struct pfx_record)) { - RPKI_DEBUG("Could not read from rpki_sync_socket_bgpd"); - return; - } - pfx_record_to_prefix(&rec, &prefix); - - afi_t afi = (rec.prefix.ver == LRTR_IPV4) ? AFI_IP : AFI_IP6; - - if (rpki_vrf->vrfname) { - vrf = vrf_lookup_by_name(rpki_vrf->vrfname); - if (!vrf) { - zlog_err("%s(): vrf for rpki %s not found", __func__, - rpki_vrf->vrfname); - return; - } - } for (ALL_LIST_ELEMENTS_RO(bm->bgp, node, bgp)) { safi_t safi; @@ -655,101 +616,76 @@ static void bgpd_sync_callback(struct event *thread) } } -static void revalidate_bgp_node(struct bgp_dest *bgp_dest, afi_t afi, - safi_t safi) +static void bgpd_sync_callback(struct event *thread) { - struct bgp_adj_in *ain; - mpls_label_t *label; - uint8_t num_labels; - - for (ain = bgp_dest->adj_in; ain; ain = ain->next) { - struct bgp_path_info *path = - bgp_dest_get_bgp_path_info(bgp_dest); - - num_labels = BGP_PATH_INFO_NUM_LABELS(path); - label = num_labels ? path->extra->labels->label : NULL; - - (void)bgp_update(ain->peer, bgp_dest_get_prefix(bgp_dest), - ain->addpath_rx_id, ain->attr, afi, safi, - ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL, NULL, label, - num_labels, 1, NULL); - } -} - -/* - * The act of a soft reconfig in revalidation is really expensive - * coupled with the fact that the download of a full rpki state - * from a rpki server can be expensive, let's break up the revalidation - * to a point in time in the future to allow other bgp events - * to take place too. - */ -struct rpki_revalidate_peer { + struct prefix prefix; + struct pfx_record rec; + struct rpki_vrf *rpki_vrf = EVENT_ARG(thread); + struct vrf *vrf = NULL; afi_t afi; - safi_t safi; - struct peer *peer; -}; + int retval; -static void bgp_rpki_revalidate_peer(struct event *thread) -{ - struct rpki_revalidate_peer *rvp = EVENT_ARG(thread); - - /* - * Here's the expensive bit of gnomish deviousness - */ - bgp_soft_reconfig_in(rvp->peer, rvp->afi, rvp->safi); - - XFREE(MTYPE_BGP_RPKI_REVALIDATE, rvp); -} - -static void revalidate_all_routes(struct rpki_vrf *rpki_vrf) -{ - struct bgp *bgp; - struct listnode *node; - struct vrf *vrf = NULL; + event_add_read(bm->master, bgpd_sync_callback, rpki_vrf, rpki_vrf->rpki_sync_socket_bgpd, + NULL); if (rpki_vrf->vrfname) { vrf = vrf_lookup_by_name(rpki_vrf->vrfname); if (!vrf) { - zlog_err("%s(): vrf for rpki %s not found", __func__, - rpki_vrf->vrfname); + zlog_err("%s(): vrf for rpki %s not found", __func__, rpki_vrf->vrfname); return; } } - for (ALL_LIST_ELEMENTS_RO(bm->bgp, node, bgp)) { - struct peer *peer; - struct listnode *peer_listnode; + if (atomic_load_explicit(&rpki_vrf->rtr_update_overflow, memory_order_seq_cst)) { + ssize_t size = 0; - if (!vrf && bgp->vrf_id != VRF_DEFAULT) - continue; - if (vrf && bgp->vrf_id != vrf->vrf_id) - continue; + retval = read(rpki_vrf->rpki_sync_socket_bgpd, &rec, sizeof(struct pfx_record)); + while (retval != -1) { + if (retval != sizeof(struct pfx_record)) + break; - for (ALL_LIST_ELEMENTS_RO(bgp->peer, peer_listnode, peer)) { - afi_t afi; - safi_t safi; + size += retval; + pfx_record_to_prefix(&rec, &prefix); + afi = (rec.prefix.ver == LRTR_IPV4) ? AFI_IP : AFI_IP6; + revalidate_single_prefix(vrf, prefix, afi); - FOREACH_AFI_SAFI (afi, safi) { - struct rpki_revalidate_peer *rvp; + retval = read(rpki_vrf->rpki_sync_socket_bgpd, &rec, + sizeof(struct pfx_record)); + } - if (!bgp->rib[afi][safi]) - continue; + RPKI_DEBUG("Socket overflow detected (%zu), revalidating affected prefixes", size); - if (!peer_established(peer->connection)) - continue; + atomic_store_explicit(&rpki_vrf->rtr_update_overflow, 0, memory_order_seq_cst); + return; + } - rvp = XCALLOC(MTYPE_BGP_RPKI_REVALIDATE, - sizeof(*rvp)); - rvp->peer = peer; - rvp->afi = afi; - rvp->safi = safi; + retval = read(rpki_vrf->rpki_sync_socket_bgpd, &rec, sizeof(struct pfx_record)); + if (retval != sizeof(struct pfx_record)) { + RPKI_DEBUG("Could not read from rpki_sync_socket_bgpd"); + return; + } + pfx_record_to_prefix(&rec, &prefix); - event_add_event( - bm->master, bgp_rpki_revalidate_peer, - rvp, 0, - &peer->t_revalidate_all[afi][safi]); - } - } + afi = (rec.prefix.ver == LRTR_IPV4) ? AFI_IP : AFI_IP6; + + revalidate_single_prefix(vrf, prefix, afi); +} + +static void revalidate_bgp_node(struct bgp_dest *bgp_dest, afi_t afi, safi_t safi) +{ + struct bgp_adj_in *ain; + mpls_label_t *label; + uint8_t num_labels; + + for (ain = bgp_dest->adj_in; ain; ain = ain->next) { + struct bgp_path_info *path = bgp_dest_get_bgp_path_info(bgp_dest); + + num_labels = BGP_PATH_INFO_NUM_LABELS(path); + label = num_labels ? path->extra->labels->label : NULL; + + (void)bgp_update(ain->peer, bgp_dest_get_prefix(bgp_dest), ain->addpath_rx_id, + ain->attr, afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL, NULL, + label, num_labels, 1, NULL); } } diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index dccac3eceb19..977cf9ea4d0d 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -1227,8 +1227,6 @@ static void peer_free(struct peer *peer) bgp_reads_off(peer->connection); bgp_writes_off(peer->connection); event_cancel_event_ready(bm->master, peer->connection); - FOREACH_AFI_SAFI (afi, safi) - EVENT_OFF(peer->t_revalidate_all[afi][safi]); assert(!peer->connection->t_write); assert(!peer->connection->t_read); @@ -2677,8 +2675,6 @@ int peer_delete(struct peer *peer) bgp_reads_off(peer->connection); bgp_writes_off(peer->connection); event_cancel_event_ready(bm->master, peer->connection); - FOREACH_AFI_SAFI (afi, safi) - EVENT_OFF(peer->t_revalidate_all[afi][safi]); assert(!CHECK_FLAG(peer->connection->thread_flags, PEER_THREAD_WRITES_ON)); assert(!CHECK_FLAG(peer->connection->thread_flags, diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index df55d879e71d..aa859d0d46ff 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -1654,7 +1654,6 @@ struct peer { /* Threads. */ struct event *t_llgr_stale[AFI_MAX][SAFI_MAX]; - struct event *t_revalidate_all[AFI_MAX][SAFI_MAX]; struct event *t_refresh_stalepath; /* Thread flags. */