Skip to content

Commit

Permalink
bgpd: batch peer connection error clearing
Browse files Browse the repository at this point in the history
When peer connections encounter errors, attempt to batch some
of the clearing processing that occurs. Add a new batch object,
add multiple peers to it, if possible. Do one rib walk for the
batch, rather than one walk per peer. Use a handler callback
per batch to check and remove peers' path-infos, rather than
a work-queue and callback per peer. The original clearing code
remains; it's used for single peers.

Signed-off-by: Mark Stapp <[email protected]>
  • Loading branch information
Mark Stapp committed Nov 26, 2024
1 parent affc54a commit 7e73ebf
Show file tree
Hide file tree
Showing 8 changed files with 603 additions and 77 deletions.
3 changes: 2 additions & 1 deletion bgpd/bgp_fsm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1271,7 +1271,8 @@ void bgp_fsm_change_status(struct peer_connection *connection,
* Clearing
* (or Deleted).
*/
if (!work_queue_is_scheduled(peer->clear_node_queue) &&
if (!CHECK_FLAG(peer->flags, PEER_FLAG_CLEARING_BATCH) &&
!work_queue_is_scheduled(peer->clear_node_queue) &&
status != Deleted)
BGP_EVENT_ADD(connection, Clearing_Completed);
}
Expand Down
2 changes: 2 additions & 0 deletions bgpd/bgp_memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,4 +136,6 @@ DECLARE_MTYPE(BGP_SOFT_VERSION);

DECLARE_MTYPE(BGP_EVPN_OVERLAY);

DECLARE_MTYPE(CLEARING_BATCH);

#endif /* _QUAGGA_BGP_MEMORY_H */
59 changes: 0 additions & 59 deletions bgpd/bgp_packet.c
Original file line number Diff line number Diff line change
Expand Up @@ -4196,62 +4196,3 @@ void bgp_send_delayed_eor(struct bgp *bgp)
for (ALL_LIST_ELEMENTS(bgp->peer, node, nnode, peer))
bgp_write_proceed_actions(peer);
}

/*
* Task callback in the main pthread to handle socket error
* encountered in the io pthread. We avoid having the io pthread try
* to enqueue fsm events or mess with the peer struct.
*/

/* Max number of peers to process without rescheduling */
#define BGP_CONN_ERROR_DEQUEUE_MAX 10

/*
 * Task callback in the main pthread to handle a socket error
 * encountered in the io pthread. The io pthread only enqueues the
 * affected peers; all fsm events and peer-struct updates happen here.
 */
void bgp_packet_process_error(struct event *thread)
{
	struct bgp *bgp = EVENT_ARG(thread);
	struct peer_connection *connection;
	struct peer *peer;
	bool more_p = false;
	int processed;

	/* Drain peers from the error list, bounding the work done in
	 * a single pass so we don't starve other events.
	 */
	for (processed = 0; processed < BGP_CONN_ERROR_DEQUEUE_MAX;
	     processed++) {
		peer = bgp_dequeue_conn_err_peer(bgp, &more_p);
		if (peer == NULL)
			break;

		connection = peer->connection;

		if (bgp_debug_neighbor_events(peer))
			zlog_debug("%s [Event] BGP error %d on fd %d",
				   peer->host, peer->connection_errcode,
				   connection->fd);

		/* Closed connection or error on the socket */
		if (peer_established(connection)) {
			/* If GR/NSF applies to this session, note that we
			 * should retain routes across the restart.
			 */
			if ((CHECK_FLAG(peer->flags,
					PEER_FLAG_GRACEFUL_RESTART) ||
			     CHECK_FLAG(peer->flags,
					PEER_FLAG_GRACEFUL_RESTART_HELPER)) &&
			    CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
				peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
				SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
			} else
				peer->last_reset = PEER_DOWN_CLOSE_SESSION;
		}

		/* No need for keepalives, if enabled */
		bgp_keepalives_off(connection);

		bgp_event_update(connection, peer->connection_errcode);
	}

	/* The dequeue reported more peers waiting: reschedule ourselves */
	if (more_p)
		bgp_conn_err_reschedule(bgp);

	if (bgp_debug_neighbor_events(NULL))
		zlog_debug("%s: dequeued and processed %d peers", __func__,
			   processed);
}
2 changes: 0 additions & 2 deletions bgpd/bgp_packet.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,6 @@ extern void bgp_process_packet(struct event *event);

extern void bgp_send_delayed_eor(struct bgp *bgp);

/* Task callback to handle socket error encountered in the io pthread */
void bgp_packet_process_error(struct event *thread);
extern struct bgp_notify
bgp_notify_decapsulate_hard_reset(struct bgp_notify *notify);
extern bool bgp_has_graceful_restart_notification(struct peer *peer);
Expand Down
232 changes: 232 additions & 0 deletions bgpd/bgp_route.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@

#include "bgpd/bgp_route_clippy.c"

/* Memory for batched clearing of peers from the RIB */
DEFINE_MTYPE(BGPD, CLEARING_BATCH, "Clearing batch");

DEFINE_HOOK(bgp_snmp_update_stats,
(struct bgp_dest *rn, struct bgp_path_info *pi, bool added),
(rn, pi, added));
Expand Down Expand Up @@ -6186,11 +6189,240 @@ void bgp_clear_route(struct peer *peer, afi_t afi, safi_t safi)
peer_unlock(peer);
}

/*
* Callback scheduled to process prefixes/dests for batch clearing; the
* dests were found via a rib walk.
* The one-peer version of this uses a per-peer workqueue to manage
* rescheduling, but we're just using a fixed limit here.
*/

/* Limit the number of dests we'll process per callback */
#define BGP_CLEARING_BATCH_MAX_DESTS 100

/*
 * Event-loop task that performs the actual clearing work for a batch:
 * dequeue each dest found by the rib walk and either stale or remove
 * every path owned by one of the batch's peers.  Processes at most
 * BGP_CLEARING_BATCH_MAX_DESTS dests, then reschedules itself if work
 * remains; when the queue is empty the batch is marked completed.
 */
static void bgp_clear_batch_dests_task(struct event *event)
{
	struct bgp_clearing_info *cinfo = EVENT_ARG(event);
	struct bgp_dest *dest;
	struct bgp_path_info *pi;
	struct bgp_table *table;
	struct bgp *bgp;
	afi_t afi;
	safi_t safi;
	int counter = 0;

	bgp = cinfo->bgp;

next_dest:

	/* Dequeue the next pending dest; NULL means the batch is drained */
	dest = bgp_clearing_batch_next_dest(cinfo);
	if (dest == NULL)
		goto done;

	table = bgp_dest_table(dest);
	afi = table->afi;
	safi = table->safi;

	/* Have to check every path: it is possible that we have multiple paths
	 * for a prefix from a peer if that peer is using AddPath.
	 */
	for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) {
		/* Skip paths owned by peers not in this batch */
		if (!bgp_clearing_batch_check_peer(cinfo, pi->peer))
			continue;

		/* graceful restart STALE flag set: retain the path, marked
		 * stale, rather than removing it outright.
		 */
		if (((CHECK_FLAG(pi->peer->sflags, PEER_STATUS_NSF_WAIT)
		      && pi->peer->nsf[afi][safi])
		     || CHECK_FLAG(pi->peer->af_sflags[afi][safi],
				   PEER_STATUS_ENHANCED_REFRESH))
		    && !CHECK_FLAG(pi->flags, BGP_PATH_STALE)
		    && !CHECK_FLAG(pi->flags, BGP_PATH_UNUSEABLE))
			bgp_path_info_set_flag(dest, pi, BGP_PATH_STALE);
		else {
			/* If this is an EVPN route, process for
			 * un-import. */
			if (safi == SAFI_EVPN)
				bgp_evpn_unimport_route(
					bgp, afi, safi,
					bgp_dest_get_prefix(dest), pi);
			/* Handle withdraw for VRF route-leaking and L3VPN */
			if (SAFI_UNICAST == safi
			    && (bgp->inst_type == BGP_INSTANCE_TYPE_VRF ||
				bgp->inst_type == BGP_INSTANCE_TYPE_DEFAULT)) {
				vpn_leak_from_vrf_withdraw(bgp_get_default(),
							   bgp, pi);
			}
			if (SAFI_MPLS_VPN == safi &&
			    bgp->inst_type == BGP_INSTANCE_TYPE_DEFAULT) {
				vpn_leak_to_vrf_withdraw(pi);
			}

			bgp_rib_remove(dest, pi, pi->peer, afi, safi);
		}
	}

	/* Unref this dest and table: pairs with the locks taken when the
	 * dest was enqueued during the rib walk.
	 */
	bgp_dest_unlock_node(dest);
	bgp_table_unlock(bgp_dest_table(dest));

	counter++;
	if (counter < BGP_CLEARING_BATCH_MAX_DESTS)
		goto next_dest;

done:

	/* If there are still dests to process, reschedule. */
	if (bgp_clearing_batch_dests_present(cinfo)) {
		if (bgp_debug_neighbor_events(NULL))
			zlog_debug("%s: Batch %p: Rescheduled after processing %d dests",
				   __func__, cinfo, counter);

		event_add_event(bm->master, bgp_clear_batch_dests_task, cinfo,
				0, &cinfo->t_sched);
	} else {
		if (bgp_debug_neighbor_events(NULL))
			zlog_debug("%s: Batch %p: Done after processing %d dests",
				   __func__, cinfo, counter);
		bgp_clearing_batch_completed(cinfo);
	}

	return;
}

/*
* Walk a single table for batch peer clearing processing
*/
/*
 * Walk a single bgp table for batch peer clearing: drop adj-in info
 * from batch peers inline, and enqueue each dest holding a path from
 * a batch peer for async processing (or reap it directly when forced).
 */
static void clear_batch_table_helper(struct bgp_clearing_info *cinfo,
				     struct bgp_table *table)
{
	struct bgp_dest *dest;
	/* No process queue: reap paths immediately rather than
	 * enqueueing them - presumably only during shutdown; confirm.
	 */
	bool force = (cinfo->bgp->process_queue == NULL);
	uint32_t examined = 0, queued = 0;

	for (dest = bgp_table_top(table); dest; dest = bgp_route_next(dest)) {
		struct bgp_path_info *pi, *next;
		struct bgp_adj_in *ain;
		struct bgp_adj_in *ain_next;

		examined++;

		/* Remove adj-in (soft-reconfig) entries from batch peers */
		ain = dest->adj_in;
		while (ain) {
			ain_next = ain->next;

			if (bgp_clearing_batch_check_peer(cinfo, ain->peer))
				bgp_adj_in_remove(&dest, ain);

			ain = ain_next;

			/* bgp_adj_in_remove() may update 'dest'; it must
			 * remain valid for the rest of this iteration.
			 */
			assert(dest != NULL);
		}

		for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = next) {
			next = pi->next;
			if (!bgp_clearing_batch_check_peer(cinfo, pi->peer))
				continue;

			queued++;

			if (force) {
				bgp_path_info_reap(dest, pi);
			} else {
				/* Unlocked after processing */
				bgp_table_lock(bgp_dest_table(dest));
				bgp_dest_lock_node(dest);

				/* Enqueue the dest once and stop scanning:
				 * the async task re-examines all its paths.
				 */
				bgp_clearing_batch_add_dest(cinfo, dest);
				break;
			}
		}
	}

	if (examined > 0) {
		if (bgp_debug_neighbor_events(NULL))
			zlog_debug("%s: %s/%s: examined %u, queued %u",
				   __func__, afi2str(table->afi),
				   safi2str(table->safi), examined, queued);
	}
}

/*
* RIB-walking helper for batch clearing work: walk all tables, identify
* dests that are affected by the peers in the batch, enqueue the dests for
* async processing.
*/
static void clear_batch_rib_helper(struct bgp_clearing_info *cinfo)
{
afi_t afi;
safi_t safi;
struct bgp_dest *dest;
struct bgp_table *table;

FOREACH_AFI_SAFI (afi, safi) {
/* Identify table to be examined */
if (safi != SAFI_MPLS_VPN && safi != SAFI_ENCAP &&
safi != SAFI_EVPN) {
table = cinfo->bgp->rib[afi][safi];
if (!table)
continue;

clear_batch_table_helper(cinfo, table);
} else {
for (dest = bgp_table_top(cinfo->bgp->rib[afi][safi]);
dest; dest = bgp_route_next(dest)) {
table = bgp_dest_get_bgp_table_info(dest);
if (!table)
continue;

/* TODO -- record the tables we've seen
* and don't repeat any?
*/

clear_batch_table_helper(cinfo, table);
}
}
}
}

/*
* Identify prefixes that need to be cleared for a batch of peers in 'cinfo'.
* The actual clearing processing will be done async...
*/
/*
 * Identify prefixes that need to be cleared for a batch of peers in
 * 'cinfo'; the actual clearing processing runs later, asynchronously.
 */
void bgp_clear_route_batch(struct bgp_clearing_info *cinfo)
{
	if (bgp_debug_neighbor_events(NULL))
		zlog_debug("%s: BGP %s, batch %p", __func__,
			   cinfo->bgp->name_pretty, cinfo);

	/* Walk the rib, checking the peers in the batch */
	clear_batch_rib_helper(cinfo);

	/* No dests found: it's the caller's job to clean up, release
	 * refs, etc.
	 */
	if (!bgp_clearing_batch_dests_present(cinfo))
		return;

	/* Schedule a task to begin working through the queued dests */
	event_add_event(bm->master, bgp_clear_batch_dests_task, cinfo, 0,
			&cinfo->t_sched);
}

void bgp_clear_route_all(struct peer *peer)
{
afi_t afi;
safi_t safi;

/* We may be able to batch multiple peers' clearing work: check
* and see.
*/
if (bgp_clearing_batch_add_peer(peer->bgp, peer)) {
if (bgp_debug_neighbor_events(peer))
zlog_debug("%s: peer %pBP batched", __func__, peer);
return;
}

if (bgp_debug_neighbor_events(peer))
zlog_debug("%s: peer %pBP", __func__, peer);

FOREACH_AFI_SAFI (afi, safi)
bgp_clear_route(peer, afi, safi);

Expand Down
3 changes: 3 additions & 0 deletions bgpd/bgp_route.h
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,9 @@ extern void bgp_soft_reconfig_table_task_cancel(const struct bgp *bgp,
extern bool bgp_soft_reconfig_in(struct peer *peer, afi_t afi, safi_t safi);
extern void bgp_clear_route(struct peer *, afi_t, safi_t);
extern void bgp_clear_route_all(struct peer *);
/* Clear routes for a batch of peers */
void bgp_clear_route_batch(struct bgp_clearing_info *cinfo);

extern void bgp_clear_adj_in(struct peer *, afi_t, safi_t);
extern void bgp_clear_stale_route(struct peer *, afi_t, safi_t);
extern void bgp_set_stale_route(struct peer *peer, afi_t afi, safi_t safi);
Expand Down
Loading

0 comments on commit 7e73ebf

Please sign in to comment.