Skip to content

Commit

Permalink
Merge pull request #17229 from donaldsharp/bgp_update_optimizations
Browse files Browse the repository at this point in the history
Optimizations and problem fixing for large scale ecmp from bgp
  • Loading branch information
ton31337 authored Oct 25, 2024
2 parents c5231ba + 138935a commit ba3836c
Show file tree
Hide file tree
Showing 8 changed files with 91 additions and 44 deletions.
37 changes: 35 additions & 2 deletions bgpd/bgp_aspath.c
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,8 @@ static struct aspath *aspath_new(enum asnotation_mode asnotation)

as = XCALLOC(MTYPE_AS_PATH, sizeof(struct aspath));
as->asnotation = asnotation;
as->count = 0;

return as;
}

Expand Down Expand Up @@ -399,6 +401,11 @@ unsigned int aspath_count_confeds(struct aspath *aspath)
}

unsigned int aspath_count_hops(const struct aspath *aspath)
{
return aspath->count;
}

static unsigned int aspath_count_hops_internal(const struct aspath *aspath)
{
int count = 0;
struct assegment *seg = aspath->segments;
Expand Down Expand Up @@ -708,6 +715,7 @@ struct aspath *aspath_dup(struct aspath *aspath)
else
new->str[0] = '\0';

new->count = aspath->count;
return new;
}

Expand All @@ -729,6 +737,7 @@ static void *aspath_hash_alloc(void *arg)
new->str_len = aspath->str_len;
new->json = aspath->json;
new->asnotation = aspath->asnotation;
new->count = aspath->count;

return new;
}
Expand Down Expand Up @@ -856,6 +865,8 @@ struct aspath *aspath_parse(struct stream *s, size_t length, int use32bit,
if (assegments_parse(s, length, &as.segments, use32bit) < 0)
return NULL;

as.count = aspath_count_hops_internal(&as);

/* If already same aspath exist then return it. */
find = hash_get(ashash, &as, aspath_hash_alloc);

Expand Down Expand Up @@ -1032,7 +1043,7 @@ static struct assegment *aspath_aggregate_as_set_add(struct aspath *aspath,
asset->as[asset->length - 1] = as;
}


aspath->count = aspath_count_hops_internal(aspath);
return asset;
}

Expand Down Expand Up @@ -1113,6 +1124,8 @@ struct aspath *aspath_aggregate(struct aspath *as1, struct aspath *as2)

assegment_normalise(aspath->segments);
aspath_str_update(aspath, false);
aspath->count = aspath_count_hops_internal(aspath);

return aspath;
}

Expand Down Expand Up @@ -1268,6 +1281,7 @@ struct aspath *aspath_replace_regex_asn(struct aspath *aspath,
}

aspath_str_update(new, false);
new->count = aspath_count_hops_internal(new);
return new;
}

Expand All @@ -1293,6 +1307,8 @@ struct aspath *aspath_replace_specific_asn(struct aspath *aspath,
}

aspath_str_update(new, false);
new->count = aspath_count_hops_internal(new);

return new;
}

Expand All @@ -1315,6 +1331,8 @@ struct aspath *aspath_replace_all_asn(struct aspath *aspath, as_t our_asn)
}

aspath_str_update(new, false);
new->count = aspath_count_hops_internal(new);

return new;
}

Expand All @@ -1341,6 +1359,8 @@ struct aspath *aspath_replace_private_asns(struct aspath *aspath, as_t asn,
}

aspath_str_update(new, false);
new->count = aspath_count_hops_internal(new);

return new;
}

Expand Down Expand Up @@ -1413,6 +1433,7 @@ struct aspath *aspath_remove_private_asns(struct aspath *aspath, as_t peer_asn)
if (!aspath->refcnt)
aspath_free(aspath);
aspath_str_update(new, false);
new->count = aspath_count_hops_internal(new);
return new;
}

Expand Down Expand Up @@ -1469,6 +1490,7 @@ static struct aspath *aspath_merge(struct aspath *as1, struct aspath *as2)
last->next = as2->segments;
as2->segments = new;
aspath_str_update(as2, false);
as2->count = aspath_count_hops_internal(as2);
return as2;
}

Expand All @@ -1486,6 +1508,7 @@ struct aspath *aspath_prepend(struct aspath *as1, struct aspath *as2)
if (as2->segments == NULL) {
as2->segments = assegment_dup_all(as1->segments);
aspath_str_update(as2, false);
as2->count = aspath_count_hops_internal(as2);
return as2;
}

Expand All @@ -1506,6 +1529,7 @@ struct aspath *aspath_prepend(struct aspath *as1, struct aspath *as2)
if (!as2->segments) {
as2->segments = assegment_dup_all(as1->segments);
aspath_str_update(as2, false);
as2->count = aspath_count_hops_internal(as2);
return as2;
}

Expand Down Expand Up @@ -1551,6 +1575,7 @@ struct aspath *aspath_prepend(struct aspath *as1, struct aspath *as2)
* the inbetween AS_SEQUENCE of seg2 in the process
*/
aspath_str_update(as2, false);
as2->count = aspath_count_hops_internal(as2);
return as2;
} else {
/* AS_SET merge code is needed at here. */
Expand Down Expand Up @@ -1662,6 +1687,7 @@ struct aspath *aspath_filter_exclude(struct aspath *source,
lastseg = newseg;
}
aspath_str_update(newpath, false);
newpath->count = aspath_count_hops_internal(newpath);
/* We are happy returning even an empty AS_PATH, because the
* administrator
* might expect this very behaviour. There's a mean to avoid this, if
Expand All @@ -1680,6 +1706,7 @@ struct aspath *aspath_filter_exclude_all(struct aspath *source)
newpath = aspath_new(source->asnotation);

aspath_str_update(newpath, false);
newpath->count = aspath_count_hops_internal(newpath);
/* We are happy returning even an empty AS_PATH, because the
* administrator
* might expect this very behaviour. There's a mean to avoid this, if
Expand Down Expand Up @@ -1767,6 +1794,7 @@ struct aspath *aspath_filter_exclude_acl(struct aspath *source,


aspath_str_update(source, false);
source->count = aspath_count_hops_internal(source);
/* We are happy returning even an empty AS_PATH, because the
* administrator
* might expect this very behaviour. There's a mean to avoid this, if
Expand Down Expand Up @@ -1805,6 +1833,7 @@ static struct aspath *aspath_add_asns(struct aspath *aspath, as_t asno,
}

aspath_str_update(aspath, false);
aspath->count = aspath_count_hops_internal(aspath);
return aspath;
}

Expand Down Expand Up @@ -1896,6 +1925,7 @@ struct aspath *aspath_reconcile_as4(struct aspath *aspath,
if (!hops) {
newpath = aspath_dup(as4path);
aspath_str_update(newpath, false);
/* dup sets the count properly */
return newpath;
}

Expand Down Expand Up @@ -1957,6 +1987,7 @@ struct aspath *aspath_reconcile_as4(struct aspath *aspath,
aspath_free(newpath);
mergedpath->segments = assegment_normalise(mergedpath->segments);
aspath_str_update(mergedpath, false);
mergedpath->count = aspath_count_hops_internal(mergedpath);

if (BGP_DEBUG(as4, AS4))
zlog_debug("[AS4] result of synthesizing is %s",
Expand Down Expand Up @@ -2027,8 +2058,10 @@ struct aspath *aspath_delete_confed_seq(struct aspath *aspath)
seg = next;
}

if (removed_confed_segment)
if (removed_confed_segment) {
aspath_str_update(aspath, false);
aspath->count = aspath_count_hops_internal(aspath);
}

return aspath;
}
Expand Down
1 change: 1 addition & 0 deletions bgpd/bgp_aspath.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ struct aspath {
and AS path regular expression match. */
char *str;
unsigned short str_len;
uint32_t count;

/* AS notation used by string expression of AS path */
enum asnotation_mode asnotation;
Expand Down
10 changes: 10 additions & 0 deletions bgpd/bgp_fsm.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ static struct peer *peer_xfer_conn(struct peer *from_peer)
EVENT_OFF(going_away->t_delayopen);
EVENT_OFF(going_away->t_connect_check_r);
EVENT_OFF(going_away->t_connect_check_w);
EVENT_OFF(going_away->t_stop_with_notify);
EVENT_OFF(keeper->t_routeadv);
EVENT_OFF(keeper->t_connect);
EVENT_OFF(keeper->t_delayopen);
Expand Down Expand Up @@ -1475,6 +1476,8 @@ enum bgp_fsm_state_progress bgp_stop(struct peer_connection *connection)
EVENT_OFF(connection->t_connect_check_r);
EVENT_OFF(connection->t_connect_check_w);

EVENT_OFF(connection->t_stop_with_notify);

/* Stop all timers. */
EVENT_OFF(connection->t_start);
EVENT_OFF(connection->t_connect);
Expand Down Expand Up @@ -3032,3 +3035,10 @@ void bgp_peer_gr_flags_update(struct peer *peer)
}
}
}

void bgp_event_stop_with_notify(struct event *event)
{
struct peer_connection *connection = EVENT_ARG(event);

bgp_stop_with_notify(connection, BGP_NOTIFY_SEND_HOLD_ERR, 0);
}
1 change: 1 addition & 0 deletions bgpd/bgp_fsm.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ enum bgp_fsm_state_progress {
extern void bgp_fsm_nht_update(struct peer_connection *connection,
struct peer *peer, bool has_valid_nexthops);
extern void bgp_event(struct event *event);
extern void bgp_event_stop_with_notify(struct event *event);
extern int bgp_event_update(struct peer_connection *connection,
enum bgp_fsm_events event);
extern enum bgp_fsm_state_progress bgp_stop(struct peer_connection *connection);
Expand Down
60 changes: 28 additions & 32 deletions bgpd/bgp_packet.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,42 +122,38 @@ static void bgp_packet_add(struct peer_connection *connection,
peer->last_sendq_ok = monotime(NULL);

stream_fifo_push(connection->obuf, s);
}

delta = monotime(NULL) - peer->last_sendq_ok;
delta = monotime(NULL) - peer->last_sendq_ok;

if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER))
holdtime = atomic_load_explicit(&peer->holdtime,
memory_order_relaxed);
else
holdtime = peer->bgp->default_holdtime;
if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER))
holdtime = atomic_load_explicit(&peer->holdtime, memory_order_relaxed);
else
holdtime = peer->bgp->default_holdtime;

sendholdtime = holdtime * 2;
sendholdtime = holdtime * 2;

/* Note that when we're here, we're adding some packet to the
* OutQ. That includes keepalives when there is nothing to
* do, so there's a guarantee we pass by here once in a while.
*
* That implies there is no need to go set up another separate
* timer that ticks down SendHoldTime, as we'll be here sooner
* or later anyway and will see the checks below failing.
*/
if (!holdtime) {
/* no holdtime, do nothing. */
} else if (delta > sendholdtime) {
flog_err(
EC_BGP_SENDQ_STUCK_PROPER,
"%pBP has not made any SendQ progress for 2 holdtimes (%jds), terminating session",
peer, sendholdtime);
bgp_stop_with_notify(connection,
BGP_NOTIFY_SEND_HOLD_ERR, 0);
} else if (delta > (intmax_t)holdtime &&
monotime(NULL) - peer->last_sendq_warn > 5) {
flog_warn(
EC_BGP_SENDQ_STUCK_WARN,
"%pBP has not made any SendQ progress for 1 holdtime (%us), peer overloaded?",
peer, holdtime);
peer->last_sendq_warn = monotime(NULL);
}
/* Note that when we're here, we're adding some packet to the
* OutQ. That includes keepalives when there is nothing to
* do, so there's a guarantee we pass by here once in a while.
*
* That implies there is no need to go set up another separate
* timer that ticks down SendHoldTime, as we'll be here sooner
* or later anyway and will see the checks below failing.
*/
if (!holdtime) {
/* no holdtime, do nothing. */
} else if (delta > sendholdtime) {
flog_err(EC_BGP_SENDQ_STUCK_PROPER,
"%pBP has not made any SendQ progress for 2 holdtimes (%jds), terminating session",
peer, sendholdtime);
event_add_event(bm->master, bgp_event_stop_with_notify, connection, 0,
&connection->t_stop_with_notify);
} else if (delta > (intmax_t)holdtime && monotime(NULL) - peer->last_sendq_warn > 5) {
flog_warn(EC_BGP_SENDQ_STUCK_WARN,
"%pBP has not made any SendQ progress for 1 holdtime (%us), peer overloaded?",
peer, holdtime);
peer->last_sendq_warn = monotime(NULL);
}
}

Expand Down
22 changes: 13 additions & 9 deletions bgpd/bgp_route.c
Original file line number Diff line number Diff line change
Expand Up @@ -1133,9 +1133,9 @@ int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
/* 4. AS path length check. */
if (!CHECK_FLAG(bgp->flags, BGP_FLAG_ASPATH_IGNORE)) {
int exist_hops = aspath_count_hops(existattr->aspath);
int exist_confeds = aspath_count_confeds(existattr->aspath);

if (CHECK_FLAG(bgp->flags, BGP_FLAG_ASPATH_CONFED)) {
int exist_confeds = aspath_count_confeds(existattr->aspath);
int aspath_hops;

aspath_hops = aspath_count_hops(newattr->aspath);
Expand Down Expand Up @@ -4676,10 +4676,12 @@ void bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id,
* will not be interned. In which case, it is ok to update the
* attr->evpn_overlay, so that, this can be stored in adj_in.
*/
if ((afi == AFI_L2VPN) && evpn)
bgp_attr_set_evpn_overlay(attr, evpn);
else
evpn_overlay_free(evpn);
if (evpn) {
if (afi == AFI_L2VPN)
bgp_attr_set_evpn_overlay(attr, evpn);
else
evpn_overlay_free(evpn);
}
bgp_adj_in_set(dest, peer, attr, addpath_id, &bgp_labels);
}

Expand Down Expand Up @@ -4855,10 +4857,12 @@ void bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id,
* attr->evpn_overlay with evpn directly. Instead memcpy
* evpn to new_atr.evpn_overlay before it is interned.
*/
if (soft_reconfig && (afi == AFI_L2VPN) && evpn)
bgp_attr_set_evpn_overlay(&new_attr, evpn);
else
evpn_overlay_free(evpn);
if (soft_reconfig && evpn) {
if (afi == AFI_L2VPN)
bgp_attr_set_evpn_overlay(&new_attr, evpn);
else
evpn_overlay_free(evpn);
}

/* Apply incoming route-map.
* NB: new_attr may now contain newly allocated values from route-map
Expand Down
2 changes: 2 additions & 0 deletions bgpd/bgpd.h
Original file line number Diff line number Diff line change
Expand Up @@ -1223,6 +1223,8 @@ struct peer_connection {
struct event *t_process_packet;
struct event *t_process_packet_error;

struct event *t_stop_with_notify;

union sockunion su;
#define BGP_CONNECTION_SU_UNSPEC(connection) \
(connection->su.sa.sa_family == AF_UNSPEC)
Expand Down
2 changes: 1 addition & 1 deletion zebra/kernel_netlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -932,7 +932,7 @@ static int netlink_recv_msg(struct nlsock *nl, struct msghdr *msg)
} while (status == -1 && errno == EINTR);

if (status == -1) {
if (errno == EWOULDBLOCK || errno == EAGAIN)
if (errno == EWOULDBLOCK || errno == EAGAIN || errno == EMSGSIZE)
return 0;
flog_err(EC_ZEBRA_RECVMSG_OVERRUN, "%s recvmsg overrun: %s",
nl->name, safe_strerror(errno));
Expand Down

0 comments on commit ba3836c

Please sign in to comment.