Skip to content

Commit

Permalink
*: Add and use option for graceful (re)start
Browse files Browse the repository at this point in the history
Add a new start option "-K" to libfrr to denote a graceful start,
and use it in zebra and bgpd.

zebra will use this option to denote a planned FRR graceful restart
(currently supporting only bgpd) and will wait for route sync completion
from bgpd before cleaning up old stale routes from the FIB. An optional
timer provides an upper bound for this cleanup.

bgpd will use this option to denote either a planned FRR graceful
restart or a bgpd-only graceful restart, and this will drive the BGP
GR restarting router procedures.

Signed-off-by: Vivek Venkatraman <[email protected]>
  • Loading branch information
vivek-cumulus authored and donaldsharp committed Dec 18, 2024
1 parent e0dc1ba commit 9d2234b
Show file tree
Hide file tree
Showing 14 changed files with 186 additions and 63 deletions.
3 changes: 3 additions & 0 deletions bgpd/bgp_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,9 @@ int main(int argc, char **argv)
bgp_option_set(BGP_OPT_NO_FIB);
if (no_zebra_flag)
bgp_option_set(BGP_OPT_NO_ZEBRA);
if (bgpd_di.graceful_restart)
SET_FLAG(bm->flags, BM_FLAG_GRACEFUL_RESTART);

bgp_error_init();
/* Initializations. */
bgp_vrf_init();
Expand Down
3 changes: 2 additions & 1 deletion bgpd/bgp_vty.c
Original file line number Diff line number Diff line change
Expand Up @@ -3602,9 +3602,10 @@ DEFPY (neighbor_graceful_shutdown,
afi_t afi;
safi_t safi;
struct peer *peer;
VTY_DECLVAR_CONTEXT(bgp, bgp);
int ret;

VTY_DECLVAR_CONTEXT(bgp, bgp);

peer = peer_and_group_lookup_vty(vty, neighbor);
if (!peer)
return CMD_WARNING_CONFIG_FAILED;
Expand Down
6 changes: 6 additions & 0 deletions doc/user/bgp.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,12 @@ be specified (:ref:`common-invocation-options`).
the operator has turned off communication to zebra and is running bgpd
as a complete standalone process.

.. option:: -K, --graceful_restart

Start bgpd in graceful restart mode. This option denotes either a
planned FRR graceful restart or a bgpd-only graceful restart, and it
drives the BGP Graceful Restart (GR) restarting router procedures.

LABEL MANAGER
-------------

Expand Down
45 changes: 28 additions & 17 deletions lib/libfrr.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,37 +102,41 @@ static void opt_extend(const struct optspec *os)
#define OPTION_SCRIPTDIR 1009

static const struct option lo_always[] = {
{"help", no_argument, NULL, 'h'},
{"version", no_argument, NULL, 'v'},
{"daemon", no_argument, NULL, 'd'},
{"module", no_argument, NULL, 'M'},
{"profile", required_argument, NULL, 'F'},
{"pathspace", required_argument, NULL, 'N'},
{"vrfdefaultname", required_argument, NULL, 'o'},
{"vty_socket", required_argument, NULL, OPTION_VTYSOCK},
{"moduledir", required_argument, NULL, OPTION_MODULEDIR},
{"scriptdir", required_argument, NULL, OPTION_SCRIPTDIR},
{"log", required_argument, NULL, OPTION_LOG},
{"log-level", required_argument, NULL, OPTION_LOGLEVEL},
{"command-log-always", no_argument, NULL, OPTION_LOGGING},
{"limit-fds", required_argument, NULL, OPTION_LIMIT_FDS},
{NULL}};
{ "help", no_argument, NULL, 'h' },
{ "version", no_argument, NULL, 'v' },
{ "daemon", no_argument, NULL, 'd' },
{ "module", no_argument, NULL, 'M' },
{ "profile", required_argument, NULL, 'F' },
{ "pathspace", required_argument, NULL, 'N' },
{ "vrfdefaultname", required_argument, NULL, 'o' },
{ "graceful_restart", optional_argument, NULL, 'K' },
{ "vty_socket", required_argument, NULL, OPTION_VTYSOCK },
{ "moduledir", required_argument, NULL, OPTION_MODULEDIR },
{ "scriptdir", required_argument, NULL, OPTION_SCRIPTDIR },
{ "log", required_argument, NULL, OPTION_LOG },
{ "log-level", required_argument, NULL, OPTION_LOGLEVEL },
{ "command-log-always", no_argument, NULL, OPTION_LOGGING },
{ "limit-fds", required_argument, NULL, OPTION_LIMIT_FDS },
{ NULL }
};
static const struct optspec os_always = {
"hvdM:F:N:o:",
"hvdM:F:N:o:K::",
" -h, --help Display this help and exit\n"
" -v, --version Print program version\n"
" -d, --daemon Runs in daemon mode\n"
" -M, --module Load specified module\n"
" -F, --profile Use specified configuration profile\n"
" -N, --pathspace Insert prefix into config & socket paths\n"
" -o, --vrfdefaultname Set default VRF name.\n"
" -K, --graceful_restart FRR starting in Graceful Restart mode, with optional route-cleanup timer\n"
" --vty_socket Override vty socket path\n"
" --moduledir Override modules directory\n"
" --scriptdir Override scripts directory\n"
" --log Set Logging to stdout, syslog, or file:<name>\n"
" --log-level Set Logging Level to use, debug, info, warn, etc\n"
" --limit-fds Limit number of fds supported\n",
lo_always};
lo_always
};

static bool logging_to_stdout = false; /* set when --log stdout specified */

Expand Down Expand Up @@ -353,6 +357,8 @@ void frr_preinit(struct frr_daemon_info *daemon, int argc, char **argv)
strlcpy(frr_protonameinst, di->logname, sizeof(frr_protonameinst));

di->cli_mode = FRR_CLI_CLASSIC;
di->graceful_restart = false;
di->gr_cleanup_time = 0;

/* we may be starting with extra FDs open for whatever purpose,
* e.g. logging, some module, etc. Recording them here allows later
Expand Down Expand Up @@ -515,6 +521,11 @@ static int frr_opt(int opt)
di->db_file = optarg;
break;
#endif
case 'K':
di->graceful_restart = true;
if (optarg)
di->gr_cleanup_time = atoi(optarg);
break;
case 'C':
if (di->flags & FRR_NO_SPLIT_CONFIG)
return 1;
Expand Down
2 changes: 2 additions & 0 deletions lib/libfrr.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ struct frr_daemon_info {
bool dryrun;
bool daemon_mode;
bool terminal;
bool graceful_restart;
int gr_cleanup_time;
enum frr_cli_mode cli_mode;

struct event *read_in;
Expand Down
33 changes: 19 additions & 14 deletions zebra/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,6 @@ struct mgmt_be_client *mgmt_be_client;
/* Route retain mode flag. */
int retain_mode = 0;

int graceful_restart;

/* Receive buffer size for kernel control sockets */
#define RCVBUFSIZE_MIN 4194304
#ifdef HAVE_NETLINK
Expand All @@ -93,15 +91,14 @@ const struct option longopts[] = {
{ "socket", required_argument, NULL, 'z' },
{ "ecmp", required_argument, NULL, 'e' },
{ "retain", no_argument, NULL, 'r' },
{ "graceful_restart", required_argument, NULL, 'K' },
{ "asic-offload", optional_argument, NULL, OPTION_ASIC_OFFLOAD },
{ "v6-with-v4-nexthops", no_argument, NULL, OPTION_V6_WITH_V4_NEXTHOP },
#ifdef HAVE_NETLINK
{ "vrfwnetns", no_argument, NULL, 'n' },
{ "nl-bufsize", required_argument, NULL, 's' },
{ "v6-rr-semantics", no_argument, NULL, OPTION_V6_RR_SEMANTICS },
#endif /* HAVE_NETLINK */
{"routing-table", optional_argument, NULL, 'R'},
{ "routing-table", optional_argument, NULL, 'R' },
{ 0 }
};

Expand Down Expand Up @@ -353,7 +350,6 @@ int main(int argc, char **argv)
bool v6_with_v4_nexthop = false;
bool notify_on_ack = true;

graceful_restart = 0;
vrf_configure_backend(VRF_BACKEND_VRF_LITE);

frr_preinit(&zebra_di, argc, argv);
Expand All @@ -369,7 +365,6 @@ int main(int argc, char **argv)
" -z, --socket Set path of zebra socket\n"
" -e, --ecmp Specify ECMP to use.\n"
" -r, --retain When program terminates, retain added route by zebra.\n"
" -K, --graceful_restart Graceful restart at the kernel level, timer in seconds for expiration\n"
" -A, --asic-offload FRR is interacting with an asic underneath the linux kernel\n"
" --v6-with-v4-nexthops Underlying dataplane supports v6 routes with v4 nexthops\n"
#ifdef HAVE_NETLINK
Expand All @@ -379,8 +374,7 @@ int main(int argc, char **argv)
#else
" -s, Set kernel socket receive buffer size\n"
#endif /* HAVE_NETLINK */
" -R, --routing-table Set kernel routing table\n"
);
" -R, --routing-table Set kernel routing table\n");

while (1) {
int opt = frr_getopt(argc, argv, NULL);
Expand Down Expand Up @@ -424,9 +418,6 @@ int main(int argc, char **argv)
case 'r':
retain_mode = 1;
break;
case 'K':
graceful_restart = atoi(optarg);
break;
case 's':
rcvbufsize = atoi(optarg);
if (rcvbufsize < RCVBUFSIZE_MIN)
Expand Down Expand Up @@ -522,17 +513,31 @@ int main(int argc, char **argv)
* Clean up zebra-originated routes. The requests will be sent to OS
* immediately, so originating PID in notifications from kernel
* will be equal to the current getpid(). To know about such routes,
* we have to have route_read() called before.
* If FRR is gracefully restarting, we wait either for clients
* (e.g., BGP) to signal that GR is complete or for the specified
* duration to elapse.
*/
zrouter.startup_time = monotime_nano();
event_add_timer(zrouter.master, rib_sweep_route, NULL, graceful_restart,
&zrouter.sweeper);

#if defined(HAVE_CUMULUS) && defined(HAVE_CSMGR)
if (zrouter.frr_csm_smode == FAST_START ||
zrouter.frr_csm_smode == WARM_START)
zrouter.graceful_restart = true;
zrouter.maint_mode = (zrouter.frr_csm_smode == MAINT);
#endif
zrouter.rib_sweep_time = 0;
zrouter.graceful_restart = zebra_di.graceful_restart;
if (!zrouter.graceful_restart)
event_add_timer(zrouter.master, rib_sweep_route, NULL, 0, NULL);
else {
int gr_cleanup_time;

gr_cleanup_time = zebra_di.gr_cleanup_time
? zebra_di.gr_cleanup_time
: ZEBRA_GR_DEFAULT_RIB_SWEEP_TIME;
event_add_timer(zrouter.master, rib_sweep_route, NULL,
gr_cleanup_time, &zrouter.t_rib_sweep);
}

/* Needed for BSD routing socket. */
pid = getpid();
Expand Down
4 changes: 2 additions & 2 deletions zebra/rib.h
Original file line number Diff line number Diff line change
Expand Up @@ -624,10 +624,10 @@ static inline struct nexthop_group *rib_get_fib_backup_nhg(
}

extern void zebra_gr_process_client(afi_t afi, vrf_id_t vrf_id, uint8_t proto,
uint8_t instance);
uint8_t instance, time_t restart_time);

extern int rib_add_gr_run(afi_t afi, vrf_id_t vrf_id, uint8_t proto,
uint8_t instance);
uint8_t instance, time_t restart_time);

extern void zebra_vty_init(void);

Expand Down
76 changes: 52 additions & 24 deletions zebra/zebra_gr.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ static struct client_gr_info *zebra_gr_client_info_create(struct zserv *client)
info->stale_client_ptr = client;

TAILQ_INSERT_TAIL(&(client->gr_info_queue), info, gr_info);
info->client_ptr = client;
return info;
}

Expand Down Expand Up @@ -410,7 +411,7 @@ void zread_client_capabilities(ZAPI_HANDLER_ARGS)
* Schedule for after anything already in the meta Q
*/
rib_add_gr_run(api.afi, api.vrf_id, client->proto,
client->instance);
client->instance, client->restart_time);
zebra_gr_process_client_stale_routes(client, info);
break;
case ZEBRA_CLIENT_ROUTE_UPDATE_PENDING:
Expand Down Expand Up @@ -445,7 +446,11 @@ static void zebra_gr_route_stale_delete_timer_expiry(struct event *thread)
struct zserv *client;
struct vrf *vrf = vrf_lookup_by_id(info->vrf_id);

client = (struct zserv *)info->stale_client_ptr;
info->t_stale_removal = NULL;
if (zrouter.graceful_restart)
client = (struct zserv *)info->client_ptr;
else
client = (struct zserv *)info->stale_client_ptr;

cnt = zebra_gr_delete_stale_routes(info);

Expand Down Expand Up @@ -476,15 +481,24 @@ static void zebra_gr_route_stale_delete_timer_expiry(struct event *thread)
*
* Returns true when a node is deleted else false
*/
static bool zebra_gr_process_route_entry(struct zserv *client, struct route_node *rn,
struct route_entry *re, uint64_t compare_time)
static bool zebra_gr_process_route_entry(struct route_node *rn,
struct route_entry *re,
uint64_t compare_time, uint8_t proto)
{
struct nexthop *nexthop;
char buf[PREFIX2STR_BUFFER];

/* If the route is not refreshed after restart, delete the entry */
if (re->uptime < compare_time) {
if (IS_ZEBRA_DEBUG_RIB)
zlog_debug("%s: Client %s stale route %pFX is deleted",
__func__, zebra_route_string(client->proto),
&rn->p);
if (IS_ZEBRA_DEBUG_RIB) {
prefix2str(&rn->p, buf, sizeof(buf));
zlog_debug("%s: Client %s stale route %s is deleted",
__func__, zebra_route_string(proto), buf);
}
SET_FLAG(re->status, ROUTE_ENTRY_INSTALLED);
for (ALL_NEXTHOPS(re->nhe->nhg, nexthop))
SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);

rib_delnode(rn, re);

return true;
Expand Down Expand Up @@ -520,8 +534,7 @@ static void zebra_gr_delete_stale_route_table_afi(struct event *event)
*/

if (re->type == gac->proto && re->instance == gac->instance &&
zebra_gr_process_route_entry(gac->info->stale_client_ptr, rn, re,
gac->restart_time))
zebra_gr_process_route_entry(rn, re, gac->restart_time, gac->proto))
n++;

/* If the max route count is reached
Expand Down Expand Up @@ -555,29 +568,42 @@ static int32_t zebra_gr_delete_stale_route(struct client_gr_info *info,
uint8_t proto;
uint16_t instance;
struct zserv *s_client;
//uint64_t restart_time;
struct zserv *client;
uint64_t restart_time;

s_client = info->stale_client_ptr;
if (s_client == NULL) {
LOG_GR("%s: Stale client %s(%u) not present", __func__,
zvrf->vrf->name, zvrf->vrf->vrf_id);
if ((info == NULL) || (zvrf == NULL))
return -1;
}

proto = s_client->proto;
instance = s_client->instance;
if (zrouter.graceful_restart) {
client = info->client_ptr;
if (client == NULL) {
LOG_GR("%s: client not present", __func__);
return -1;
}
proto = client->proto;
instance = client->instance;
restart_time = zrouter.startup_time;
} else {
s_client = info->stale_client_ptr;
if (s_client == NULL) {
LOG_GR("%s: Stale client not present", __func__);
return -1;
}
proto = s_client->proto;
instance = s_client->instance;
restart_time = s_client->restart_time;
}

LOG_GR("%s: Client %s %s(%u) stale routes are being deleted", __func__,
zebra_route_string(proto), zvrf->vrf->name, zvrf->vrf->vrf_id);

/* Process routes for all AFI */
for (afi = AFI_IP; afi < AFI_MAX; afi++) {

/*
* Schedule for immediately after anything in the
* meta-Q
*/
rib_add_gr_run(afi, info->vrf_id, proto, instance);
rib_add_gr_run(afi, info->vrf_id, proto, instance, restart_time);
}
return 0;
}
Expand Down Expand Up @@ -630,20 +656,21 @@ static void zebra_gr_process_client_stale_routes(struct zserv *client,

/*
* Route update completed for all AFI, SAFI
* Cancel the stale timer, routes are already being processed
* Also perform the cleanup if FRR itself is gracefully restarting.
*/
if (info->t_stale_removal) {
info->route_sync_done_time = monotime(NULL);
if (info->t_stale_removal || zrouter.graceful_restart) {
struct vrf *vrf = vrf_lookup_by_id(info->vrf_id);

LOG_GR("%s: Client %s canceled stale delete timer vrf %s(%d)",
LOG_GR("%s: Client %s route update complete for all AFI/SAFI in vrf %s(%d)",
__func__, zebra_route_string(client->proto),
VRF_LOGNAME(vrf), info->vrf_id);
EVENT_OFF(info->t_stale_removal);
}
}

void zebra_gr_process_client(afi_t afi, vrf_id_t vrf_id, uint8_t proto,
uint8_t instance)
uint8_t instance, time_t restart_time)
{
struct zserv *client = zserv_find_client(proto, instance);
struct client_gr_info *info = NULL;
Expand All @@ -665,6 +692,7 @@ void zebra_gr_process_client(afi_t afi, vrf_id_t vrf_id, uint8_t proto,
gac->afi = afi;
gac->proto = proto;
gac->instance = instance;
gac->restart_time = restart_time;

event_add_event(zrouter.master, zebra_gr_delete_stale_route_table_afi,
gac, 0, &gac->t_gac);
Expand Down
Loading

0 comments on commit 9d2234b

Please sign in to comment.