From 87cd8a05415c52412c535c131322331a66f0d0a5 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Mon, 30 Sep 2024 11:20:17 -0400 Subject: [PATCH] zebra: On startup actually allow for nhe's to be early Recent commits have moved zebra's nexthop group cache entries to be figured out after the dplane has started up. As such this leaves us with a situation where the cache entries' startup time *can* be/is greater than the startup time for the purposes of graceful restart. Just notice that we are in startup mode and do the right thing. Signed-off-by: Donald Sharp --- zebra/zebra_nhg.c | 31 +++++++++++++++++++++++++------ zebra/zebra_nhg.h | 2 ++ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c index 4ee9dc5fcf8e..f2a673f6d9d0 100644 --- a/zebra/zebra_nhg.c +++ b/zebra/zebra_nhg.c @@ -977,6 +977,11 @@ static int nhg_ctx_get_afi(const struct nhg_ctx *ctx) return ctx->afi; } +static bool nhg_ctx_get_startup(const struct nhg_ctx *ctx) +{ + return ctx->startup; +} + static struct nexthop *nhg_ctx_get_nh(struct nhg_ctx *ctx) { return &ctx->u.nh; @@ -1028,10 +1033,9 @@ void nhg_ctx_free(struct nhg_ctx **ctx) XFREE(MTYPE_NHG_CTX, *ctx); } -static struct nhg_ctx *nhg_ctx_init(uint32_t id, struct nexthop *nh, - struct nh_grp *grp, vrf_id_t vrf_id, - afi_t afi, int type, uint8_t count, - struct nhg_resilience *resilience) +static struct nhg_ctx *nhg_ctx_init(uint32_t id, struct nexthop *nh, struct nh_grp *grp, + vrf_id_t vrf_id, afi_t afi, int type, uint8_t count, + struct nhg_resilience *resilience, bool startup) { struct nhg_ctx *ctx = NULL; @@ -1042,6 +1046,7 @@ static struct nhg_ctx *nhg_ctx_init(uint32_t id, struct nexthop *nh, ctx->afi = afi; ctx->type = type; ctx->count = count; + ctx->startup = startup; if (resilience) ctx->resilience = *resilience; @@ -1204,6 +1209,7 @@ static int nhg_ctx_process_new(struct nhg_ctx *ctx) vrf_id_t vrf_id = nhg_ctx_get_vrf_id(ctx); int type = nhg_ctx_get_type(ctx); afi_t afi = 
nhg_ctx_get_afi(ctx); + bool startup = nhg_ctx_get_startup(ctx); lookup = zebra_nhg_lookup_id(id); @@ -1260,6 +1266,19 @@ static int nhg_ctx_process_new(struct nhg_ctx *ctx) SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID); SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED); + /* + * On startup Zebra is creating the nexthop group cache entry + * after the router has its startup time set. This is because + * the process of grabbing routes and nexthops is now *after* + * the dataplane starts up, which is after the router's startup + * time is set. So let's just cheat a tiny bit on the time + * and set the nexthop group hash entry startup time to be + * slightly before the zrouter.startup_time. Then graceful + * restart sweeping will work properly for these nexthop entries. + */ + if (startup) + nhe->uptime = zrouter.startup_time - 1; + return 0; } @@ -1368,7 +1387,7 @@ int zebra_nhg_kernel_find(uint32_t id, struct nexthop *nh, struct nh_grp *grp, */ id_counter = id; - ctx = nhg_ctx_init(id, nh, grp, vrf_id, afi, type, count, nhgr); + ctx = nhg_ctx_init(id, nh, grp, vrf_id, afi, type, count, nhgr, startup); nhg_ctx_set_op(ctx, NHG_CTX_OP_NEW); /* Under statup conditions, we need to handle them immediately @@ -1391,7 +1410,7 @@ int zebra_nhg_kernel_del(uint32_t id, vrf_id_t vrf_id) { struct nhg_ctx *ctx = NULL; - ctx = nhg_ctx_init(id, NULL, NULL, vrf_id, 0, 0, 0, NULL); + ctx = nhg_ctx_init(id, NULL, NULL, vrf_id, 0, 0, 0, NULL, false); nhg_ctx_set_op(ctx, NHG_CTX_OP_DEL); diff --git a/zebra/zebra_nhg.h b/zebra/zebra_nhg.h index 712c1057a1a8..6ae2041878c9 100644 --- a/zebra/zebra_nhg.h +++ b/zebra/zebra_nhg.h @@ -242,6 +242,8 @@ struct nhg_ctx { struct nhg_resilience resilience; enum nhg_ctx_op_e op; enum nhg_ctx_status status; + + bool startup; }; /* Global control to disable use of kernel nexthops, if available. We can't