zebra: On startup actually allow for nhe's to be early

Recent commits have moved the creation of zebra's nexthop group cache entries to after the dplane has started up. This leaves us in a situation where a cache entry's creation time *can* be (and in practice is) later than the router startup time that graceful restart uses when sweeping. Just notice that we are in startup mode and do the right thing: set the entry's time to slightly before the router startup time. Signed-off-by: Donald Sharp <[email protected]>
FRRouting · Sep 30, 2024 · 87cd8a0 · 87cd8a0
1 parent 3941fb4
commit 87cd8a0
Show file tree

Hide file tree

Showing 2 changed files with 27 additions and 6 deletions.
diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c
@@ -977,6 +977,11 @@ static int nhg_ctx_get_afi(const struct nhg_ctx *ctx)
 	return ctx->afi;
 }
 
+static bool nhg_ctx_get_startup(const struct nhg_ctx *ctx)
+{
+	return ctx->startup;
+}
+
 static struct nexthop *nhg_ctx_get_nh(struct nhg_ctx *ctx)
 {
 	return &ctx->u.nh;
@@ -1028,10 +1033,9 @@ void nhg_ctx_free(struct nhg_ctx **ctx)
 	XFREE(MTYPE_NHG_CTX, *ctx);
 }
 
-static struct nhg_ctx *nhg_ctx_init(uint32_t id, struct nexthop *nh,
-				    struct nh_grp *grp, vrf_id_t vrf_id,
-				    afi_t afi, int type, uint8_t count,
-				    struct nhg_resilience *resilience)
+static struct nhg_ctx *nhg_ctx_init(uint32_t id, struct nexthop *nh, struct nh_grp *grp,
+				    vrf_id_t vrf_id, afi_t afi, int type, uint8_t count,
+				    struct nhg_resilience *resilience, bool startup)
 {
 	struct nhg_ctx *ctx = NULL;
 
@@ -1042,6 +1046,7 @@ static struct nhg_ctx *nhg_ctx_init(uint32_t id, struct nexthop *nh,
 	ctx->afi = afi;
 	ctx->type = type;
 	ctx->count = count;
+	ctx->startup = startup;
 
 	if (resilience)
 		ctx->resilience = *resilience;
@@ -1204,6 +1209,7 @@ static int nhg_ctx_process_new(struct nhg_ctx *ctx)
 	vrf_id_t vrf_id = nhg_ctx_get_vrf_id(ctx);
 	int type = nhg_ctx_get_type(ctx);
 	afi_t afi = nhg_ctx_get_afi(ctx);
+	bool startup = nhg_ctx_get_startup(ctx);
 
 	lookup = zebra_nhg_lookup_id(id);
 
@@ -1260,6 +1266,19 @@ static int nhg_ctx_process_new(struct nhg_ctx *ctx)
 	SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID);
 	SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
 
+	/*
+	 * On startup Zebra is creating the nexthop group cache entry
+	 * after the router has its startup time set.  This is because
+	 * the process of grabbing routes and nexthops is now *after*
+	 * the dataplane starts up, which is after the router's startup
+	 * time is set.  So let's just cheat a tiny bit on the time
+	 * and set the nexthop group hash entry uptime to be
+	 * slightly before the zrouter.startup_time.  Then graceful
+	 * restart sweeping will work properly for these nexthop entries.
+	 */
+	if (startup)
+		nhe->uptime = zrouter.startup_time - 1;
+
 	return 0;
 }
 
@@ -1368,7 +1387,7 @@ int zebra_nhg_kernel_find(uint32_t id, struct nexthop *nh, struct nh_grp *grp,
 		 */
 		id_counter = id;
 
-	ctx = nhg_ctx_init(id, nh, grp, vrf_id, afi, type, count, nhgr);
+	ctx = nhg_ctx_init(id, nh, grp, vrf_id, afi, type, count, nhgr, startup);
 	nhg_ctx_set_op(ctx, NHG_CTX_OP_NEW);
 
 	/* Under startup conditions, we need to handle them immediately
@@ -1391,7 +1410,7 @@ int zebra_nhg_kernel_del(uint32_t id, vrf_id_t vrf_id)
 {
 	struct nhg_ctx *ctx = NULL;
 
-	ctx = nhg_ctx_init(id, NULL, NULL, vrf_id, 0, 0, 0, NULL);
+	ctx = nhg_ctx_init(id, NULL, NULL, vrf_id, 0, 0, 0, NULL, false);
 
 	nhg_ctx_set_op(ctx, NHG_CTX_OP_DEL);
 

diff --git a/zebra/zebra_nhg.h b/zebra/zebra_nhg.h
@@ -242,6 +242,8 @@ struct nhg_ctx {
 	struct nhg_resilience resilience;
 	enum nhg_ctx_op_e op;
 	enum nhg_ctx_status status;
+
+	bool startup;
 };
 
 /* Global control to disable use of kernel nexthops, if available. We can't