Skip to content

Commit

Permalink
zebra: On startup actually allow for nhe's to be early
Browse files Browse the repository at this point in the history
Recent commits have moved zebra's nexthop group cache
entries to be figured out after the dplane has started up.
As such this leaves us with a situation where the cache
entries startup time *can* be/is greater than the startup
time for the purposes of graceful restart.  Just notice
that we are in startup mode and do the right thing.

Signed-off-by: Donald Sharp <[email protected]>
  • Loading branch information
donaldsharp committed Sep 30, 2024
1 parent 3941fb4 commit 87cd8a0
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 6 deletions.
31 changes: 25 additions & 6 deletions zebra/zebra_nhg.c
Original file line number Diff line number Diff line change
Expand Up @@ -977,6 +977,11 @@ static int nhg_ctx_get_afi(const struct nhg_ctx *ctx)
return ctx->afi;
}

/*
 * Accessor for the startup flag stored in a nexthop group context.
 *
 * Returns the value of ctx->startup unchanged; ctx must be non-NULL.
 */
static bool nhg_ctx_get_startup(const struct nhg_ctx *ctx)
{
	const bool created_at_startup = ctx->startup;

	return created_at_startup;
}

static struct nexthop *nhg_ctx_get_nh(struct nhg_ctx *ctx)
{
return &ctx->u.nh;
Expand Down Expand Up @@ -1028,10 +1033,9 @@ void nhg_ctx_free(struct nhg_ctx **ctx)
XFREE(MTYPE_NHG_CTX, *ctx);
}

static struct nhg_ctx *nhg_ctx_init(uint32_t id, struct nexthop *nh,
struct nh_grp *grp, vrf_id_t vrf_id,
afi_t afi, int type, uint8_t count,
struct nhg_resilience *resilience)
static struct nhg_ctx *nhg_ctx_init(uint32_t id, struct nexthop *nh, struct nh_grp *grp,
vrf_id_t vrf_id, afi_t afi, int type, uint8_t count,
struct nhg_resilience *resilience, bool startup)
{
struct nhg_ctx *ctx = NULL;

Expand All @@ -1042,6 +1046,7 @@ static struct nhg_ctx *nhg_ctx_init(uint32_t id, struct nexthop *nh,
ctx->afi = afi;
ctx->type = type;
ctx->count = count;
ctx->startup = startup;

if (resilience)
ctx->resilience = *resilience;
Expand Down Expand Up @@ -1204,6 +1209,7 @@ static int nhg_ctx_process_new(struct nhg_ctx *ctx)
vrf_id_t vrf_id = nhg_ctx_get_vrf_id(ctx);
int type = nhg_ctx_get_type(ctx);
afi_t afi = nhg_ctx_get_afi(ctx);
bool startup = nhg_ctx_get_startup(ctx);

lookup = zebra_nhg_lookup_id(id);

Expand Down Expand Up @@ -1260,6 +1266,19 @@ static int nhg_ctx_process_new(struct nhg_ctx *ctx)
SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID);
SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);

/*
* On startup Zebra is creating the nexthop group cache entry
* after the router has its startup time set. This is because
* the process of grabbing routes and nexthops is now *after*
* the dataplane starts up, which is after the router's startup
* time is set. So let's just cheat a tiny bit on the time
* and set the nexthop group hash entry startup time to be
* slightly before the zrouter.startup_time. Then graceful
* restart sweeping will work properly for these nexthop entries.
*/
if (startup)
nhe->uptime = zrouter.startup_time - 1;

return 0;
}

Expand Down Expand Up @@ -1368,7 +1387,7 @@ int zebra_nhg_kernel_find(uint32_t id, struct nexthop *nh, struct nh_grp *grp,
*/
id_counter = id;

ctx = nhg_ctx_init(id, nh, grp, vrf_id, afi, type, count, nhgr);
ctx = nhg_ctx_init(id, nh, grp, vrf_id, afi, type, count, nhgr, startup);
nhg_ctx_set_op(ctx, NHG_CTX_OP_NEW);

/* Under startup conditions, we need to handle them immediately
Expand All @@ -1391,7 +1410,7 @@ int zebra_nhg_kernel_del(uint32_t id, vrf_id_t vrf_id)
{
struct nhg_ctx *ctx = NULL;

ctx = nhg_ctx_init(id, NULL, NULL, vrf_id, 0, 0, 0, NULL);
ctx = nhg_ctx_init(id, NULL, NULL, vrf_id, 0, 0, 0, NULL, false);

nhg_ctx_set_op(ctx, NHG_CTX_OP_DEL);

Expand Down
2 changes: 2 additions & 0 deletions zebra/zebra_nhg.h
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,8 @@ struct nhg_ctx {
struct nhg_resilience resilience;
enum nhg_ctx_op_e op;
enum nhg_ctx_status status;

bool startup;
};

/* Global control to disable use of kernel nexthops, if available. We can't
Expand Down

0 comments on commit 87cd8a0

Please sign in to comment.