From a272a2b364604f829aa21f323c0422613b423c43 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Thu, 19 Oct 2023 16:38:12 -0400 Subject: [PATCH] zebra: Allow longer prefix matches for nexthops Zebra currently does a shortest prefix match for resolving nexthops for a prefix. This is typically an ok thing to do but fails in several specific scenarios. If a nexthop matches to a route that is not usable, nexthop resolution just gives up and refuses to use that particular route. For example if zebra currently has a covering prefix say a 10.0.0.0/8. And about the same time it receives a 10.1.0.0/16 ( a more specific than the /8 ) and another route A, who's nexthop is 10.1.1.1. Imagine the 10.1.0.0/16 is processed enough to know we want to install it and the prefix is sent to the dataplane for installation( it is queued ) and then route A is processed, nexthop resolution will fail and the route A will be left in limbo as uninstallable. Let's modify the nexthop resolution code in zebra such that if a nexthop's most specific match is unusable, continue looking up the table till we get to the 0.0.0.0/0 route( if it's even installed ). If we find a usable route for the nexthop accept it and use it. The bgp_default_originate topology test is frequently failing with this exact problem: B>* 0.0.0.0/0 [200/0] via 192.168.1.1, r2-r1-eth0, weight 1, 00:00:21 B 1.0.1.17/32 [200/0] via 192.168.0.1 inactive, weight 1, 00:00:21 B>* 1.0.2.17/32 [200/0] via 192.168.1.1, r2-r1-eth0, weight 1, 00:00:21 C>* 1.0.3.17/32 is directly connected, lo, 00:02:00 B>* 1.0.5.17/32 [20/0] via 192.168.2.2, r2-r3-eth1, weight 1, 00:00:32 B>* 192.168.0.0/24 [200/0] via 192.168.1.1, r2-r1-eth0, weight 1, 00:00:21 B 192.168.1.0/24 [200/0] via 192.168.1.1 inactive, weight 1, 00:00:21 C>* 192.168.1.0/24 is directly connected, r2-r1-eth0, 00:02:00 C>* 192.168.2.0/24 is directly connected, r2-r3-eth1, 00:02:00 B>* 192.168.3.0/24 [20/0] via 192.168.2.2, r2-r3-eth1, weight 1, 00:00:32 B 198.51.1.1/32 [200/0] via 192.168.0.1 inactive, weight 1, 00:00:21 B>* 198.51.1.2/32 [20/0] via 192.168.2.2, r2-r3-eth1, weight 1, 00:00:32 Notice that the 1.0.1.17/32 route is inactive but the nexthop 192.168.0.1 is covered by both the 192.168.0.0/24 prefix( shortest match ) *and* the 0.0.0.0/0 route ( longest match ). When looking at the logs the 1.0.1.17/32 route was not being installed because the matching route was not in a usable state, which is because the 192.168.0.0/24 route was in the process of being installed. Signed-off-by: Donald Sharp --- .../test_all_protocol_startup.py | 6 +- zebra/zebra_nhg.c | 82 ++++++++----------- 2 files changed, 39 insertions(+), 49 deletions(-) diff --git a/tests/topotests/all_protocol_startup/test_all_protocol_startup.py b/tests/topotests/all_protocol_startup/test_all_protocol_startup.py index c319477c8ae3..ca340749feb7 100644 --- a/tests/topotests/all_protocol_startup/test_all_protocol_startup.py +++ b/tests/topotests/all_protocol_startup/test_all_protocol_startup.py @@ -599,16 +599,16 @@ def test_nexthop_groups(): count = 0 dups = [] nhg_id = route_get_nhg_id("6.6.6.1/32") - while (len(dups) != 3) and count < 10: + while (len(dups) != 4) and count < 10: output = net["r1"].cmd('vtysh -c "show nexthop-group rib %d"' % nhg_id) dups = re.findall(r"(via 1\.1\.1\.1)", output) - if len(dups) != 3: + if len(dups) != 4: count += 1 sleep(1) # Should find 3, itself is inactive - assert len(dups) == 3, ( + assert len(dups) == 4, ( "Route 6.6.6.1/32 with Nexthop Group ID=%d has wrong number of resolved nexthops" % nhg_id ) diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c index 6517b7830bb0..19e2657f1df9 100644 --- a/zebra/zebra_nhg.c +++ b/zebra/zebra_nhg.c @@ -2248,20 +2248,6 @@ static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe, return 1; } - if (top && - ((top->family == AF_INET && top->prefixlen == IPV4_MAX_BITLEN && - nexthop->gate.ipv4.s_addr == top->u.prefix4.s_addr) || - (top->family == AF_INET6 && top->prefixlen == IPV6_MAX_BITLEN && - memcmp(&nexthop->gate.ipv6, &top->u.prefix6, IPV6_MAX_BYTELEN) == - 0)) && - nexthop->vrf_id == vrf_id) { - if (IS_ZEBRA_DEBUG_RIB_DETAILED) - zlog_debug( - " :%s: Attempting to install a max prefixlength route through itself", - __func__); - return 0; - } - /* Validation for ipv4 mapped ipv6 nexthop. */ if (IS_MAPPED_IPV6(&nexthop->gate.ipv6)) { afi = AFI_IP; @@ -2364,7 +2350,7 @@ static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe, zlog_debug( " %s: Matched against ourself and prefix length is not max bit length", __func__); - return 0; + goto continue_up_tree; } /* Pick up selected route. */ @@ -2391,20 +2377,12 @@ static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe, /* If there is no selected route or matched route is EGP, go up * tree. */ - if (!match) { - do { - rn = rn->parent; - } while (rn && rn->info == NULL); - if (rn) - route_lock_node(rn); - continue; - } /* If the candidate match's type is considered "connected", * we consider it first. */ - if (RIB_CONNECTED_ROUTE(match) || - (RIB_SYSTEM_ROUTE(match) && RSYSTEM_ROUTE(type))) { + if (match && (RIB_CONNECTED_ROUTE(match) || + (RIB_SYSTEM_ROUTE(match) && RSYSTEM_ROUTE(type)))) { match = zebra_nhg_connected_ifindex(rn, match, nexthop->ifindex); @@ -2420,11 +2398,7 @@ static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe, zlog_debug( "%s: %pNHv given ifindex does not match nexthops ifindex found: %pNHv", __func__, nexthop, newhop); - /* - * NEXTHOP_TYPE_*_IFINDEX but ifindex - * doesn't match what we found. - */ - return 0; + goto continue_up_tree; } /* NHRP special case: need to indicate onlink */ @@ -2437,7 +2411,7 @@ static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe, __func__, match, match->nhe, newhop); return 1; - } else if (CHECK_FLAG(flags, ZEBRA_FLAG_ALLOW_RECURSION)) { + } else if (match && CHECK_FLAG(flags, ZEBRA_FLAG_ALLOW_RECURSION)) { struct nexthop_group *nhg; struct nexthop *resolver; struct backup_nh_map_s map = {}; @@ -2473,6 +2447,10 @@ static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe, "%s: match %p (%pNG) not installed or being Route Replaced", __func__, match, match->nhe); + if (CHECK_FLAG(match->status, + ROUTE_ENTRY_QUEUED)) + goto continue_up_tree; + goto done_with_match; } @@ -2541,25 +2519,37 @@ static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe, if (pmtu) *pmtu = match->mtu; - } else if (IS_ZEBRA_DEBUG_RIB_DETAILED) - zlog_debug( - " %s: Recursion failed to find", - __func__); - - return resolved; - } else { - if (IS_ZEBRA_DEBUG_RIB_DETAILED) { - zlog_debug( - " %s: Route Type %s has not turned on recursion", - __func__, zebra_route_string(type)); - if (type == ZEBRA_ROUTE_BGP - && !CHECK_FLAG(flags, ZEBRA_FLAG_IBGP)) + } else { + if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug( - " EBGP: see \"disable-ebgp-connected-route-check\" or \"disable-connected-check\""); + " %s: Recursion failed to find while looking at %pRN", + __func__, rn); + goto continue_up_tree; } - return 0; + + return 1; + } else if (IS_ZEBRA_DEBUG_RIB_DETAILED) { + zlog_debug( + " %s: Route Type %s has not turned on recursion %pRN failed to match", + __func__, zebra_route_string(type), rn); + if (type == ZEBRA_ROUTE_BGP + && !CHECK_FLAG(flags, ZEBRA_FLAG_IBGP)) + zlog_debug( + " EBGP: see \"disable-ebgp-connected-route-check\" or \"disable-connected-check\""); } + + continue_up_tree: + /* + * If there is no selected route or matched route is EGP, go up + * tree. + */ + do { + rn = rn->parent; + } while (rn && rn->info == NULL); + if (rn) + route_lock_node(rn); } + if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug(" %s: Nexthop did not lookup in table", __func__);