diff --git a/tests/topotests/zebra_nht_double_recursion/r1/bgpd.conf b/tests/topotests/zebra_nht_double_recursion/r1/bgpd.conf new file mode 100644 index 000000000000..8295efca7c2d --- /dev/null +++ b/tests/topotests/zebra_nht_double_recursion/r1/bgpd.conf @@ -0,0 +1,12 @@ +router bgp 65001 + no bgp ebgp-requires-policy + no bgp network import-check + neighbor 10.0.0.2 remote-as internal + neighbor 10.0.0.2 update-source lo + neighbor 10.0.0.2 timers 1 3 + neighbor 10.0.0.2 timers connect 1 + neighbor 10.0.0.3 remote-as internal + neighbor 10.0.0.3 update-source lo + neighbor 10.0.0.3 timers 1 3 + neighbor 10.0.0.3 timers connect 1 +! diff --git a/tests/topotests/zebra_nht_double_recursion/r1/ospfd.conf b/tests/topotests/zebra_nht_double_recursion/r1/ospfd.conf new file mode 100644 index 000000000000..098bf57b0365 --- /dev/null +++ b/tests/topotests/zebra_nht_double_recursion/r1/ospfd.conf @@ -0,0 +1,17 @@ +! +interface lo + ip ospf passive +! +interface r1-eth0 + ip ospf dead-interval 4 + ip ospf hello-interval 1 + ip ospf cost 10 +! +interface r1-eth1 + ip ospf dead-interval 4 + ip ospf hello-interval 1 + ip ospf cost 30 +! +router ospf + router-id 10.0.0.1 + network 0.0.0.0/0 area 0 diff --git a/tests/topotests/zebra_nht_double_recursion/r1/zebra.conf b/tests/topotests/zebra_nht_double_recursion/r1/zebra.conf new file mode 100644 index 000000000000..0ed22d37be8b --- /dev/null +++ b/tests/topotests/zebra_nht_double_recursion/r1/zebra.conf @@ -0,0 +1,10 @@ +! +interface lo + ip address 10.0.0.1/32 +! +interface r1-eth0 + ip address 192.168.12.1/24 +! +interface r1-eth1 + ip address 192.168.13.1/24 +! diff --git a/tests/topotests/zebra_nht_double_recursion/r2/bgpd.conf b/tests/topotests/zebra_nht_double_recursion/r2/bgpd.conf new file mode 100644 index 000000000000..4750c7d6cfdc --- /dev/null +++ b/tests/topotests/zebra_nht_double_recursion/r2/bgpd.conf @@ -0,0 +1,18 @@ +router bgp 65001 + no bgp ebgp-requires-policy + no bgp network import-check + neighbor 10.0.0.1 remote-as internal + neighbor 10.0.0.1 update-source lo + neighbor 10.0.0.1 timers 1 3 + neighbor 10.0.0.1 timers connect 1 + neighbor 192.168.24.4 remote-as external + neighbor 192.168.24.4 timers 1 3 + neighbor 192.168.24.4 timers connect 1 + address-family ipv4 + redistribute connected route-map connected-to-bgp + exit-address-family +! +route-map connected-to-bgp permit 10 + set metric 100 + set community no-export +! diff --git a/tests/topotests/zebra_nht_double_recursion/r2/ospfd.conf b/tests/topotests/zebra_nht_double_recursion/r2/ospfd.conf new file mode 100644 index 000000000000..106a46251d2e --- /dev/null +++ b/tests/topotests/zebra_nht_double_recursion/r2/ospfd.conf @@ -0,0 +1,13 @@ +! +interface lo + ip ospf passive +! +interface r2-eth0 + ip ospf dead-interval 4 + ip ospf hello-interval 1 + ip ospf cost 10 +! +router ospf + router-id 10.0.0.2 + network 192.168.12.0/24 area 0 + network 10.0.0.2/32 area 0 diff --git a/tests/topotests/zebra_nht_double_recursion/r2/zebra.conf b/tests/topotests/zebra_nht_double_recursion/r2/zebra.conf new file mode 100644 index 000000000000..6d6a557caae7 --- /dev/null +++ b/tests/topotests/zebra_nht_double_recursion/r2/zebra.conf @@ -0,0 +1,10 @@ +! +interface lo + ip address 10.0.0.2/32 +! +interface r2-eth0 + ip address 192.168.12.2/24 +! +interface r2-eth1 + ip address 192.168.24.2/24 +! diff --git a/tests/topotests/zebra_nht_double_recursion/r3/bgpd.conf b/tests/topotests/zebra_nht_double_recursion/r3/bgpd.conf new file mode 100644 index 000000000000..79f99dbe749b --- /dev/null +++ b/tests/topotests/zebra_nht_double_recursion/r3/bgpd.conf @@ -0,0 +1,18 @@ +router bgp 65001 + no bgp ebgp-requires-policy + no bgp network import-check + neighbor 10.0.0.1 remote-as internal + neighbor 10.0.0.1 update-source lo + neighbor 10.0.0.1 timers 1 3 + neighbor 10.0.0.1 timers connect 1 + neighbor 192.168.34.4 remote-as external + neighbor 192.168.34.4 timers 1 3 + neighbor 192.168.34.4 timers connect 1 + address-family ipv4 + redistribute connected route-map connected-to-bgp + exit-address-family +! +route-map connected-to-bgp permit 10 + set metric 100 + set community no-export +! diff --git a/tests/topotests/zebra_nht_double_recursion/r3/ospfd.conf b/tests/topotests/zebra_nht_double_recursion/r3/ospfd.conf new file mode 100644 index 000000000000..9ede3a1fab64 --- /dev/null +++ b/tests/topotests/zebra_nht_double_recursion/r3/ospfd.conf @@ -0,0 +1,13 @@ +! +interface lo + ip ospf passive +! +interface r3-eth0 + ip ospf dead-interval 4 + ip ospf hello-interval 1 + ip ospf cost 30 +! +router ospf + router-id 10.0.0.3 + network 192.168.13.0/24 area 0 + network 10.0.0.3/32 area 0 diff --git a/tests/topotests/zebra_nht_double_recursion/r3/zebra.conf b/tests/topotests/zebra_nht_double_recursion/r3/zebra.conf new file mode 100644 index 000000000000..2ae098e0f378 --- /dev/null +++ b/tests/topotests/zebra_nht_double_recursion/r3/zebra.conf @@ -0,0 +1,10 @@ +! +interface lo + ip address 10.0.0.3/32 +! +interface r3-eth0 + ip address 192.168.13.3/24 +! +interface r3-eth1 + ip address 192.168.34.3/24 +! diff --git a/tests/topotests/zebra_nht_double_recursion/r4/bgpd.conf b/tests/topotests/zebra_nht_double_recursion/r4/bgpd.conf new file mode 100644 index 000000000000..8dc31df5e58d --- /dev/null +++ b/tests/topotests/zebra_nht_double_recursion/r4/bgpd.conf @@ -0,0 +1,13 @@ +router bgp 65002 + no bgp ebgp-requires-policy + no bgp network import-check + neighbor 192.168.24.2 remote-as external + neighbor 192.168.24.2 timers 1 3 + neighbor 192.168.24.2 timers connect 1 + neighbor 192.168.34.3 remote-as external + neighbor 192.168.34.3 timers 1 3 + neighbor 192.168.34.3 timers connect 1 + address-family ipv4 + network 10.0.0.4/32 + exit-address-family +! diff --git a/tests/topotests/zebra_nht_double_recursion/r4/zebra.conf b/tests/topotests/zebra_nht_double_recursion/r4/zebra.conf new file mode 100644 index 000000000000..421569eab1fe --- /dev/null +++ b/tests/topotests/zebra_nht_double_recursion/r4/zebra.conf @@ -0,0 +1,10 @@ +! +interface lo + ip address 10.0.0.4/32 +! +interface r4-eth0 + ip address 192.168.24.4/24 +! +interface r4-eth1 + ip address 192.168.34.4/24 +! diff --git a/tests/topotests/zebra_nht_double_recursion/test_nht_double_recusion.py b/tests/topotests/zebra_nht_double_recursion/test_nht_double_recusion.py new file mode 100644 index 000000000000..9ce9848d57ca --- /dev/null +++ b/tests/topotests/zebra_nht_double_recursion/test_nht_double_recusion.py @@ -0,0 +1,207 @@ +#!/usr/bin/env python +# SPDX-License-Identifier: GPL-2.0-or-later + +# +# Copyright (C) 2024 by Palo Alto Networks, Inc. +# Enke Chen +# + +""" +Test to make sure the nexthop being tracked is resolved recursivley to +an OSPF route so that the right IGP metric is given to BGP for bestpath +calculation. + +In this test, on r2/r3, the "next-hop-self" is not configured, and the +connected routes are redistributed into BGP for the nexthop reachability +of the EBGP routes. + +IP Addresses for the loopback interfaces: + + r1: 10.0.0.1/32 + r2: 10.0.0.2/32 + r3: 10.0.0.3/32 + r4: 10.0.0.4/32 + +10.0.0.4/32 is redistributed into BGP on r4, and is advertised to both +r2 and r3. Then r1 receives 10.0.0.4/32 from both r2 and r3, and each of +the nexthops is resolved to another BGP route which is then resolved to +an OSPF route. + +On r1, the best path for 10.0.0.4/32 should be the one from r2 due to +the lower IGP metric (10 vs 30) to the nexthop. + + + 10 30 + r2 ------- r1 (UUT) ------- r3 + | | + ebgp | | ebgp + | | + +----------- r4 -------------+ + .24.4 .34.4 + + +On r1 (UUT): + +[BGP] 10.0.0.4/32: nexthop 192.168.24.4 + nexthop 192.168.34.4 + +192.168.24.4 --> [BGP] 192.168.24.0/24 +192.168.34.4 --> [BGP] 192.168.34.0/24 + +[BGP] 192.168.24.0/24: nexthop 10.0.0.2 +[BGP] 192.168.34.0/24: nexthop 10.0.0.3 + +10.0.0.2 --> [OSPF] 10.0.0.2/32, nexthop xxx, intf xxx +10.0.0.3 --> [OSPF] 10.0.0.3/32, nexthop xxx, intf xxx +""" + +import os +import sys +import json +import pytest +import functools + +CWD = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.join(CWD, "../")) + +# pylint: disable=C0413 +from lib import topotest +from lib.topogen import Topogen, TopoRouter, get_topogen +from lib.common_config import step + +pytestmark = [pytest.mark.bgpd] + + +def build_topo(tgen): + for routern in range(1, 5): + tgen.add_router("r{}".format(routern)) + + switch = tgen.add_switch("s1") + switch.add_link(tgen.gears["r1"]) + switch.add_link(tgen.gears["r2"]) + + switch = tgen.add_switch("s2") + switch.add_link(tgen.gears["r1"]) + switch.add_link(tgen.gears["r3"]) + + switch = tgen.add_switch("s3") + switch.add_link(tgen.gears["r2"]) + switch.add_link(tgen.gears["r4"]) + + switch = tgen.add_switch("s4") + switch.add_link(tgen.gears["r3"]) + switch.add_link(tgen.gears["r4"]) + + +def setup_module(mod): + tgen = Topogen(build_topo, mod.__name__) + tgen.start_topology() + + router_list = tgen.routers() + + for i, (rname, router) in enumerate(router_list.items(), 1): + router.load_config( + TopoRouter.RD_ZEBRA, os.path.join(CWD, "{}/zebra.conf".format(rname)) + ) + router.load_config( + TopoRouter.RD_OSPF, os.path.join(CWD, "{}/ospfd.conf".format(rname)) + ) + router.load_config( + TopoRouter.RD_BGP, os.path.join(CWD, "{}/bgpd.conf".format(rname)) + ) + + tgen.start_router() + + +def teardown_module(mod): + tgen = get_topogen() + tgen.stop_topology() + + +def test_nht_recursive(): + tgen = get_topogen() + + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + r1 = tgen.gears["r1"] + r2 = tgen.gears["r2"] + r3 = tgen.gears["r3"] + r4 = tgen.gears["r4"] + + def _check_ip_nht_24_4(): + output = json.loads(r1.vtysh_cmd("show ip nht 192.168.24.4 json")) + expected = { + "default":{ + "ipv4":{ + "192.168.24.4":{ + "prefixes":[ + { + "prefix":"192.168.24.0/24" + }, + { + "prefix":"10.0.0.2/32" + } + ], + "resolvedProtocol":"ospf" + } + } + } + } + return topotest.json_cmp(output, expected) + + def _check_ip_nht_34_4(): + output = json.loads(r1.vtysh_cmd("show ip nht 192.168.34.4 json")) + expected = { + "default":{ + "ipv4":{ + "192.168.34.4":{ + "prefixes":[ + { + "prefix":"192.168.34.0/24" + }, + { + "prefix":"10.0.0.3/32" + } + ], + "resolvedProtocol":"ospf" + } + } + } + } + return topotest.json_cmp(output, expected) + + def _bgp_check_igp_metric_bestpath(): + output = json.loads(r1.vtysh_cmd("show bgp ipv4 unicast 10.0.0.4/32 json")) + expected = { + "paths": [ + { + "bestpath": {"selectionReason": "IGP Metric"}, + "nexthops": [{"ip": "192.168.24.4", "hostname": "r2", "metric": 10, "accessible": True}], + }, + { + "nexthops": [{"ip": "192.168.34.4", "hostname": "r3", "metric": 30, "accessible": True}], + }, + ] + } + return topotest.json_cmp(output, expected) + + # Check ip nht resolved to ospf + test_func = functools.partial(_check_ip_nht_24_4) + _, result = topotest.run_and_expect(test_func, None, count=20, wait=2) + assert result is None, "ip nht for 24.4 not resolved to ospf" + + # Check ip nht resolved to ospf + test_func = functools.partial(_check_ip_nht_34_4) + _, result = topotest.run_and_expect(test_func, None, count=20, wait=2) + assert result is None, "ip nht for 34.4 not resolved to ospf" + + # Check BGP bestpath selected on IGP metric + test_func = functools.partial(_bgp_check_igp_metric_bestpath) + _, result = topotest.run_and_expect(test_func, None, count=20, wait=2) + assert result is None, "BGP bestpath not selected on igp metric" + + +if __name__ == "__main__": + args = ["-s"] + sys.argv[1:] + sys.exit(pytest.main(args)) diff --git a/zebra/rib.h b/zebra/rib.h index 8484fe1291a2..bc628e8cc478 100644 --- a/zebra/rib.h +++ b/zebra/rib.h @@ -33,6 +33,13 @@ DECLARE_MTYPE(RE); PREDECL_LIST(rnh_list); +struct rnh; +struct rnh_item { + struct rnh_list_item next; + struct rnh *rnh; + struct prefix resolved_route; +}; + /* Nexthop structure. */ struct rnh { uint8_t flags; @@ -50,7 +57,6 @@ struct rnh { uint32_t seqno; struct route_entry *state; - struct prefix resolved_route; struct list *client_list; /* pseudowires dependent on this nh */ @@ -63,7 +69,20 @@ struct rnh { */ int filtered[ZEBRA_ROUTE_MAX]; - struct rnh_list_item rnh_list_item; +/* Level of recursions in nht */ +#define ZEBRA_NHT_RECURSION_MAX 8 + + /* + * For a resolved rnh that has a valid route entry ("state"), the array + * contains one or more routes (multiples in case of recursion). The one + * indexed by (resolved_count - 1) corresponds to the fully-resolved + * route. + * + * For an unresolved rnh, the resolved_count is 1 and the item is queued + * to 0.0.0.0/0, or 0::0/0. + */ + uint8_t resolved_count; + struct rnh_item rnh_item[ZEBRA_NHT_RECURSION_MAX]; }; #define DISTANCE_INFINITY 255 @@ -233,7 +252,7 @@ typedef struct rib_dest_t_ { } rib_dest_t; -DECLARE_LIST(rnh_list, struct rnh, rnh_list_item); +DECLARE_LIST(rnh_list, struct rnh_item, next); DECLARE_LIST(re_list, struct route_entry, next); #define RIB_ROUTE_QUEUED(x) (1 << (x)) diff --git a/zebra/zapi_msg.c b/zebra/zapi_msg.c index ab55998af046..a8f4c6e94734 100644 --- a/zebra/zapi_msg.c +++ b/zebra/zapi_msg.c @@ -677,7 +677,7 @@ static int zsend_nexthop_lookup(struct zserv *client, struct ipaddr *addr, struc stream_putw(s, 0); nhg = rib_get_fib_nhg(re); for (ALL_NEXTHOPS_PTR(nhg, nexthop)) { - if (rnh_nexthop_valid(re, nexthop)) + if (rnh_nexthop_valid(re, nexthop, false)) num += zserv_encode_nexthop(s, nexthop); } diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index 0226c355c8a9..f8a020e6cc8e 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -713,6 +713,7 @@ void zebra_rib_evaluate_rn_nexthops(struct route_node *rn, uint32_t seq, bool rt_delete) { rib_dest_t *dest = rib_dest_from_rnode(rn); + struct rnh_item *rnh_item; struct rnh *rnh; /* @@ -813,7 +814,8 @@ void zebra_rib_evaluate_rn_nexthops(struct route_node *rn, uint32_t seq, * nht resolution and as such we need to call the * nexthop tracking evaluation code */ - frr_each_safe(rnh_list, &dest->nht, rnh) { + frr_each_safe(rnh_list, &dest->nht, rnh_item) { + rnh = rnh_item->rnh; struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(rnh->vrf_id); struct prefix *p = &rnh->node->p; diff --git a/zebra/zebra_rnh.c b/zebra/zebra_rnh.c index 640e6551a774..3811c6b8570d 100644 --- a/zebra/zebra_rnh.c +++ b/zebra/zebra_rnh.c @@ -83,18 +83,20 @@ static void zebra_rnh_remove_from_routing_table(struct rnh *rnh) if (!table) return; - rn = route_node_match(table, &rnh->resolved_route); - if (!rn) - return; + for (int i = 0; i < rnh->resolved_count; i++) { + rn = route_node_match(table, &rnh->rnh_item[i].resolved_route); + if (!rn) + continue; - if (IS_ZEBRA_DEBUG_NHT_DETAILED) - zlog_debug("%s: %s(%u):%pRN removed from tracking on %pRN", - __func__, VRF_LOGNAME(zvrf->vrf), rnh->vrf_id, - rnh->node, rn); + if (IS_ZEBRA_DEBUG_NHT_DETAILED) + zlog_debug("%s: %s(%u):%pRN removed from tracking on %pRN", + __func__, VRF_LOGNAME(zvrf->vrf), rnh->vrf_id, + rnh->node, rn); - dest = rib_dest_from_rnode(rn); - rnh_list_del(&dest->nht, rnh); - route_unlock_node(rn); + dest = rib_dest_from_rnode(rn); + rnh_list_del(&dest->nht, &rnh->rnh_item[i]); + route_unlock_node(rn); + } } static void zebra_rnh_store_in_routing_table(struct rnh *rnh) @@ -104,18 +106,20 @@ static void zebra_rnh_store_in_routing_table(struct rnh *rnh) struct route_node *rn; rib_dest_t *dest; - rn = route_node_match(table, &rnh->resolved_route); - if (!rn) - return; + for (int i = 0; i < rnh->resolved_count; i++) { + rn = route_node_match(table, &rnh->rnh_item[i].resolved_route); + if (!rn) + continue; - if (IS_ZEBRA_DEBUG_NHT_DETAILED) - zlog_debug("%s: %s(%u):%pRN added for tracking on %pRN", - __func__, VRF_LOGNAME(zvrf->vrf), rnh->vrf_id, - rnh->node, rn); + if (IS_ZEBRA_DEBUG_NHT_DETAILED) + zlog_debug("%s: %s(%u):%pRN added for tracking on %pRN", + __func__, VRF_LOGNAME(zvrf->vrf), rnh->vrf_id, + rnh->node, rn); - dest = rib_dest_from_rnode(rn); - rnh_list_add_tail(&dest->nht, rnh); - route_unlock_node(rn); + dest = rib_dest_from_rnode(rn); + rnh_list_add_tail(&dest->nht, &rnh->rnh_item[i]); + route_unlock_node(rn); + } } struct rnh *zebra_add_rnh(struct prefix *p, vrf_id_t vrfid, safi_t safi, @@ -159,7 +163,12 @@ struct rnh *zebra_add_rnh(struct prefix *p, vrf_id_t vrfid, safi_t safi, * we should set the family so that future * comparisons can just be done */ - rnh->resolved_route.family = p->family; + rnh->resolved_count = 1; + for (int i = 0; i < ZEBRA_NHT_RECURSION_MAX; i++) { + rnh->rnh_item[i].rnh = rnh; + rnh->rnh_item[i].resolved_route.family = p->family; + } + rnh->client_list = list_new(); rnh->vrf_id = vrfid; rnh->seqno = 0; @@ -204,6 +213,8 @@ void zebra_free_rnh(struct rnh *rnh) { struct zebra_vrf *zvrf; struct route_table *table; + struct rnh_item *item; + uint8_t family; zebra_rnh_remove_from_routing_table(rnh); rnh->flags |= ZEBRA_NHT_DELETED; @@ -211,18 +222,22 @@ void zebra_free_rnh(struct rnh *rnh) list_delete(&rnh->zebra_pseudowire_list); zvrf = zebra_vrf_lookup_by_id(rnh->vrf_id); - table = zvrf->table[family2afi(rnh->resolved_route.family)][rnh->safi]; + family = rnh->rnh_item[0].resolved_route.family; + table = zvrf->table[family2afi(family)][rnh->safi]; if (table) { struct route_node *rern; - rern = route_node_match(table, &rnh->resolved_route); - if (rern) { - rib_dest_t *dest; + for (int i = 0; i < rnh->resolved_count; i++) { + item = &rnh->rnh_item[i]; + rern = route_node_match(table, &item->resolved_route); + if (rern) { + rib_dest_t *dest; - dest = rib_dest_from_rnode(rern); - rnh_list_del(&dest->nht, rnh); - route_unlock_node(rern); + dest = rib_dest_from_rnode(rern); + rnh_list_del(&dest->nht, item); + route_unlock_node(rern); + } } } free_state(rnh->vrf_id, rnh->state, rnh->node); @@ -467,30 +482,37 @@ static void zebra_rnh_notify_protocol_clients(struct zebra_vrf *zvrf, afi_t afi, * check in a couple of places, so this is a single home for the logic we * use. */ - -static const int RNH_INVALID_NH_FLAGS = (NEXTHOP_FLAG_RECURSIVE | - NEXTHOP_FLAG_DUPLICATE | - NEXTHOP_FLAG_RNH_FILTERED); - -bool rnh_nexthop_valid(const struct route_entry *re, const struct nexthop *nh) +bool rnh_nexthop_valid(const struct route_entry *re, const struct nexthop *nh, + bool recursive) { - return (CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED) - && CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE) - && !CHECK_FLAG(nh->flags, RNH_INVALID_NH_FLAGS)); + if (recursive) { + return (CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED) && + CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE) && + CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE) && + !CHECK_FLAG(nh->flags, (NEXTHOP_FLAG_DUPLICATE | + NEXTHOP_FLAG_RNH_FILTERED))); + } else { + return (CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED) && + CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE) && + !CHECK_FLAG(nh->flags, (NEXTHOP_FLAG_RECURSIVE | + NEXTHOP_FLAG_DUPLICATE | + NEXTHOP_FLAG_RNH_FILTERED))); + } } /* * Determine whether an re's nexthops are valid for tracking. */ -static bool rnh_check_re_nexthops(const struct route_entry *re, - const struct rnh *rnh) +static struct nexthop *rnh_check_re_nexthops(const struct route_entry *re, + const struct rnh *rnh, + bool recursive) { bool ret = false; - const struct nexthop *nexthop = NULL; + struct nexthop *nexthop; /* Check route's nexthops */ for (ALL_NEXTHOPS(re->nhe->nhg, nexthop)) { - if (rnh_nexthop_valid(re, nexthop)) + if (rnh_nexthop_valid(re, nexthop, recursive)) break; } @@ -498,7 +520,7 @@ static bool rnh_check_re_nexthops(const struct route_entry *re, if (nexthop == NULL && re->nhe->backup_info && re->nhe->backup_info->nhe) { for (ALL_NEXTHOPS(re->nhe->backup_info->nhe->nhg, nexthop)) { - if (rnh_nexthop_valid(re, nexthop)) + if (rnh_nexthop_valid(re, nexthop, recursive)) break; } } @@ -536,7 +558,24 @@ static bool rnh_check_re_nexthops(const struct route_entry *re, } done: - return ret; + return ret ? nexthop : NULL; +} + +static void rnh_init_resolved_route(afi_t afi, uint8_t *count, + struct prefix *route) +{ + *count = 1; + memset(route, 0, sizeof(struct prefix)); + route->family = afi2family(afi); +} + +static void rnh_copy_resolved_route(struct rnh *rnh, uint8_t count, + struct prefix *route) +{ + rnh->resolved_count = count; + + for (int i = 0; i < count; i++) + prefix_copy(&rnh->rnh_item[i].resolved_route, route + i); } /* @@ -546,13 +585,18 @@ static bool rnh_check_re_nexthops(const struct route_entry *re, static struct route_entry * zebra_rnh_resolve_nexthop_entry(struct zebra_vrf *zvrf, afi_t afi, struct route_node *nrn, const struct rnh *rnh, - struct route_node **prn) + struct route_node **prn, uint8_t *res_count, + struct prefix *res_route) { struct route_table *route_table; struct route_node *rn; struct route_entry *re; + struct nexthop *nexthop; + struct prefix p; + int count; *prn = NULL; + rnh_init_resolved_route(afi, res_count, res_route); route_table = zvrf->table[afi][rnh->safi]; if (!route_table) @@ -568,6 +612,7 @@ zebra_rnh_resolve_nexthop_entry(struct zebra_vrf *zvrf, afi_t afi, /* While resolving nexthops, we may need to walk up the tree from the * most-specific match. Do similar logic as in zebra_rib.c */ + count = 0; while (rn) { if (IS_ZEBRA_DEBUG_NHT_DETAILED) zlog_debug("%s: %s(%u):%pRN Possible Match to %pRN", @@ -586,6 +631,8 @@ zebra_rnh_resolve_nexthop_entry(struct zebra_vrf *zvrf, afi_t afi, CHECK_FLAG( rnh->flags, ZEBRA_NHT_RESOLVE_VIA_DEFAULT)); + + rnh_init_resolved_route(afi, res_count, res_route); return NULL; } @@ -618,14 +665,48 @@ zebra_rnh_resolve_nexthop_entry(struct zebra_vrf *zvrf, afi_t afi, /* Just being SELECTED isn't quite enough - must * have an installed nexthop to be useful. */ - if (rnh_check_re_nexthops(re, rnh)) + if (rnh_check_re_nexthops(re, rnh, false)) break; } - /* Route entry found, we're done; else, walk up the tree. */ + /* Route entry found, we're done; else, walk up the tree. + * + * In case of a recursive route (e.g., bgp), though, we need + * to keep resolving on the nexthop and get a fully-resolved + * route so that consistent parameters (incl. protocol type, + * route metric, and nexthops) are sent to NHT clients. + */ if (re) { - *prn = rn; - return re; + prefix_copy(res_route + count, &rn->p); + *res_count = ++count; + + nexthop = rnh_check_re_nexthops(re, rnh, true); + if (!nexthop) { + *prn = rn; + return re; + } + + if ((afi != AFI_IP) && (afi != AFI_IP6)) { + *prn = rn; + return re; + } + + if (count >= ZEBRA_NHT_RECURSION_MAX) { + zlog_warn("NHT reached max recursion for %pFX", + &nrn->p); + *prn = rn; + return re; + } + + addr2hostprefix(afi2family(afi), &nexthop->gate, &p); + rn = route_node_match(route_table, &p); + if (rn) + route_unlock_node(rn); + + if (IS_ZEBRA_DEBUG_NHT_DETAILED) { + zlog_debug(" continue resolve on nh %pFX, count %d", + &p, count); + } } else { /* Resolve the nexthop recursively by finding matching * route with lower prefix length @@ -634,6 +715,7 @@ zebra_rnh_resolve_nexthop_entry(struct zebra_vrf *zvrf, afi_t afi, } } + rnh_init_resolved_route(afi, res_count, res_route); return NULL; } @@ -655,35 +737,33 @@ static void zebra_rnh_eval_nexthop_entry(struct zebra_vrf *zvrf, afi_t afi, int force, struct route_node *nrn, struct rnh *rnh, struct route_node *prn, - struct route_entry *re) + struct route_entry *re, + uint8_t res_count, + struct prefix *res_route) { int state_changed = 0; + assert((res_count > 0) && (res_count <= ZEBRA_NHT_RECURSION_MAX)); + assert((rnh->resolved_count > 0) && + (rnh->resolved_count <= ZEBRA_NHT_RECURSION_MAX)); + /* If we're resolving over a different route, resolution has changed or * the resolving route has some change (e.g., metric), there is a state * change. */ zebra_rnh_remove_from_routing_table(rnh); - if (!prefix_same(&rnh->resolved_route, prn ? &prn->p : NULL)) { - if (prn) - prefix_copy(&rnh->resolved_route, &prn->p); - else { - /* - * Just quickly store the family of the resolved - * route so that we can reset it in a second here - */ - int family = rnh->resolved_route.family; - - memset(&rnh->resolved_route, 0, sizeof(struct prefix)); - rnh->resolved_route.family = family; - } + /* Compare the fully-resolved route */ + if (!prefix_same(&rnh->rnh_item[rnh->resolved_count - 1].resolved_route, + res_route + res_count - 1)) { copy_state(rnh, re, nrn); state_changed = 1; } else if (compare_state(re, rnh->state)) { copy_state(rnh, re, nrn); state_changed = 1; } + + rnh_copy_resolved_route(rnh, res_count, res_route); zebra_rnh_store_in_routing_table(rnh); if (state_changed || force) { @@ -706,6 +786,8 @@ static void zebra_rnh_evaluate_entry(struct zebra_vrf *zvrf, afi_t afi, struct rnh *rnh; struct route_entry *re; struct route_node *prn; + uint8_t res_count; + struct prefix res_prefix[ZEBRA_NHT_RECURSION_MAX]; if (IS_ZEBRA_DEBUG_NHT) { zlog_debug("%s(%u):%pRN: Evaluate RNH, %s", @@ -716,7 +798,8 @@ static void zebra_rnh_evaluate_entry(struct zebra_vrf *zvrf, afi_t afi, rnh = nrn->info; /* Identify route entry (RE) resolving this tracked entry. */ - re = zebra_rnh_resolve_nexthop_entry(zvrf, afi, nrn, rnh, &prn); + re = zebra_rnh_resolve_nexthop_entry(zvrf, afi, nrn, rnh, &prn, + &res_count, &res_prefix[0]); /* If the entry cannot be resolved and that is also the existing state, * there is nothing further to do. @@ -725,7 +808,8 @@ static void zebra_rnh_evaluate_entry(struct zebra_vrf *zvrf, afi_t afi, return; /* Process based on type of entry. */ - zebra_rnh_eval_nexthop_entry(zvrf, afi, force, nrn, rnh, prn, re); + zebra_rnh_eval_nexthop_entry(zvrf, afi, force, nrn, rnh, prn, re, + res_count, &res_prefix[0]); } /* @@ -743,11 +827,14 @@ static void zebra_rnh_clear_nhc_flag(struct zebra_vrf *zvrf, afi_t afi, struct rnh *rnh; struct route_entry *re; struct route_node *prn; + uint8_t res_count; + struct prefix res_prefix[ZEBRA_NHT_RECURSION_MAX]; rnh = nrn->info; /* Identify route entry (RIB) resolving this tracked entry. */ - re = zebra_rnh_resolve_nexthop_entry(zvrf, afi, nrn, rnh, &prn); + re = zebra_rnh_resolve_nexthop_entry(zvrf, afi, nrn, rnh, &prn, + &res_count, &res_prefix[0]); if (re) UNSET_FLAG(re->status, ROUTE_ENTRY_LABELS_CHANGED); @@ -890,7 +977,7 @@ static struct nexthop *next_valid_primary_nh(struct route_entry *re, nh = nexthop_next(nh); while (nh) { - if (rnh_nexthop_valid(re, nh)) + if (rnh_nexthop_valid(re, nh, false)) break; else nh = nexthop_next(nh); @@ -1069,10 +1156,10 @@ static bool compare_valid_nexthops(struct route_entry *r1, while (1) { /* Find each backup list's next valid nexthop */ - while ((nh1 != NULL) && !rnh_nexthop_valid(r1, nh1)) + while ((nh1 != NULL) && !rnh_nexthop_valid(r1, nh1, false)) nh1 = nexthop_next(nh1); - while ((nh2 != NULL) && !rnh_nexthop_valid(r2, nh2)) + while ((nh2 != NULL) && !rnh_nexthop_valid(r2, nh2, false)) nh2 = nexthop_next(nh2); if (nh1 && nh2) { @@ -1145,6 +1232,7 @@ int zebra_send_rnh_update(struct rnh *rnh, struct zserv *client, struct route_node *rn; int ret; uint32_t message = 0; + struct prefix *resolved; rn = rnh->node; re = rnh->state; @@ -1182,14 +1270,19 @@ int zebra_send_rnh_update(struct rnh *rnh, struct zserv *client, /* * What we matched against */ - stream_putw(s, rnh->resolved_route.family); - stream_putc(s, rnh->resolved_route.prefixlen); - switch (rnh->resolved_route.family) { + assert((rnh->resolved_count > 0) && + (rnh->resolved_count <= ZEBRA_NHT_RECURSION_MAX)); + + resolved = &rnh->rnh_item[rnh->resolved_count - 1].resolved_route; + + stream_putw(s, resolved->family); + stream_putc(s, resolved->prefixlen); + switch (resolved->family) { case AF_INET: - stream_put_in_addr(s, &rnh->resolved_route.u.prefix4); + stream_put_in_addr(s, &resolved->u.prefix4); break; case AF_INET6: - stream_put(s, &rnh->resolved_route.u.prefix6, IPV6_MAX_BYTELEN); + stream_put(s, &resolved->u.prefix6, IPV6_MAX_BYTELEN); break; default: flog_err(EC_ZEBRA_RNH_UNKNOWN_FAMILY, @@ -1215,7 +1308,7 @@ int zebra_send_rnh_update(struct rnh *rnh, struct zserv *client, nhg = rib_get_fib_nhg(re); for (ALL_NEXTHOPS_PTR(nhg, nh)) - if (rnh_nexthop_valid(re, nh)) { + if (rnh_nexthop_valid(re, nh, false)) { zapi_nexthop_from_nexthop(&znh, nh); ret = zapi_nexthop_encode(s, &znh, 0, message); if (ret < 0) @@ -1227,7 +1320,7 @@ int zebra_send_rnh_update(struct rnh *rnh, struct zserv *client, nhg = rib_get_fib_backup_nhg(re); if (nhg) { for (ALL_NEXTHOPS_PTR(nhg, nh)) - if (rnh_nexthop_valid(re, nh)) { + if (rnh_nexthop_valid(re, nh, false)) { zapi_nexthop_from_nexthop(&znh, nh); ret = zapi_nexthop_encode( s, &znh, 0 /* flags */, @@ -1315,6 +1408,8 @@ static void print_rnh(struct route_node *rn, struct vty *vty, json_object *json) json_object *json_client = NULL; json_object *json_nexthop_array = NULL; json_object *json_nexthop = NULL; + json_object *json_prefix_array = NULL; + json_object *json_prefix = NULL; rnh = rn->info; @@ -1322,6 +1417,7 @@ static void print_rnh(struct route_node *rn, struct vty *vty, json_object *json) json_nht = json_object_new_object(); json_nexthop_array = json_object_new_array(); json_client_array = json_object_new_array(); + json_prefix_array = json_object_new_array(); json_object_object_add( json, @@ -1334,6 +1430,7 @@ static void print_rnh(struct route_node *rn, struct vty *vty, json_object *json) json_client_array); json_object_object_add(json_nht, "nexthops", json_nexthop_array); + json_object_object_add(json_nht, "prefixes", json_prefix_array); } else { vty_out(vty, "%s%s\n", inet_ntop(rn->p.family, &rn->p.u.prefix, buf, BUFSIZ), @@ -1347,12 +1444,25 @@ static void print_rnh(struct route_node *rn, struct vty *vty, json_object *json) json_object_string_add( json_nht, "resolvedProtocol", zebra_route_string(rnh->state->type)); - json_object_string_addf(json_nht, "prefix", "%pFX", - &rnh->resolved_route); + + for (int i = 0; i < rnh->resolved_count; i++) { + json_prefix = json_object_new_object(); + json_object_array_add(json_prefix_array, + json_prefix); + + json_object_string_addf(json_prefix, + "prefix", "%pFX", + &rnh->rnh_item[i].resolved_route); + } } else { - vty_out(vty, " resolved via %s, prefix %pFX\n", - zebra_route_string(rnh->state->type), - &rnh->resolved_route); + vty_out(vty, " resolved via %s, prefix", + zebra_route_string(rnh->state->type)); + + for (int i = 0; i < rnh->resolved_count; i++) { + vty_out(vty, " %pFX", + &rnh->rnh_item[i].resolved_route); + } + vty_out(vty, "\n"); } for (nexthop = rnh->state->nhe->nhg.nexthop; nexthop; diff --git a/zebra/zebra_rnh.h b/zebra/zebra_rnh.h index f0b10d825c84..8b66a3126695 100644 --- a/zebra/zebra_rnh.h +++ b/zebra/zebra_rnh.h @@ -37,7 +37,7 @@ extern void zebra_print_rnh_table(vrf_id_t vrfid, afi_t afi, safi_t safi, extern int rnh_resolve_via_default(struct zebra_vrf *zvrf, int family); extern bool rnh_nexthop_valid(const struct route_entry *re, - const struct nexthop *nh); + const struct nexthop *nh, bool recursive); /* UI control to avoid notifications if backup nexthop status changes */ void rnh_set_hide_backups(bool hide_p);