From be0a10717806614f24b1d51a46205f9a28bf117a Mon Sep 17 00:00:00 2001 From: Philippe Guibert Date: Thu, 2 Nov 2023 16:29:05 +0100 Subject: [PATCH] zebra: fix handle netlink state reachable message When creating an NHRP shortcut entry between two spokes, the cache entry created very often has its 'used' field set to false, despite continuous traffic towards the 11.255.255.1 IP address. > north-vm# show ip nhrp cache > Iface Type Protocol NBMA Claimed NBMA Flags Identity > gre1 local 11.255.255.2 10.125.0.2 10.125.0.2 - > gre1 nhs 11.255.255.1 10.125.0.1 10.125.0.1 T west <---- Actually, that flag reflects the protocol address reachability. A neighbor entry is maintained in the kernel. As an ARP probe operates only with MAC addresses, NHRP takes the place of ARP and refreshes the NBMA link IP address every time the neighbor entry enters the PROBE state. The neighbor entry then moves to the REACHABLE state, and the expectation is that this REACHABLE state is notified to the NHRP daemon, which does not happen. The dump below shows 3 netlink messages over the gre1 interface: > # trace from 'ip monitor neigh' > miss 11.255.255.1 dev gre1 lladdr 10.125.0.1 PROBE proto 191 > 11.255.255.1 dev gre1 lladdr 10.125.0.1 PROBE proto 191 > 11.255.255.1 dev gre1 lladdr 10.125.0.1 REACHABLE proto 191 The NHRP/ZEBRA traces show the first 2 netlink messages being received, followed by the update of the neighbor entry by the NHRP daemon. However, no message is received confirming that the new neighbor state moved to REACHABLE. 
> # trace from nhrp > 2023/11/06 09:46:21 ZEBRA: [V8KNF-8EXH8] netlink_recv_msg: << netlink message dump [recv] > 2023/11/06 09:46:21 ZEBRA: [JAS4D-NCWGP] nlmsghdr [len=80 type=(30) GETNEIGH flags=(0x0001) {REQUEST} seq=0 pid=0] > 2023/11/06 09:46:21 ZEBRA: [S4WS4-PS3KF] netlink_parse_info: netlink-listen (NS 0) type RTM_GETNEIGH(30), len=80, seq=0, pid=0 > 2023/11/06 09:46:21 ZEBRA: [V8KNF-8EXH8] netlink_recv_msg: << netlink message dump [recv] > 2023/11/06 09:46:21 ZEBRA: [JAS4D-NCWGP] nlmsghdr [len=80 type=(28) NEWNEIGH flags=(0x0000) {} seq=0 pid=0] > 2023/11/06 09:46:21 ZEBRA: [T4YQJ-83R8H] ndm [family=2 (AF_INET) ifindex=10 state=0x0010 {PROBE} flags=0x0000 {} type=1 (UNICAST)] > 2023/11/06 09:46:21 ZEBRA: [KFBSR-XYJV1] rta [len=8 (payload=4) type=(1) DST] > 2023/11/06 09:46:21 ZEBRA: [M8QV4-KY9C0] 11.255.255.1 > 2023/11/06 09:46:21 ZEBRA: [KFBSR-XYJV1] rta [len=8 (payload=4) type=(2) LLADDR] > 2023/11/06 09:46:21 ZEBRA: [V74GD-NYS6Y] 0A:7D:00:01 > 2023/11/06 09:46:21 ZEBRA: [KFBSR-XYJV1] rta [len=8 (payload=4) type=(4) PROBES] > 2023/11/06 09:46:21 ZEBRA: [KFBSR-XYJV1] rta [len=20 (payload=16) type=(3) CACHEINFO] > 2023/11/06 09:46:21 ZEBRA: [KFBSR-XYJV1] rta [len=5 (payload=1) type=(12) UNKNOWN] > 2023/11/06 09:46:21 ZEBRA: [S4WS4-PS3KF] netlink_parse_info: netlink-listen (NS 0) type RTM_NEWNEIGH(28), len=76, seq=0, pid=0 > 2023/11/06 09:46:21 ZEBRA: [TDS34-MNEJW] Neighbor Entry received is not on a VLAN or a BRIDGE, ignoring > 2023/11/06 09:46:21 NHRP: [QQ0NK-1H449] Netlink: who-has 11.255.255.1 dev gre1 lladdr 10.125.0.1 nud 0x10 cache used 0 type 5 > 2023/11/06 09:46:21 NHRP: [QVXNM-NVHEQ] Netlink: update binding for 11.255.255.1 dev gre1 from c (unspec) peer.vc.nbma 10.125.0.1 to lladdr 10.125.0.1 > 2023/11/06 09:46:21 NHRP: [QQ0NK-1H449] Netlink: new-neigh 11.255.255.1 dev gre1 lladdr 10.125.0.1 nud 0x10 cache used 1 type 5 > 2023/11/06 09:46:21 ZEBRA: [NH6N7-54CD1] Tx RTM_NEWNEIGH family ipv4 IF gre1(10) Neigh 11.255.255.1 Link 10.125.0.1 flags 0x0 
state 0x2 ext_flags 0x0 > 2023/11/06 09:46:21 ZEBRA: [HYEHE-CQZ9G] nl_batch_send: netlink-dp (NS 0), batch size=52, msg cnt=1 The NHRP daemon relies on the zebra netlink layer. Read and write operations are done on two different sockets. As a filter is attached to the read socket to prevent reading notifications originating from the write socket, the NEWNEIGH operations from NHRP are ignored. Fix this by adding an exception in the netlink filter to authorize NEWNEIGH notifications. Consequently, the REACHABLE state is notified to NHRP. > north-vm# show ip nhrp cache > Iface Type Protocol NBMA Claimed NBMA Flags Identity > gre1 local 11.255.255.2 10.125.0.2 10.125.0.2 - > gre1 nhs 11.255.255.1 10.125.0.1 10.125.0.1 UT west <---- Link: https://flylib.com/books/3/475/1/html/2/images/0131777203/graphics/15fig06.gif Fixes: b3b751046495 ("nhrpd: link layer registration to notificationas") Signed-off-by: Philippe Guibert --- zebra/kernel_netlink.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c index 8a64a1ea48ea..d63aadca26ba 100644 --- a/zebra/kernel_netlink.c +++ b/zebra/kernel_netlink.c @@ -555,7 +555,7 @@ static void netlink_install_filter(int sock, uint32_t pid, uint32_t dplane_pid) * nlmsg_pid == dplane_pid) { * if (the incoming nlmsg_type == * RTM_NEWADDR || RTM_DELADDR || RTM_NEWNETCONF || - * RTM_DELNETCONF) + * RTM_DELNETCONF || RTM_NEWNEIGH * keep this message * else * skip this message @@ -574,7 +574,7 @@ static void netlink_install_filter(int sock, uint32_t pid, uint32_t dplane_pid) /* * 2: Compare to dplane pid */ - BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(dplane_pid), 0, 6), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(dplane_pid), 0, 7), /* * 3: Load the nlmsg_type into BPF register */ @@ -583,27 +583,29 @@ static void netlink_install_filter(int sock, uint32_t pid, uint32_t dplane_pid) /* * 4: Compare to RTM_NEWADDR */ - BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_NEWADDR), 
4, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_NEWADDR), 5, 0), /* * 5: Compare to RTM_DELADDR */ - BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_DELADDR), 3, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_DELADDR), 4, 0), /* * 6: Compare to RTM_NEWNETCONF */ - BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_NEWNETCONF), 2, - 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_NEWNETCONF), 3, 0), /* * 7: Compare to RTM_DELNETCONF */ - BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_DELNETCONF), 1, - 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_DELNETCONF), 2, 0), /* - * 8: This is the end state of we want to skip the + * 8: Compare to RTM_NEWNEIGH + */ + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_NEWNEIGH), 1, 0), + /* + * 9: This is the end state of we want to skip the * message */ BPF_STMT(BPF_RET | BPF_K, 0), - /* 9: This is the end state of we want to keep + /* 10: This is the end state of we want to keep * the message */ BPF_STMT(BPF_RET | BPF_K, 0xffff),