diff --git a/src/sonic-frr/patch/0029-zebra-nhg-fix-on-intf-up.patch b/src/sonic-frr/patch/0029-zebra-nhg-fix-on-intf-up.patch new file mode 100644 index 000000000000..64e8ac5d5713 --- /dev/null +++ b/src/sonic-frr/patch/0029-zebra-nhg-fix-on-intf-up.patch @@ -0,0 +1,338 @@ +From 4a58bc9e856c4ca953474548d61f06aad2fdf45b Mon Sep 17 00:00:00 2001 +From: Ashwini Reddy +Date: Wed, 19 Apr 2023 11:35:25 -0700 +Subject: [PATCH 1/3] zebra: re-install nhg on interface up + +Intermittently zebra and kernel are out of sync +when interface flaps and the add's/dels are in +same processing queue and zebra assumes no change in nexthop. +Hence we need to bring in a reinstall to kernel +of the nexthops and routes to sync their states. + +Upon interface flap kernel would have deleted NHGs +associated to a interface (the one flapped), +zebra retains NHGs for 3 mins even though upper +layer protocol removes the nexthops (associated NHG). +As part of interface address add , +re-add singleton NHGs associated to interface. + +Ticket: #3173663 +Issue: 3173663 + +Signed-off-by: Ashwini Reddy +Signed-off-by: Chirag Shah +--- + lib/nexthop.c | 9 +++++++++ + lib/nexthop.h | 3 +++ + zebra/redistribute.c | 4 ++++ + zebra/zebra_nhg.c | 46 ++++++++++++++++++++++++++++++++++++++++++++ + zebra/zebra_nhg.h | 1 + + 5 files changed, 63 insertions(+) + +diff --git a/lib/nexthop.c b/lib/nexthop.c +index 52679388fd..e3bdbfb9e8 100644 +--- a/lib/nexthop.c ++++ b/lib/nexthop.c +@@ -1093,3 +1093,12 @@ static ssize_t printfrr_nh(struct fbuf *buf, struct printfrr_eargs *ea, + } + return -1; + } ++ ++bool nexthop_is_ifindex_type(const struct nexthop *nh) ++{ ++ if (nh->type == NEXTHOP_TYPE_IFINDEX || ++ nh->type == NEXTHOP_TYPE_IPV4_IFINDEX || ++ nh->type == NEXTHOP_TYPE_IPV6_IFINDEX) ++ return true; ++ return false; ++} +diff --git a/lib/nexthop.h b/lib/nexthop.h +index f6fb6ec2b7..a52ae02172 100644 +--- a/lib/nexthop.h ++++ b/lib/nexthop.h +@@ -260,6 +260,9 @@ extern struct nexthop *nexthop_dup(const struct nexthop *nexthop, + extern struct nexthop *nexthop_dup_no_recurse(const struct nexthop *nexthop, + struct nexthop *rparent); + ++/* Check nexthop of IFINDEX type */ ++extern bool nexthop_is_ifindex_type(const struct nexthop *nh); ++ + /* + * Parse one or more backup index values, as comma-separated numbers, + * into caller's array of uint8_ts. The array must be NEXTHOP_MAX_BACKUPS +diff --git a/zebra/redistribute.c b/zebra/redistribute.c +index 4a8fe938ed..fccbee7d85 100644 +--- a/zebra/redistribute.c ++++ b/zebra/redistribute.c +@@ -561,6 +561,10 @@ void zebra_interface_address_add_update(struct interface *ifp, + client, ifp, ifc); + } + } ++ /* interface associated NHGs may have been deleted, ++ * re-sync zebra -> dplane NHGs ++ */ ++ zebra_interface_nhg_reinstall(ifp); + } + + /* Interface address deletion. */ +diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c +index 54c66fbf18..753efef7f8 100644 +--- a/zebra/zebra_nhg.c ++++ b/zebra/zebra_nhg.c +@@ -3003,6 +3003,12 @@ void zebra_nhg_install_kernel(struct nhg_hash_entry *nhe) + /* Resolve it first */ + nhe = zebra_nhg_resolve(nhe); + ++ if (zebra_nhg_set_valid_if_active(nhe)) { ++ if (IS_ZEBRA_DEBUG_NHG_DETAIL) ++ zlog_debug("%s: valid flag set for nh %pNG", __func__, ++ nhe); ++ } ++ + /* Make sure all depends are installed/queued */ + frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) { + zebra_nhg_install_kernel(rb_node_dep->nhe); +@@ -3586,3 +3592,43 @@ static ssize_t printfrr_nhghe(struct fbuf *buf, struct printfrr_eargs *ea, + ret += bputs(buf, "]"); + return ret; + } ++ ++/* ++ * On interface add the nexthop that resolves to this intf needs ++ * a re-install. There are following scenarios when the nexthop group update ++ * gets skipped: ++ * 1. When upper level protocol sends removal of NHG, there is ++ * timer running to keep NHG for 180 seconds, during this interval, same route ++ * with same set of nexthops installation is given , the same NHG is used ++ * but since NHG is not reinstalled on interface address add, it is not aware ++ * in Dplan/Kernel. ++ * 2. Due to a quick port flap due to interface add and delete ++ * to be processed in same queue one after another. Zebra believes that ++ * there is no change in nhg in this case. Hence this re-install will ++ * make sure the nexthop group gets updated to Dplan/Kernel. ++ */ ++void zebra_interface_nhg_reinstall(struct interface *ifp) ++{ ++ struct nhg_connected *rb_node_dep = NULL; ++ struct zebra_if *zif = ifp->info; ++ struct nexthop *nh; ++ ++ if (IS_ZEBRA_DEBUG_NHG_DETAIL) ++ zlog_debug( ++ "%s: Installing interface %s associated NHGs into kernel", ++ __func__, ifp->name); ++ ++ frr_each (nhg_connected_tree, &zif->nhg_dependents, rb_node_dep) { ++ nh = rb_node_dep->nhe->nhg.nexthop; ++ if (zebra_nhg_set_valid_if_active(rb_node_dep->nhe)) { ++ if (IS_ZEBRA_DEBUG_NHG_DETAIL) ++ zlog_debug( ++ "%s: Setting the valid flag for nhe %pNG, interface: %s", ++ __func__, rb_node_dep->nhe, ifp->name); ++ } ++ /* Check for singleton NHG associated to interface */ ++ if (nexthop_is_ifindex_type(nh) && ++ zebra_nhg_depends_is_empty(rb_node_dep->nhe)) ++ zebra_nhg_install_kernel(rb_node_dep->nhe); ++ } ++} +diff --git a/zebra/zebra_nhg.h b/zebra/zebra_nhg.h +index 9b925bf10f..18914b7856 100644 +--- a/zebra/zebra_nhg.h ++++ b/zebra/zebra_nhg.h +@@ -374,6 +374,7 @@ extern uint8_t zebra_nhg_nhe2grp(struct nh_grp *grp, struct nhg_hash_entry *nhe, + /* Dataplane install/uninstall */ + extern void zebra_nhg_install_kernel(struct nhg_hash_entry *nhe); + extern void zebra_nhg_uninstall_kernel(struct nhg_hash_entry *nhe); ++extern void zebra_interface_nhg_reinstall(struct interface *ifp); + + /* Forward ref of dplane update context type */ + struct zebra_dplane_ctx; +-- +2.17.1 + + +From 2b99f5ccf7768eff1393e07db2b77abc104dbbd7 Mon Sep 17 00:00:00 2001 +From: Chirag Shah +Date: Fri, 28 Apr 2023 19:09:55 -0700 +Subject: [PATCH 2/3] zebra:re-install dependent nhgs on interface up + +Upon interface up associated singleton NHG's +dependent NHGs needs to be reinstalled as +kernel would have deleted if there is no route +referencing it. + +Ticket:#3416477 +Issue:3416477 +Testing Done: +flap interfaces which are part of route NHG, +upon interfaces up event, NHGs are resynced +into dplane. + +Signed-off-by: Chirag Shah +--- + zebra/zebra_nhg.c | 43 ++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 38 insertions(+), 5 deletions(-) + +diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c +index 753efef7f8..e5efbf8d5f 100644 +--- a/zebra/zebra_nhg.c ++++ b/zebra/zebra_nhg.c +@@ -1140,13 +1140,23 @@ static void zebra_nhg_handle_uninstall(struct nhg_hash_entry *nhe) + zebra_nhg_free(nhe); + } + +-static void zebra_nhg_handle_install(struct nhg_hash_entry *nhe) ++static void zebra_nhg_handle_install(struct nhg_hash_entry *nhe, bool install) + { + /* Update validity of groups depending on it */ + struct nhg_connected *rb_node_dep; + +- frr_each_safe(nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep) ++ frr_each_safe (nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep) { + zebra_nhg_set_valid(rb_node_dep->nhe); ++ /* install dependent NHG into kernel */ ++ if (install) { ++ if (IS_ZEBRA_DEBUG_NHG_DETAIL) ++ zlog_debug( ++ "%s nh id %u (flags 0x%x) associated dependent NHG %pNG install", ++ __func__, nhe->id, nhe->flags, ++ rb_node_dep->nhe); ++ zebra_nhg_install_kernel(rb_node_dep->nhe); ++ } ++ } + } + + /* +@@ -3035,7 +3045,7 @@ void zebra_nhg_install_kernel(struct nhg_hash_entry *nhe) + break; + case ZEBRA_DPLANE_REQUEST_SUCCESS: + SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED); +- zebra_nhg_handle_install(nhe); ++ zebra_nhg_handle_install(nhe, false); + break; + } + } +@@ -3109,7 +3119,7 @@ void zebra_nhg_dplane_result(struct zebra_dplane_ctx *ctx) + if (status == ZEBRA_DPLANE_REQUEST_SUCCESS) { + SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID); + SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED); +- zebra_nhg_handle_install(nhe); ++ zebra_nhg_handle_install(nhe, true); + + /* If daemon nhg, send it an update */ + if (PROTO_OWNED(nhe)) +@@ -3628,7 +3638,30 @@ void zebra_interface_nhg_reinstall(struct interface *ifp) + } + /* Check for singleton NHG associated to interface */ + if (nexthop_is_ifindex_type(nh) && +- zebra_nhg_depends_is_empty(rb_node_dep->nhe)) ++ zebra_nhg_depends_is_empty(rb_node_dep->nhe)) { ++ struct nhg_connected *rb_node_dependent; ++ ++ if (IS_ZEBRA_DEBUG_NHG) ++ zlog_debug( ++ "%s install nhe %pNG nh type %u flags 0x%x", ++ __func__, rb_node_dep->nhe, nh->type, ++ rb_node_dep->nhe->flags); + zebra_nhg_install_kernel(rb_node_dep->nhe); ++ ++ /* mark depedent uninstall, when interface associated ++ * singleton is installed, install depedent ++ */ ++ frr_each_safe (nhg_connected_tree, ++ &rb_node_dep->nhe->nhg_dependents, ++ rb_node_dependent) { ++ if (IS_ZEBRA_DEBUG_NHG) ++ zlog_debug( ++ "%s dependent nhe %pNG unset installed flag", ++ __func__, ++ rb_node_dependent->nhe); ++ UNSET_FLAG(rb_node_dependent->nhe->flags, ++ NEXTHOP_GROUP_INSTALLED); ++ } ++ } + } + } +-- +2.17.1 + + +From be767bd66143c2dcdd564a40a852400ae9e251e5 Mon Sep 17 00:00:00 2001 +From: Donald Sharp +Date: Fri, 19 Apr 2024 12:13:32 -0400 +Subject: [PATCH 3/3] lib, zebra: Check for not being a blackhole route + +In zebra_interface_nhg_reinstall zebra is checking that the +nhg is a singleton and not a blackhole nhg. This was originally +done with checking that the nexthop is a NEXTHOP_TYPE_IFINDEX, +NEXTHOP_TYPE_IPV4_IFINDEX and NEXTHOP_TYPE_IPV6_IFINDEX. This +was excluding NEXTHOP_TYPE_IPV4 and NEXTHOP_TYPE_IPV6. These +were both possible to be received and maintained from the upper +level protocol for when a route is being recursively resolved. +If we have gotten to this point in zebra_interface_nhg_reinstall +the nexthop group has already been installed at least once +and we *know* that it is actually a valid nexthop. What the +test is really trying to do is ensure that we are not reinstalling +a blackhole nexthop group( Which is not possible to even be +here by the way, but safety first! ). So let's change +to test for that instead. + +Signed-off-by: Donald Sharp +--- + lib/nexthop.c | 8 ++------ + lib/nexthop.h | 4 ++-- + zebra/zebra_nhg.c | 3 ++- + 3 files changed, 6 insertions(+), 9 deletions(-) + +diff --git a/lib/nexthop.c b/lib/nexthop.c +index e3bdbfb9e8..884255f158 100644 +--- a/lib/nexthop.c ++++ b/lib/nexthop.c +@@ -1094,11 +1094,7 @@ static ssize_t printfrr_nh(struct fbuf *buf, struct printfrr_eargs *ea, + return -1; + } + +-bool nexthop_is_ifindex_type(const struct nexthop *nh) ++bool nexthop_is_blackhole(const struct nexthop *nh) + { +- if (nh->type == NEXTHOP_TYPE_IFINDEX || +- nh->type == NEXTHOP_TYPE_IPV4_IFINDEX || +- nh->type == NEXTHOP_TYPE_IPV6_IFINDEX) +- return true; +- return false; ++ return nh->type == NEXTHOP_TYPE_BLACKHOLE; + } +diff --git a/lib/nexthop.h b/lib/nexthop.h +index a52ae02172..bd1c0514fc 100644 +--- a/lib/nexthop.h ++++ b/lib/nexthop.h +@@ -260,8 +260,8 @@ extern struct nexthop *nexthop_dup(const struct nexthop *nexthop, + extern struct nexthop *nexthop_dup_no_recurse(const struct nexthop *nexthop, + struct nexthop *rparent); + +-/* Check nexthop of IFINDEX type */ +-extern bool nexthop_is_ifindex_type(const struct nexthop *nh); ++/* Is this nexthop a blackhole? */ ++extern bool nexthop_is_blackhole(const struct nexthop *nh); + + /* + * Parse one or more backup index values, as comma-separated numbers, +diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c +index e5efbf8d5f..33d2344f51 100644 +--- a/zebra/zebra_nhg.c ++++ b/zebra/zebra_nhg.c +@@ -3636,8 +3636,9 @@ void zebra_interface_nhg_reinstall(struct interface *ifp) + "%s: Setting the valid flag for nhe %pNG, interface: %s", + __func__, rb_node_dep->nhe, ifp->name); + } ++ + /* Check for singleton NHG associated to interface */ +- if (nexthop_is_ifindex_type(nh) && ++ if (!nexthop_is_blackhole(nh) && + zebra_nhg_depends_is_empty(rb_node_dep->nhe)) { + struct nhg_connected *rb_node_dependent; + +-- +2.17.1 + diff --git a/src/sonic-frr/patch/series b/src/sonic-frr/patch/series index f2cd08ca666c..445bc9dd9653 100644 --- a/src/sonic-frr/patch/series +++ b/src/sonic-frr/patch/series @@ -26,3 +26,4 @@ 0026-bgp-fib-suppress-announce-fix.patch 0027-lib-Do-not-convert-EVPN-prefixes-into-IPv4-IPv6-if-n.patch 0028-zebra-fix-parse-attr-problems-for-encap.patch +0029-zebra-nhg-fix-on-intf-up.patch