diff --git a/go.mod b/go.mod index 15a7c1b1..68e2a760 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( github.com/songgao/packets v0.0.0-20160404182456-549a10cd4091 github.com/songgao/water v0.0.0-20200317203138-2b4b6d7c09d8 github.com/stretchr/testify v1.9.0 - github.com/vishvananda/netlink v1.2.1-beta.2 + github.com/vishvananda/netlink v1.2.1 golang.org/x/crypto v0.26.0 golang.org/x/sync v0.8.0 golang.org/x/sys v0.24.0 @@ -39,7 +39,7 @@ require ( github.com/pierrec/lz4/v4 v4.1.14 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/u-root/uio v0.0.0-20240224005618-d2acac8f3701 // indirect - github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74 // indirect + github.com/vishvananda/netns v0.0.4 // indirect golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect golang.org/x/mod v0.19.0 // indirect golang.org/x/net v0.27.0 // indirect diff --git a/go.sum b/go.sum index 3440f272..7b9e453c 100644 --- a/go.sum +++ b/go.sum @@ -90,11 +90,10 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/u-root/uio v0.0.0-20240224005618-d2acac8f3701 h1:pyC9PaHYZFgEKFdlp3G8RaCKgVpHZnecvArXvPXcFkM= github.com/u-root/uio v0.0.0-20240224005618-d2acac8f3701/go.mod h1:P3a5rG4X7tI17Nn3aOIAYr5HbIMukwXG0urG0WuL8OA= -github.com/vishvananda/netlink v1.2.1-beta.2 h1:Llsql0lnQEbHj0I1OuKyp8otXp0r3q0mPkuhwHfStVs= -github.com/vishvananda/netlink v1.2.1-beta.2/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho= -github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= -github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74 h1:gga7acRE695APm9hlsSMoOoE65U4/TcqNj90mc69Rlg= -github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= +github.com/vishvananda/netlink v1.2.1 h1:pfLv/qlJUwOTPvtWREA7c3PI4u81YkqZw1DYhI2HmLA= +github.com/vishvananda/netlink v1.2.1/go.mod h1:i6NetklAujEcC6fK0JPjT8qSwWyO0HLn4UKG+hGqeJs= +github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8= +github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= @@ -126,12 +125,12 @@ golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200217220822-9197077df867/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200728102440-3e129f6d46b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg= golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= diff --git a/vendor/github.com/vishvananda/netlink/.gitignore b/vendor/github.com/vishvananda/netlink/.gitignore index 9f11b755..66f8fb50 100644 --- a/vendor/github.com/vishvananda/netlink/.gitignore +++ b/vendor/github.com/vishvananda/netlink/.gitignore @@ -1 +1,2 @@ .idea/ +.vscode/ diff --git a/vendor/github.com/vishvananda/netlink/addr_linux.go b/vendor/github.com/vishvananda/netlink/addr_linux.go index 72862ce1..218ab237 100644 --- a/vendor/github.com/vishvananda/netlink/addr_linux.go +++ b/vendor/github.com/vishvananda/netlink/addr_linux.go @@ -74,17 +74,19 @@ func (h *Handle) AddrDel(link Link, addr *Addr) error { } func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error { - base := link.Attrs() - if addr.Label != "" && !strings.HasPrefix(addr.Label, base.Name) { - return fmt.Errorf("label must begin with interface name") - } - h.ensureIndex(base) - family := nl.GetIPFamily(addr.IP) - msg := nl.NewIfAddrmsg(family) - msg.Index = uint32(base.Index) msg.Scope = uint8(addr.Scope) + if link == nil { + msg.Index = uint32(addr.LinkIndex) + } else { + base := link.Attrs() + if addr.Label != "" && !strings.HasPrefix(addr.Label, base.Name) { + return fmt.Errorf("label must begin with interface name") + } + h.ensureIndex(base) + msg.Index = uint32(base.Index) + } mask := addr.Mask if addr.Peer != nil { mask = addr.Peer.Mask @@ -296,23 +298,24 @@ type AddrUpdate struct { // AddrSubscribe takes a chan down which notifications will be sent // when addresses change. Close the 'done' chan to stop subscription. func AddrSubscribe(ch chan<- AddrUpdate, done <-chan struct{}) error { - return addrSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0, nil) + return addrSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0, nil, false) } // AddrSubscribeAt works like AddrSubscribe plus it allows the caller // to choose the network namespace in which to subscribe (ns). func AddrSubscribeAt(ns netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}) error { - return addrSubscribeAt(ns, netns.None(), ch, done, nil, false, 0, nil) + return addrSubscribeAt(ns, netns.None(), ch, done, nil, false, 0, nil, false) } // AddrSubscribeOptions contains a set of options to use with // AddrSubscribeWithOptions. type AddrSubscribeOptions struct { - Namespace *netns.NsHandle - ErrorCallback func(error) - ListExisting bool - ReceiveBufferSize int - ReceiveTimeout *unix.Timeval + Namespace *netns.NsHandle + ErrorCallback func(error) + ListExisting bool + ReceiveBufferSize int + ReceiveBufferForceSize bool + ReceiveTimeout *unix.Timeval } // AddrSubscribeWithOptions work like AddrSubscribe but enable to @@ -323,10 +326,12 @@ func AddrSubscribeWithOptions(ch chan<- AddrUpdate, done <-chan struct{}, option none := netns.None() options.Namespace = &none } - return addrSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, options.ReceiveBufferSize, options.ReceiveTimeout) + return addrSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, + options.ReceiveBufferSize, options.ReceiveTimeout, options.ReceiveBufferForceSize) } -func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}, cberr func(error), listExisting bool, rcvbuf int, rcvTimeout *unix.Timeval) error { +func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}, cberr func(error), listExisting bool, + rcvbuf int, rcvTimeout *unix.Timeval, rcvBufForce bool) error { s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_IPV4_IFADDR, unix.RTNLGRP_IPV6_IFADDR) if err != nil { return err @@ -336,19 +341,18 @@ func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-c return err } } - + if rcvbuf != 0 { + err = s.SetReceiveBufferSize(rcvbuf, rcvBufForce) + if err != nil { + return err + } + } if done != nil { go func() { <-done s.Close() }() } - if rcvbuf != 0 { - err = pkgHandle.SetSocketReceiveBufferSize(rcvbuf, false) - if err != nil { - return err - } - } if listExisting { req := pkgHandle.newNetlinkRequest(unix.RTM_GETADDR, unix.NLM_F_DUMP) diff --git a/vendor/github.com/vishvananda/netlink/bridge_linux.go b/vendor/github.com/vishvananda/netlink/bridge_linux.go index 6e1224c4..6c340b0c 100644 --- a/vendor/github.com/vishvananda/netlink/bridge_linux.go +++ b/vendor/github.com/vishvananda/netlink/bridge_linux.go @@ -63,7 +63,19 @@ func BridgeVlanAdd(link Link, vid uint16, pvid, untagged, self, master bool) err // BridgeVlanAdd adds a new vlan filter entry // Equivalent to: `bridge vlan add dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]` func (h *Handle) BridgeVlanAdd(link Link, vid uint16, pvid, untagged, self, master bool) error { - return h.bridgeVlanModify(unix.RTM_SETLINK, link, vid, pvid, untagged, self, master) + return h.bridgeVlanModify(unix.RTM_SETLINK, link, vid, 0, pvid, untagged, self, master) +} + +// BridgeVlanAddRange adds a new vlan filter entry +// Equivalent to: `bridge vlan add dev DEV vid VID-VIDEND [ pvid ] [ untagged ] [ self ] [ master ]` +func BridgeVlanAddRange(link Link, vid, vidEnd uint16, pvid, untagged, self, master bool) error { + return pkgHandle.BridgeVlanAddRange(link, vid, vidEnd, pvid, untagged, self, master) +} + +// BridgeVlanAddRange adds a new vlan filter entry +// Equivalent to: `bridge vlan add dev DEV vid VID-VIDEND [ pvid ] [ untagged ] [ self ] [ master ]` +func (h *Handle) BridgeVlanAddRange(link Link, vid, vidEnd uint16, pvid, untagged, self, master bool) error { + return h.bridgeVlanModify(unix.RTM_SETLINK, link, vid, vidEnd, pvid, untagged, self, master) } // BridgeVlanDel adds a new vlan filter entry @@ -75,10 +87,22 @@ func BridgeVlanDel(link Link, vid uint16, pvid, untagged, self, master bool) err // BridgeVlanDel adds a new vlan filter entry // Equivalent to: `bridge vlan del dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]` func (h *Handle) BridgeVlanDel(link Link, vid uint16, pvid, untagged, self, master bool) error { - return h.bridgeVlanModify(unix.RTM_DELLINK, link, vid, pvid, untagged, self, master) + return h.bridgeVlanModify(unix.RTM_DELLINK, link, vid, 0, pvid, untagged, self, master) } -func (h *Handle) bridgeVlanModify(cmd int, link Link, vid uint16, pvid, untagged, self, master bool) error { +// BridgeVlanDelRange adds a new vlan filter entry +// Equivalent to: `bridge vlan del dev DEV vid VID-VIDEND [ pvid ] [ untagged ] [ self ] [ master ]` +func BridgeVlanDelRange(link Link, vid, vidEnd uint16, pvid, untagged, self, master bool) error { + return pkgHandle.BridgeVlanDelRange(link, vid, vidEnd, pvid, untagged, self, master) +} + +// BridgeVlanDelRange adds a new vlan filter entry +// Equivalent to: `bridge vlan del dev DEV vid VID-VIDEND [ pvid ] [ untagged ] [ self ] [ master ]` +func (h *Handle) BridgeVlanDelRange(link Link, vid, vidEnd uint16, pvid, untagged, self, master bool) error { + return h.bridgeVlanModify(unix.RTM_DELLINK, link, vid, vidEnd, pvid, untagged, self, master) +} + +func (h *Handle) bridgeVlanModify(cmd int, link Link, vid, vidEnd uint16, pvid, untagged, self, master bool) error { base := link.Attrs() h.ensureIndex(base) req := h.newNetlinkRequest(cmd, unix.NLM_F_ACK) @@ -105,7 +129,20 @@ func (h *Handle) bridgeVlanModify(cmd int, link Link, vid uint16, pvid, untagged if untagged { vlanInfo.Flags |= nl.BRIDGE_VLAN_INFO_UNTAGGED } - br.AddRtAttr(nl.IFLA_BRIDGE_VLAN_INFO, vlanInfo.Serialize()) + + if vidEnd != 0 { + vlanEndInfo := &nl.BridgeVlanInfo{Vid: vidEnd} + vlanEndInfo.Flags = vlanInfo.Flags + + vlanInfo.Flags |= nl.BRIDGE_VLAN_INFO_RANGE_BEGIN + br.AddRtAttr(nl.IFLA_BRIDGE_VLAN_INFO, vlanInfo.Serialize()) + + vlanEndInfo.Flags |= nl.BRIDGE_VLAN_INFO_RANGE_END + br.AddRtAttr(nl.IFLA_BRIDGE_VLAN_INFO, vlanEndInfo.Serialize()) + } else { + br.AddRtAttr(nl.IFLA_BRIDGE_VLAN_INFO, vlanInfo.Serialize()) + } + req.AddData(br) _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err diff --git a/vendor/github.com/vishvananda/netlink/chain.go b/vendor/github.com/vishvananda/netlink/chain.go new file mode 100644 index 00000000..1d1c144e --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/chain.go @@ -0,0 +1,22 @@ +package netlink + +import ( + "fmt" +) + +// Chain contains the attributes of a Chain +type Chain struct { + Parent uint32 + Chain uint32 +} + +func (c Chain) String() string { + return fmt.Sprintf("{Parent: %d, Chain: %d}", c.Parent, c.Chain) +} + +func NewChain(parent uint32, chain uint32) Chain { + return Chain{ + Parent: parent, + Chain: chain, + } +} diff --git a/vendor/github.com/vishvananda/netlink/chain_linux.go b/vendor/github.com/vishvananda/netlink/chain_linux.go new file mode 100644 index 00000000..d9f44161 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/chain_linux.go @@ -0,0 +1,112 @@ +package netlink + +import ( + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +// ChainDel will delete a chain from the system. +func ChainDel(link Link, chain Chain) error { + // Equivalent to: `tc chain del $chain` + return pkgHandle.ChainDel(link, chain) +} + +// ChainDel will delete a chain from the system. +// Equivalent to: `tc chain del $chain` +func (h *Handle) ChainDel(link Link, chain Chain) error { + return h.chainModify(unix.RTM_DELCHAIN, 0, link, chain) +} + +// ChainAdd will add a chain to the system. +// Equivalent to: `tc chain add` +func ChainAdd(link Link, chain Chain) error { + return pkgHandle.ChainAdd(link, chain) +} + +// ChainAdd will add a chain to the system. +// Equivalent to: `tc chain add` +func (h *Handle) ChainAdd(link Link, chain Chain) error { + return h.chainModify( + unix.RTM_NEWCHAIN, + unix.NLM_F_CREATE|unix.NLM_F_EXCL, + link, + chain) +} + +func (h *Handle) chainModify(cmd, flags int, link Link, chain Chain) error { + req := h.newNetlinkRequest(cmd, flags|unix.NLM_F_ACK) + index := int32(0) + if link != nil { + base := link.Attrs() + h.ensureIndex(base) + index = int32(base.Index) + } + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: index, + Parent: chain.Parent, + } + req.AddData(msg) + req.AddData(nl.NewRtAttr(nl.TCA_CHAIN, nl.Uint32Attr(chain.Chain))) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// ChainList gets a list of chains in the system. +// Equivalent to: `tc chain list`. +// The list can be filtered by link. +func ChainList(link Link, parent uint32) ([]Chain, error) { + return pkgHandle.ChainList(link, parent) +} + +// ChainList gets a list of chains in the system. +// Equivalent to: `tc chain list`. +// The list can be filtered by link. +func (h *Handle) ChainList(link Link, parent uint32) ([]Chain, error) { + req := h.newNetlinkRequest(unix.RTM_GETCHAIN, unix.NLM_F_DUMP) + index := int32(0) + if link != nil { + base := link.Attrs() + h.ensureIndex(base) + index = int32(base.Index) + } + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: index, + Parent: parent, + } + req.AddData(msg) + + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWCHAIN) + if err != nil { + return nil, err + } + + var res []Chain + for _, m := range msgs { + msg := nl.DeserializeTcMsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + // skip chains from other interfaces + if link != nil && msg.Ifindex != index { + continue + } + + var chain Chain + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.TCA_CHAIN: + chain.Chain = native.Uint32(attr.Value) + chain.Parent = parent + } + } + res = append(res, chain) + } + + return res, nil +} diff --git a/vendor/github.com/vishvananda/netlink/class.go b/vendor/github.com/vishvananda/netlink/class.go index 10ceffed..e686f674 100644 --- a/vendor/github.com/vishvananda/netlink/class.go +++ b/vendor/github.com/vishvananda/netlink/class.go @@ -47,6 +47,7 @@ type ClassStatistics struct { Basic *GnetStatsBasic Queue *GnetStatsQueue RateEst *GnetStatsRateEst + BasicHw *GnetStatsBasic // Hardward statistics added in kernel 4.20 } // NewClassStatistics Construct a ClassStatistics struct which fields are all initialized by 0. @@ -55,6 +56,7 @@ func NewClassStatistics() *ClassStatistics { Basic: &GnetStatsBasic{}, Queue: &GnetStatsQueue{}, RateEst: &GnetStatsRateEst{}, + BasicHw: &GnetStatsBasic{}, } } diff --git a/vendor/github.com/vishvananda/netlink/class_linux.go b/vendor/github.com/vishvananda/netlink/class_linux.go index 6f542ba4..a82eb09d 100644 --- a/vendor/github.com/vishvananda/netlink/class_linux.go +++ b/vendor/github.com/vishvananda/netlink/class_linux.go @@ -388,6 +388,11 @@ func parseTcStats2(data []byte) (*ClassStatistics, error) { return nil, fmt.Errorf("Failed to parse ClassStatistics.RateEst with: %v\n%s", err, hex.Dump(datum.Value)) } + case nl.TCA_STATS_BASIC_HW: + if err := parseGnetStats(datum.Value, stats.BasicHw); err != nil { + return nil, fmt.Errorf("Failed to parse ClassStatistics.BasicHw with: %v\n%s", + err, hex.Dump(datum.Value)) + } } } diff --git a/vendor/github.com/vishvananda/netlink/conntrack_linux.go b/vendor/github.com/vishvananda/netlink/conntrack_linux.go index 03ea1b98..d2cd68af 100644 --- a/vendor/github.com/vishvananda/netlink/conntrack_linux.go +++ b/vendor/github.com/vishvananda/netlink/conntrack_linux.go @@ -55,10 +55,28 @@ func ConntrackTableFlush(table ConntrackTableType) error { return pkgHandle.ConntrackTableFlush(table) } +// ConntrackCreate creates a new conntrack flow in the desired table +// conntrack -I [table] Create a conntrack or expectation +func ConntrackCreate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error { + return pkgHandle.ConntrackCreate(table, family, flow) +} + +// ConntrackUpdate updates an existing conntrack flow in the desired table using the handle +// conntrack -U [table] Update a conntrack +func ConntrackUpdate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error { + return pkgHandle.ConntrackUpdate(table, family, flow) +} + // ConntrackDeleteFilter deletes entries on the specified table on the base of the filter // conntrack -D [table] parameters Delete conntrack or expectation func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter CustomConntrackFilter) (uint, error) { - return pkgHandle.ConntrackDeleteFilter(table, family, filter) + return pkgHandle.ConntrackDeleteFilters(table, family, filter) +} + +// ConntrackDeleteFilters deletes entries on the specified table matching any of the specified filters +// conntrack -D [table] parameters Delete conntrack or expectation +func ConntrackDeleteFilters(table ConntrackTableType, family InetFamily, filters ...CustomConntrackFilter) (uint, error) { + return pkgHandle.ConntrackDeleteFilters(table, family, filters...) } // ConntrackTableList returns the flow list of a table of a specific family using the netlink handle passed @@ -87,9 +105,43 @@ func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error { return err } -// ConntrackDeleteFilter deletes entries on the specified table on the base of the filter using the netlink handle passed +// ConntrackCreate creates a new conntrack flow in the desired table using the handle +// conntrack -I [table] Create a conntrack or expectation +func (h *Handle) ConntrackCreate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error { + req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_NEW, unix.NLM_F_ACK|unix.NLM_F_CREATE) + attr, err := flow.toNlData() + if err != nil { + return err + } + + for _, a := range attr { + req.AddData(a) + } + + _, err = req.Execute(unix.NETLINK_NETFILTER, 0) + return err +} + +// ConntrackUpdate updates an existing conntrack flow in the desired table using the handle +// conntrack -U [table] Update a conntrack +func (h *Handle) ConntrackUpdate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error { + req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_NEW, unix.NLM_F_ACK|unix.NLM_F_REPLACE) + attr, err := flow.toNlData() + if err != nil { + return err + } + + for _, a := range attr { + req.AddData(a) + } + + _, err = req.Execute(unix.NETLINK_NETFILTER, 0) + return err +} + +// ConntrackDeleteFilters deletes entries on the specified table matching any of the specified filters using the netlink handle passed // conntrack -D [table] parameters Delete conntrack or expectation -func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter CustomConntrackFilter) (uint, error) { +func (h *Handle) ConntrackDeleteFilters(table ConntrackTableType, family InetFamily, filters ...CustomConntrackFilter) (uint, error) { res, err := h.dumpConntrackTable(table, family) if err != nil { return 0, err @@ -98,12 +150,16 @@ func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFami var matched uint for _, dataRaw := range res { flow := parseRawData(dataRaw) - if match := filter.MatchConntrackFlow(flow); match { - req2 := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_DELETE, unix.NLM_F_ACK) - // skip the first 4 byte that are the netfilter header, the newConntrackRequest is adding it already - req2.AddRawData(dataRaw[4:]) - req2.Execute(unix.NETLINK_NETFILTER, 0) - matched++ + for _, filter := range filters { + if match := filter.MatchConntrackFlow(flow); match { + req2 := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_DELETE, unix.NLM_F_ACK) + // skip the first 4 byte that are the netfilter header, the newConntrackRequest is adding it already + req2.AddRawData(dataRaw[4:]) + req2.Execute(unix.NETLINK_NETFILTER, 0) + matched++ + // flow is already deleted, no need to match on other filters and continue to the next flow. + break + } } } @@ -128,10 +184,44 @@ func (h *Handle) dumpConntrackTable(table ConntrackTableType, family InetFamily) return req.Execute(unix.NETLINK_NETFILTER, 0) } +// ProtoInfo wraps an L4-protocol structure - roughly corresponds to the +// __nfct_protoinfo union found in libnetfilter_conntrack/include/internal/object.h. +// Currently, only protocol names, and TCP state is supported. +type ProtoInfo interface { + Protocol() string +} + +// ProtoInfoTCP corresponds to the `tcp` struct of the __nfct_protoinfo union. +// Only TCP state is currently supported. +type ProtoInfoTCP struct { + State uint8 +} +// Protocol returns "tcp". +func (*ProtoInfoTCP) Protocol() string {return "tcp"} +func (p *ProtoInfoTCP) toNlData() ([]*nl.RtAttr, error) { + ctProtoInfo := nl.NewRtAttr(unix.NLA_F_NESTED | nl.CTA_PROTOINFO, []byte{}) + ctProtoInfoTCP := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_PROTOINFO_TCP, []byte{}) + ctProtoInfoTCPState := nl.NewRtAttr(nl.CTA_PROTOINFO_TCP_STATE, nl.Uint8Attr(p.State)) + ctProtoInfoTCP.AddChild(ctProtoInfoTCPState) + ctProtoInfo.AddChild(ctProtoInfoTCP) + + return []*nl.RtAttr{ctProtoInfo}, nil +} + +// ProtoInfoSCTP only supports the protocol name. +type ProtoInfoSCTP struct {} +// Protocol returns "sctp". +func (*ProtoInfoSCTP) Protocol() string {return "sctp"} + +// ProtoInfoDCCP only supports the protocol name. +type ProtoInfoDCCP struct {} +// Protocol returns "dccp". +func (*ProtoInfoDCCP) Protocol() string {return "dccp"} + // The full conntrack flow structure is very complicated and can be found in the file: // http://git.netfilter.org/libnetfilter_conntrack/tree/include/internal/object.h // For the time being, the structure below allows to parse and extract the base information of a flow -type ipTuple struct { +type IPTuple struct { Bytes uint64 DstIP net.IP DstPort uint16 @@ -141,28 +231,150 @@ type ipTuple struct { SrcPort uint16 } +// toNlData generates the inner fields of a nested tuple netlink datastructure +// does not generate the "nested"-flagged outer message. +func (t *IPTuple) toNlData(family uint8) ([]*nl.RtAttr, error) { + + var srcIPsFlag, dstIPsFlag int + if family == nl.FAMILY_V4 { + srcIPsFlag = nl.CTA_IP_V4_SRC + dstIPsFlag = nl.CTA_IP_V4_DST + } else if family == nl.FAMILY_V6 { + srcIPsFlag = nl.CTA_IP_V6_SRC + dstIPsFlag = nl.CTA_IP_V6_DST + } else { + return []*nl.RtAttr{}, fmt.Errorf("couldn't generate netlink message for tuple due to unrecognized FamilyType '%d'", family) + } + + ctTupleIP := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_IP, nil) + ctTupleIPSrc := nl.NewRtAttr(srcIPsFlag, t.SrcIP) + ctTupleIP.AddChild(ctTupleIPSrc) + ctTupleIPDst := nl.NewRtAttr(dstIPsFlag, t.DstIP) + ctTupleIP.AddChild(ctTupleIPDst) + + ctTupleProto := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_PROTO, nil) + ctTupleProtoNum := nl.NewRtAttr(nl.CTA_PROTO_NUM, []byte{t.Protocol}) + ctTupleProto.AddChild(ctTupleProtoNum) + ctTupleProtoSrcPort := nl.NewRtAttr(nl.CTA_PROTO_SRC_PORT, nl.BEUint16Attr(t.SrcPort)) + ctTupleProto.AddChild(ctTupleProtoSrcPort) + ctTupleProtoDstPort := nl.NewRtAttr(nl.CTA_PROTO_DST_PORT, nl.BEUint16Attr(t.DstPort)) + ctTupleProto.AddChild(ctTupleProtoDstPort, ) + + return []*nl.RtAttr{ctTupleIP, ctTupleProto}, nil +} + type ConntrackFlow struct { FamilyType uint8 - Forward ipTuple - Reverse ipTuple + Forward IPTuple + Reverse IPTuple Mark uint32 + Zone uint16 TimeStart uint64 TimeStop uint64 TimeOut uint32 + Labels []byte + ProtoInfo ProtoInfo } func (s *ConntrackFlow) String() string { // conntrack cmd output: - // udp 17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 packets=5 bytes=532 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 packets=10 bytes=1078 mark=0 + // udp 17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 packets=5 bytes=532 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 packets=10 bytes=1078 mark=0 labels=0x00000000050012ac4202010000000000 zone=100 // start=2019-07-26 01:26:21.557800506 +0000 UTC stop=1970-01-01 00:00:00 +0000 UTC timeout=30(sec) start := time.Unix(0, int64(s.TimeStart)) stop := time.Unix(0, int64(s.TimeStop)) timeout := int32(s.TimeOut) - return fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d packets=%d bytes=%d\tsrc=%s dst=%s sport=%d dport=%d packets=%d bytes=%d mark=0x%x start=%v stop=%v timeout=%d(sec)", + res := fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d packets=%d bytes=%d\tsrc=%s dst=%s sport=%d dport=%d packets=%d bytes=%d mark=0x%x ", nl.L4ProtoMap[s.Forward.Protocol], s.Forward.Protocol, s.Forward.SrcIP.String(), s.Forward.DstIP.String(), s.Forward.SrcPort, s.Forward.DstPort, s.Forward.Packets, s.Forward.Bytes, s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort, s.Reverse.Packets, s.Reverse.Bytes, - s.Mark, start, stop, timeout) + s.Mark) + if len(s.Labels) > 0 { + res += fmt.Sprintf("labels=0x%x ", s.Labels) + } + if s.Zone != 0 { + res += fmt.Sprintf("zone=%d ", s.Zone) + } + res += fmt.Sprintf("start=%v stop=%v timeout=%d(sec)", start, stop, timeout) + return res +} + +// toNlData generates netlink messages representing the flow. +func (s *ConntrackFlow) toNlData() ([]*nl.RtAttr, error) { + var payload []*nl.RtAttr + // The message structure is built as follows: + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + + // CTA_TUPLE_ORIG + ctTupleOrig := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_ORIG, nil) + forwardFlowAttrs, err := s.Forward.toNlData(s.FamilyType) + if err != nil { + return nil, fmt.Errorf("couldn't generate netlink data for conntrack forward flow: %w", err) + } + for _, a := range forwardFlowAttrs { + ctTupleOrig.AddChild(a) + } + + // CTA_TUPLE_REPLY + ctTupleReply := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_REPLY, nil) + reverseFlowAttrs, err := s.Reverse.toNlData(s.FamilyType) + if err != nil { + return nil, fmt.Errorf("couldn't generate netlink data for conntrack reverse flow: %w", err) + } + for _, a := range reverseFlowAttrs { + ctTupleReply.AddChild(a) + } + + ctMark := nl.NewRtAttr(nl.CTA_MARK, nl.BEUint32Attr(s.Mark)) + ctTimeout := nl.NewRtAttr(nl.CTA_TIMEOUT, nl.BEUint32Attr(s.TimeOut)) + + payload = append(payload, ctTupleOrig, ctTupleReply, ctMark, ctTimeout) + + if s.ProtoInfo != nil { + switch p := s.ProtoInfo.(type) { + case *ProtoInfoTCP: + attrs, err := p.toNlData() + if err != nil { + return nil, fmt.Errorf("couldn't generate netlink data for conntrack flow's TCP protoinfo: %w", err) + } + payload = append(payload, attrs...) + default: + return nil, errors.New("couldn't generate netlink data for conntrack: field 'ProtoInfo' only supports TCP or nil") + } + } + + return payload, nil } // This method parse the ip tuple structure @@ -172,7 +384,7 @@ func (s *ConntrackFlow) String() string { // // // -func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) uint8 { +func parseIpTuple(reader *bytes.Reader, tpl *IPTuple) uint8 { for i := 0; i < 2; i++ { _, t, _, v := parseNfAttrTLV(reader) switch t { @@ -191,7 +403,7 @@ func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) uint8 { tpl.Protocol = uint8(v[0]) } // We only parse TCP & UDP headers. Skip the others. - if tpl.Protocol != 6 && tpl.Protocol != 17 { + if tpl.Protocol != unix.IPPROTO_TCP && tpl.Protocol != unix.IPPROTO_UDP { // skip the rest bytesRemaining := protoInfoTotalLen - protoInfoBytesRead reader.Seek(int64(bytesRemaining), seekCurrent) @@ -240,9 +452,13 @@ func parseNfAttrTL(r *bytes.Reader) (isNested bool, attrType, len uint16) { return isNested, attrType, len } -func skipNfAttrValue(r *bytes.Reader, len uint16) { +// skipNfAttrValue seeks `r` past attr of length `len`. +// Maintains buffer alignment. +// Returns length of the seek performed. +func skipNfAttrValue(r *bytes.Reader, len uint16) uint16 { len = (len + nl.NLA_ALIGNTO - 1) & ^(nl.NLA_ALIGNTO - 1) r.Seek(int64(len), seekCurrent) + return len } func parseBERaw16(r *bytes.Reader, v *uint16) { @@ -257,6 +473,10 @@ func parseBERaw64(r *bytes.Reader, v *uint64) { binary.Read(r, binary.BigEndian, v) } +func parseRaw32(r *bytes.Reader, v *uint32) { + binary.Read(r, nl.NativeEndian(), v) +} + func parseByteAndPacketCounters(r *bytes.Reader) (bytes, packets uint64) { for i := 0; i < 2; i++ { switch _, t, _ := parseNfAttrTL(r); t { @@ -296,6 +516,60 @@ func parseTimeStamp(r *bytes.Reader, readSize uint16) (tstart, tstop uint64) { } +func parseProtoInfoTCPState(r *bytes.Reader) (s uint8) { + binary.Read(r, binary.BigEndian, &s) + r.Seek(nl.SizeofNfattr - 1, seekCurrent) + return s +} + +// parseProtoInfoTCP reads the entire nested protoinfo structure, but only parses the state attr. +func parseProtoInfoTCP(r *bytes.Reader, attrLen uint16) (*ProtoInfoTCP) { + p := new(ProtoInfoTCP) + bytesRead := 0 + for bytesRead < int(attrLen) { + _, t, l := parseNfAttrTL(r) + bytesRead += nl.SizeofNfattr + + switch t { + case nl.CTA_PROTOINFO_TCP_STATE: + p.State = parseProtoInfoTCPState(r) + bytesRead += nl.SizeofNfattr + default: + bytesRead += int(skipNfAttrValue(r, l)) + } + } + + return p +} + +func parseProtoInfo(r *bytes.Reader, attrLen uint16) (p ProtoInfo) { + bytesRead := 0 + for bytesRead < int(attrLen) { + _, t, l := parseNfAttrTL(r) + bytesRead += nl.SizeofNfattr + + switch t { + case nl.CTA_PROTOINFO_TCP: + p = parseProtoInfoTCP(r, l) + bytesRead += int(l) + // No inner fields of DCCP / SCTP currently supported. + case nl.CTA_PROTOINFO_DCCP: + p = new(ProtoInfoDCCP) + skipped := skipNfAttrValue(r, l) + bytesRead += int(skipped) + case nl.CTA_PROTOINFO_SCTP: + p = new(ProtoInfoSCTP) + skipped := skipNfAttrValue(r, l) + bytesRead += int(skipped) + default: + skipped := skipNfAttrValue(r, l) + bytesRead += int(skipped) + } + } + + return p +} + func parseTimeOut(r *bytes.Reader) (ttimeout uint32) { parseBERaw32(r, &ttimeout) return @@ -306,6 +580,18 @@ func parseConnectionMark(r *bytes.Reader) (mark uint32) { return } +func parseConnectionLabels(r *bytes.Reader) (label []byte) { + label = make([]byte, 16) // netfilter defines 128 bit labels value + binary.Read(r, nl.NativeEndian(), &label) + return +} + +func parseConnectionZone(r *bytes.Reader) (zone uint16) { + parseBERaw16(r, &zone) + r.Seek(2, seekCurrent) + return +} + func parseRawData(data []byte) *ConntrackFlow { s := &ConntrackFlow{} // First there is the Nfgenmsg header @@ -343,7 +629,7 @@ func parseRawData(data []byte) *ConntrackFlow { case nl.CTA_TIMESTAMP: s.TimeStart, s.TimeStop = parseTimeStamp(reader, l) case nl.CTA_PROTOINFO: - skipNfAttrValue(reader, l) + s.ProtoInfo = parseProtoInfo(reader, l) default: skipNfAttrValue(reader, l) } @@ -351,10 +637,14 @@ func parseRawData(data []byte) *ConntrackFlow { switch t { case nl.CTA_MARK: s.Mark = parseConnectionMark(reader) + case nl.CTA_LABELS: + s.Labels = parseConnectionLabels(reader) case nl.CTA_TIMEOUT: s.TimeOut = parseTimeOut(reader) - case nl.CTA_STATUS, nl.CTA_USE, nl.CTA_ID: + case nl.CTA_ID, nl.CTA_STATUS, nl.CTA_USE: skipNfAttrValue(reader, l) + case nl.CTA_ZONE: + s.Zone = parseConnectionZone(reader) default: skipNfAttrValue(reader, l) } @@ -399,16 +689,18 @@ func parseRawData(data []byte) *ConntrackFlow { type ConntrackFilterType uint8 const ( - ConntrackOrigSrcIP = iota // -orig-src ip Source address from original direction - ConntrackOrigDstIP // -orig-dst ip Destination address from original direction - ConntrackReplySrcIP // --reply-src ip Reply Source IP - ConntrackReplyDstIP // --reply-dst ip Reply Destination IP - ConntrackReplyAnyIP // Match source or destination reply IP - ConntrackOrigSrcPort // --orig-port-src port Source port in original direction - ConntrackOrigDstPort // --orig-port-dst port Destination port in original direction - ConntrackNatSrcIP = ConntrackReplySrcIP // deprecated use instead ConntrackReplySrcIP - ConntrackNatDstIP = ConntrackReplyDstIP // deprecated use instead ConntrackReplyDstIP - ConntrackNatAnyIP = ConntrackReplyAnyIP // deprecated use instead ConntrackReplyAnyIP + ConntrackOrigSrcIP = iota // -orig-src ip Source address from original direction + ConntrackOrigDstIP // -orig-dst ip Destination address from original direction + ConntrackReplySrcIP // --reply-src ip Reply Source IP + ConntrackReplyDstIP // --reply-dst ip Reply Destination IP + ConntrackReplyAnyIP // Match source or destination reply IP + ConntrackOrigSrcPort // --orig-port-src port Source port in original direction + ConntrackOrigDstPort // --orig-port-dst port Destination port in original direction + ConntrackMatchLabels // --label label1,label2 Labels used in entry + ConntrackUnmatchLabels // --label label1,label2 Labels not used in entry + ConntrackNatSrcIP = ConntrackReplySrcIP // deprecated use instead ConntrackReplySrcIP + ConntrackNatDstIP = ConntrackReplyDstIP // deprecated use instead ConntrackReplyDstIP + ConntrackNatAnyIP = ConntrackReplyAnyIP // deprecated use instead ConntrackReplyAnyIP ) type CustomConntrackFilter interface { @@ -421,6 +713,8 @@ type ConntrackFilter struct { ipNetFilter map[ConntrackFilterType]*net.IPNet portFilter map[ConntrackFilterType]uint16 protoFilter uint8 + labelFilter map[ConntrackFilterType][][]byte + zoneFilter *uint16 } // AddIPNet adds a IP subnet to the conntrack filter @@ -474,10 +768,43 @@ func (f *ConntrackFilter) AddProtocol(proto uint8) error { return nil } +// AddLabels adds the provided list (zero or more) of labels to the conntrack filter +// ConntrackFilterType here can be either: +// 1. ConntrackMatchLabels: This matches every flow that has a label value (len(flow.Labels) > 0) +// against the list of provided labels. If `flow.Labels` contains ALL the provided labels +// it is considered a match. This can be used when you want to match flows that contain +// one or more labels. +// 2. ConntrackUnmatchLabels: This matches every flow that has a label value (len(flow.Labels) > 0) +// against the list of provided labels. If `flow.Labels` does NOT contain ALL the provided labels +// it is considered a match. This can be used when you want to match flows that don't contain +// one or more labels. +func (f *ConntrackFilter) AddLabels(tp ConntrackFilterType, labels [][]byte) error { + if len(labels) == 0 { + return errors.New("Invalid length for provided labels") + } + if f.labelFilter == nil { + f.labelFilter = make(map[ConntrackFilterType][][]byte) + } + if _, ok := f.labelFilter[tp]; ok { + return errors.New("Filter attribute already present") + } + f.labelFilter[tp] = labels + return nil +} + +// AddZone adds a zone to the conntrack filter +func (f *ConntrackFilter) AddZone(zone uint16) error { + if f.zoneFilter != nil { + return errors.New("Filter attribute already present") + } + f.zoneFilter = &zone + return nil +} + // MatchConntrackFlow applies the filter to the flow and returns true if the flow matches the filter // false otherwise func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool { - if len(f.ipNetFilter) == 0 && len(f.portFilter) == 0 && f.protoFilter == 0 { + if len(f.ipNetFilter) == 0 && len(f.portFilter) == 0 && f.protoFilter == 0 && len(f.labelFilter) == 0 && f.zoneFilter == nil { // empty filter always not match return false } @@ -488,6 +815,11 @@ func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool { return false } + // Conntrack zone filter + if f.zoneFilter != nil && *f.zoneFilter != flow.Zone { + return false + } + match := true // IP conntrack filter @@ -531,6 +863,29 @@ func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool { } } + // Label filter + if len(f.labelFilter) > 0 { + if len(flow.Labels) > 0 { + // --label label1,label2 in conn entry; + // every label passed should be contained in flow.Labels for a match to be true + if elem, found := f.labelFilter[ConntrackMatchLabels]; match && found { + for _, label := range elem { + match = match && (bytes.Contains(flow.Labels, label)) + } + } + // --label label1,label2 in conn entry; + // every label passed should be not contained in flow.Labels for a match to be true + if elem, found := f.labelFilter[ConntrackUnmatchLabels]; match && found { + for _, label := range elem { + match = match && !(bytes.Contains(flow.Labels, label)) + } + } + } else { + // flow doesn't contain labels, so it doesn't contain or notContain any provided matches + match = false + } + } + return match } diff --git a/vendor/github.com/vishvananda/netlink/devlink_linux.go b/vendor/github.com/vishvananda/netlink/devlink_linux.go index 358b232c..d98801db 100644 --- a/vendor/github.com/vishvananda/netlink/devlink_linux.go +++ b/vendor/github.com/vishvananda/netlink/devlink_linux.go @@ -84,6 +84,270 @@ type DevlinkDeviceInfo struct { FwUndi string } +// DevlinkResource represents a device resource +type DevlinkResource struct { + Name string + ID uint64 + Size uint64 + SizeNew uint64 + SizeMin uint64 + SizeMax uint64 + SizeGranularity uint64 + PendingChange bool + Unit uint8 + SizeValid bool + OCCValid bool + OCCSize uint64 + Parent *DevlinkResource + Children []DevlinkResource +} + +// parseAttributes parses provided Netlink Attributes and populates DevlinkResource, returns error if occured +func (dlr *DevlinkResource) parseAttributes(attrs map[uint16]syscall.NetlinkRouteAttr) error { + var attr syscall.NetlinkRouteAttr + var ok bool + + // mandatory attributes + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_ID] + if !ok { + return fmt.Errorf("missing resource id") + } + dlr.ID = native.Uint64(attr.Value) + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_NAME] + if !ok { + return fmt.Errorf("missing resource name") + } + dlr.Name = nl.BytesToString(attr.Value) + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_SIZE] + if !ok { + return fmt.Errorf("missing resource size") + } + dlr.Size = native.Uint64(attr.Value) + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_SIZE_GRAN] + if !ok { + return fmt.Errorf("missing resource size granularity") + } + dlr.SizeGranularity = native.Uint64(attr.Value) + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_UNIT] + if !ok { + return fmt.Errorf("missing resource unit") + } + dlr.Unit = uint8(attr.Value[0]) + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_SIZE_MIN] + if !ok { + return fmt.Errorf("missing resource size min") + } + dlr.SizeMin = native.Uint64(attr.Value) + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_SIZE_MAX] + if !ok { + return fmt.Errorf("missing resource size max") + } + dlr.SizeMax = native.Uint64(attr.Value) + + // optional attributes + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_OCC] + if ok { + dlr.OCCSize = native.Uint64(attr.Value) + dlr.OCCValid = true + } + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_SIZE_VALID] + if ok { + dlr.SizeValid = uint8(attr.Value[0]) != 0 + } + + dlr.SizeNew = dlr.Size + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_SIZE_NEW] + if ok { + dlr.SizeNew = native.Uint64(attr.Value) + } + + dlr.PendingChange = dlr.Size != dlr.SizeNew + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_LIST] + if ok { + // handle nested resoruces recursively + subResources, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return err + } + + for _, subresource := range subResources { + resource := DevlinkResource{Parent: dlr} + attrs, err := nl.ParseRouteAttrAsMap(subresource.Value) + if err != nil { + return err + } + err = resource.parseAttributes(attrs) + if err != nil { + return fmt.Errorf("failed to parse child resource, parent:%s. %w", dlr.Name, err) + } + dlr.Children = append(dlr.Children, resource) + } + } + return nil +} + +// DevlinkResources represents all devlink resources of a devlink device +type DevlinkResources struct { + Bus string + Device string + Resources []DevlinkResource +} + +// parseAttributes parses provided Netlink Attributes and populates DevlinkResources, returns error if occured +func (dlrs *DevlinkResources) parseAttributes(attrs map[uint16]syscall.NetlinkRouteAttr) error { + var attr syscall.NetlinkRouteAttr + var ok bool + + // Bus + attr, ok = attrs[nl.DEVLINK_ATTR_BUS_NAME] + if !ok { + return fmt.Errorf("missing bus name") + } + dlrs.Bus = nl.BytesToString(attr.Value) + + // Device + attr, ok = attrs[nl.DEVLINK_ATTR_DEV_NAME] + if !ok { + return fmt.Errorf("missing device name") + } + dlrs.Device = nl.BytesToString(attr.Value) + + // Resource List + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_LIST] + if !ok { + return fmt.Errorf("missing resource list") + } + + resourceAttrs, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return err + } + + for _, resourceAttr := range resourceAttrs { + resource := DevlinkResource{} + attrs, err := nl.ParseRouteAttrAsMap(resourceAttr.Value) + if err != nil { + return err + } + err = resource.parseAttributes(attrs) + if err != nil { + return fmt.Errorf("failed to parse root resoruces, %w", err) + } + dlrs.Resources = append(dlrs.Resources, resource) + } + + return nil +} + +// DevlinkParam represents parameter of the device +type DevlinkParam struct { + Name string + IsGeneric bool + Type uint8 // possible values are in nl.DEVLINK_PARAM_TYPE_* constants + Values []DevlinkParamValue +} + +// DevlinkParamValue contains values of the parameter +// Data field contains specific type which can be casted by unsing info from the DevlinkParam.Type field +type DevlinkParamValue struct { + rawData []byte + Data interface{} + CMODE uint8 // possible values are in nl.DEVLINK_PARAM_CMODE_* constants +} + +// parseAttributes parses provided Netlink Attributes and populates DevlinkParam, returns error if occured +func (dlp *DevlinkParam) parseAttributes(attrs []syscall.NetlinkRouteAttr) error { + var valuesList [][]syscall.NetlinkRouteAttr + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.DEVLINK_ATTR_PARAM: + nattrs, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return err + } + for _, nattr := range nattrs { + switch nattr.Attr.Type { + case nl.DEVLINK_ATTR_PARAM_NAME: + dlp.Name = nl.BytesToString(nattr.Value) + case nl.DEVLINK_ATTR_PARAM_GENERIC: + dlp.IsGeneric = true + case nl.DEVLINK_ATTR_PARAM_TYPE: + if len(nattr.Value) == 1 { + dlp.Type = nattr.Value[0] + } + case nl.DEVLINK_ATTR_PARAM_VALUES_LIST: + nnattrs, err := nl.ParseRouteAttr(nattr.Value) + if err != nil { + return err + } + valuesList = append(valuesList, nnattrs) + } + } + } + } + for _, valAttr := range valuesList { + v := DevlinkParamValue{} + if err := v.parseAttributes(valAttr, dlp.Type); err != nil { + return err + } + dlp.Values = append(dlp.Values, v) + } + return nil +} + +func (dlpv *DevlinkParamValue) parseAttributes(attrs []syscall.NetlinkRouteAttr, paramType uint8) error { + for _, attr := range attrs { + nattrs, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return err + } + var rawData []byte + for _, nattr := range nattrs { + switch nattr.Attr.Type { + case nl.DEVLINK_ATTR_PARAM_VALUE_DATA: + rawData = nattr.Value + case nl.DEVLINK_ATTR_PARAM_VALUE_CMODE: + if len(nattr.Value) == 1 { + dlpv.CMODE = nattr.Value[0] + } + } + } + switch paramType { + case nl.DEVLINK_PARAM_TYPE_U8: + dlpv.Data = uint8(0) + if rawData != nil && len(rawData) == 1 { + dlpv.Data = uint8(rawData[0]) + } + case nl.DEVLINK_PARAM_TYPE_U16: + dlpv.Data = uint16(0) + if rawData != nil { + dlpv.Data = native.Uint16(rawData) + } + case nl.DEVLINK_PARAM_TYPE_U32: + dlpv.Data = uint32(0) + if rawData != nil { + dlpv.Data = native.Uint32(rawData) + } + case nl.DEVLINK_PARAM_TYPE_STRING: + dlpv.Data = "" + if rawData != nil { + dlpv.Data = nl.BytesToString(rawData) + } + case nl.DEVLINK_PARAM_TYPE_BOOL: + dlpv.Data = rawData != nil + } + } + return nil +} + func parseDevLinkDeviceList(msgs [][]byte) ([]*DevlinkDevice, error) { devices := make([]*DevlinkDevice, 0, len(msgs)) for _, m := range msgs { @@ -443,6 +707,173 @@ func (h *Handle) DevLinkGetPortByIndex(Bus string, Device string, PortIndex uint return port, err } +// DevlinkGetDeviceResources returns devlink device resources +func DevlinkGetDeviceResources(bus string, device string) (*DevlinkResources, error) { + return pkgHandle.DevlinkGetDeviceResources(bus, device) +} + +// DevlinkGetDeviceResources returns devlink device resources +func (h *Handle) DevlinkGetDeviceResources(bus string, device string) (*DevlinkResources, error) { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_RESOURCE_DUMP, bus, device) + if err != nil { + return nil, err + } + + respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + + var resources DevlinkResources + for _, m := range respmsg { + attrs, err := nl.ParseRouteAttrAsMap(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + resources.parseAttributes(attrs) + } + + return &resources, nil +} + +// DevlinkGetDeviceParams returns parameters for devlink device +// Equivalent to: `devlink dev param show /` +func (h *Handle) DevlinkGetDeviceParams(bus string, device string) ([]*DevlinkParam, error) { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PARAM_GET, bus, device) + if err != nil { + return nil, err + } + req.Flags |= unix.NLM_F_DUMP + respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + var params []*DevlinkParam + for _, m := range respmsg { + attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + p := &DevlinkParam{} + if err := p.parseAttributes(attrs); err != nil { + return nil, err + } + params = append(params, p) + } + + return params, nil +} + +// DevlinkGetDeviceParams returns parameters for devlink device +// Equivalent to: `devlink dev param show /` +func DevlinkGetDeviceParams(bus string, device string) ([]*DevlinkParam, error) { + return pkgHandle.DevlinkGetDeviceParams(bus, device) +} + +// DevlinkGetDeviceParamByName returns specific parameter for devlink device +// Equivalent to: `devlink dev param show / name ` +func (h *Handle) DevlinkGetDeviceParamByName(bus string, device string, param string) (*DevlinkParam, error) { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PARAM_GET, bus, device) + if err != nil { + return nil, err + } + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PARAM_NAME, nl.ZeroTerminated(param))) + respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + if len(respmsg) == 0 { + return nil, fmt.Errorf("unexpected response") + } + attrs, err := nl.ParseRouteAttr(respmsg[0][nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + p := &DevlinkParam{} + if err := p.parseAttributes(attrs); err != nil { + return nil, err + } + return p, nil +} + +// DevlinkGetDeviceParamByName returns specific parameter for devlink device +// Equivalent to: `devlink dev param show / name ` +func DevlinkGetDeviceParamByName(bus string, device string, param string) (*DevlinkParam, error) { + return pkgHandle.DevlinkGetDeviceParamByName(bus, device, param) +} + +// DevlinkSetDeviceParam set specific parameter for devlink device +// Equivalent to: `devlink dev param set / name cmode value ` +// cmode argument should contain valid cmode value as uint8, modes are define in nl.DEVLINK_PARAM_CMODE_* constants +// value argument should have one of the following types: uint8, uint16, uint32, string, bool +func (h *Handle) DevlinkSetDeviceParam(bus string, device string, param string, cmode uint8, value interface{}) error { + // retrive the param type + p, err := h.DevlinkGetDeviceParamByName(bus, device, param) + if err != nil { + return fmt.Errorf("failed to get device param: %v", err) + } + paramType := p.Type + + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PARAM_SET, bus, device) + if err != nil { + return err + } + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PARAM_TYPE, nl.Uint8Attr(paramType))) + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PARAM_NAME, nl.ZeroTerminated(param))) + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PARAM_VALUE_CMODE, nl.Uint8Attr(cmode))) + + var valueAsBytes []byte + switch paramType { + case nl.DEVLINK_PARAM_TYPE_U8: + v, ok := value.(uint8) + if !ok { + return fmt.Errorf("unepected value type required: uint8, actual: %T", value) + } + valueAsBytes = nl.Uint8Attr(v) + case nl.DEVLINK_PARAM_TYPE_U16: + v, ok := value.(uint16) + if !ok { + return fmt.Errorf("unepected value type required: uint16, actual: %T", value) + } + valueAsBytes = nl.Uint16Attr(v) + case nl.DEVLINK_PARAM_TYPE_U32: + v, ok := value.(uint32) + if !ok { + return fmt.Errorf("unepected value type required: uint32, actual: %T", value) + } + valueAsBytes = nl.Uint32Attr(v) + case nl.DEVLINK_PARAM_TYPE_STRING: + v, ok := value.(string) + if !ok { + return fmt.Errorf("unepected value type required: string, actual: %T", value) + } + valueAsBytes = nl.ZeroTerminated(v) + case nl.DEVLINK_PARAM_TYPE_BOOL: + v, ok := value.(bool) + if !ok { + return fmt.Errorf("unepected value type required: bool, actual: %T", value) + } + if v { + valueAsBytes = []byte{} + } + default: + return fmt.Errorf("unsupported parameter type: %d", paramType) + } + if valueAsBytes != nil { + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PARAM_VALUE_DATA, valueAsBytes)) + } + _, err = req.Execute(unix.NETLINK_GENERIC, 0) + return err +} + +// DevlinkSetDeviceParam set specific parameter for devlink device +// Equivalent to: `devlink dev param set / name cmode value ` +// cmode argument should contain valid cmode value as uint8, modes are define in nl.DEVLINK_PARAM_CMODE_* constants +// value argument should have one of the following types: uint8, uint16, uint32, string, bool +func DevlinkSetDeviceParam(bus string, device string, param string, cmode uint8, value interface{}) error { + return pkgHandle.DevlinkSetDeviceParam(bus, device, param, cmode, value) +} + // DevLinkGetPortByIndex provides a pointer to devlink portand nil error, // otherwise returns an error code. func DevLinkGetPortByIndex(Bus string, Device string, PortIndex uint32) (*DevlinkPort, error) { diff --git a/vendor/github.com/vishvananda/netlink/filter.go b/vendor/github.com/vishvananda/netlink/filter.go index 2d798b0f..84e1ca7a 100644 --- a/vendor/github.com/vishvananda/netlink/filter.go +++ b/vendor/github.com/vishvananda/netlink/filter.go @@ -19,6 +19,7 @@ type FilterAttrs struct { Parent uint32 Priority uint16 // lower is higher priority Protocol uint16 // unix.ETH_P_* + Chain *uint32 } func (q FilterAttrs) String() string { @@ -27,6 +28,11 @@ func (q FilterAttrs) String() string { type TcAct int32 +const ( + TC_ACT_EXT_SHIFT = 28 + TC_ACT_EXT_VAL_MASK = (1 << TC_ACT_EXT_SHIFT) - 1 +) + const ( TC_ACT_UNSPEC TcAct = -1 TC_ACT_OK TcAct = 0 @@ -40,6 +46,22 @@ const ( TC_ACT_JUMP TcAct = 0x10000000 ) +func getTcActExt(local int32) int32 { + return local << TC_ACT_EXT_SHIFT +} + +func getTcActGotoChain() TcAct { + return TcAct(getTcActExt(2)) +} + +func getTcActExtOpcode(combined int32) int32 { + return combined & (^TC_ACT_EXT_VAL_MASK) +} + +func TcActExtCmp(combined int32, opcode int32) bool { + return getTcActExtOpcode(combined) == opcode +} + func (a TcAct) String() string { switch a { case TC_ACT_UNSPEC: @@ -63,6 +85,9 @@ func (a TcAct) String() string { case TC_ACT_JUMP: return "jump" } + if TcActExtCmp(int32(a), int32(getTcActGotoChain())) { + return "goto" + } return fmt.Sprintf("0x%x", int32(a)) } @@ -93,17 +118,32 @@ func (a TcPolAct) String() string { } type ActionAttrs struct { - Index int - Capab int - Action TcAct - Refcnt int - Bindcnt int + Index int + Capab int + Action TcAct + Refcnt int + Bindcnt int + Statistics *ActionStatistic + Timestamp *ActionTimestamp } func (q ActionAttrs) String() string { return fmt.Sprintf("{Index: %d, Capab: %x, Action: %s, Refcnt: %d, Bindcnt: %d}", q.Index, q.Capab, q.Action.String(), q.Refcnt, q.Bindcnt) } +type ActionTimestamp struct { + Installed uint64 + LastUsed uint64 + Expires uint64 + FirstUsed uint64 +} + +func (t ActionTimestamp) String() string { + return fmt.Sprintf("Installed %d LastUsed %d Expires %d FirstUsed %d", t.Installed, t.LastUsed, t.Expires, t.FirstUsed) +} + +type ActionStatistic ClassStatistics + // Action represents an action in any supported filter. type Action interface { Attrs() *ActionAttrs @@ -112,6 +152,7 @@ type Action interface { type GenericAction struct { ActionAttrs + Chain int32 } func (action *GenericAction) Type() string { @@ -275,6 +316,7 @@ type SkbEditAction struct { PType *uint16 Priority *uint32 Mark *uint32 + Mask *uint32 } func (action *SkbEditAction) Type() string { @@ -348,6 +390,7 @@ type FwFilter struct { InDev string Mask uint32 Police *PoliceAction + Actions []Action } func (filter *FwFilter) Attrs() *FilterAttrs { @@ -390,3 +433,30 @@ func (filter *GenericFilter) Attrs() *FilterAttrs { func (filter *GenericFilter) Type() string { return filter.FilterType } + +type PeditAction struct { + ActionAttrs + Proto uint8 + SrcMacAddr net.HardwareAddr + DstMacAddr net.HardwareAddr + SrcIP net.IP + DstIP net.IP + SrcPort uint16 + DstPort uint16 +} + +func (p *PeditAction) Attrs() *ActionAttrs { + return &p.ActionAttrs +} + +func (p *PeditAction) Type() string { + return "pedit" +} + +func NewPeditAction() *PeditAction { + return &PeditAction{ + ActionAttrs: ActionAttrs{ + Action: TC_ACT_PIPE, + }, + } +} diff --git a/vendor/github.com/vishvananda/netlink/filter_linux.go b/vendor/github.com/vishvananda/netlink/filter_linux.go index 4c6d1cf7..87cd18f8 100644 --- a/vendor/github.com/vishvananda/netlink/filter_linux.go +++ b/vendor/github.com/vishvananda/netlink/filter_linux.go @@ -41,6 +41,7 @@ type U32 struct { RedirIndex int Sel *TcU32Sel Actions []Action + Police *PoliceAction } func (filter *U32) Attrs() *FilterAttrs { @@ -64,6 +65,11 @@ type Flower struct { EncSrcIPMask net.IPMask EncDestPort uint16 EncKeyId uint32 + SkipHw bool + SkipSw bool + IPProto *nl.IPProto + DestPort uint16 + SrcPort uint16 Actions []Action } @@ -129,6 +135,39 @@ func (filter *Flower) encode(parent *nl.RtAttr) error { if filter.EncKeyId != 0 { parent.AddRtAttr(nl.TCA_FLOWER_KEY_ENC_KEY_ID, htonl(filter.EncKeyId)) } + if filter.IPProto != nil { + ipproto := *filter.IPProto + parent.AddRtAttr(nl.TCA_FLOWER_KEY_IP_PROTO, ipproto.Serialize()) + if filter.SrcPort != 0 { + switch ipproto { + case nl.IPPROTO_TCP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_TCP_SRC, htons(filter.SrcPort)) + case nl.IPPROTO_UDP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_UDP_SRC, htons(filter.SrcPort)) + case nl.IPPROTO_SCTP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_SCTP_SRC, htons(filter.SrcPort)) + } + } + if filter.DestPort != 0 { + switch ipproto { + case nl.IPPROTO_TCP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_TCP_DST, htons(filter.DestPort)) + case nl.IPPROTO_UDP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_UDP_DST, htons(filter.DestPort)) + case nl.IPPROTO_SCTP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_SCTP_DST, htons(filter.DestPort)) + } + } + } + + var flags uint32 = 0 + if filter.SkipHw { + flags |= nl.TCA_CLS_FLAGS_SKIP_HW + } + if filter.SkipSw { + flags |= nl.TCA_CLS_FLAGS_SKIP_SW + } + parent.AddRtAttr(nl.TCA_FLOWER_FLAGS, htonl(flags)) actionsAttr := parent.AddRtAttr(nl.TCA_FLOWER_ACT, nil) if err := EncodeActions(actionsAttr, filter.Actions); err != nil { @@ -162,6 +201,14 @@ func (filter *Flower) decode(data []syscall.NetlinkRouteAttr) error { filter.EncDestPort = ntohs(datum.Value) case nl.TCA_FLOWER_KEY_ENC_KEY_ID: filter.EncKeyId = ntohl(datum.Value) + case nl.TCA_FLOWER_KEY_IP_PROTO: + val := new(nl.IPProto) + *val = nl.IPProto(datum.Value[0]) + filter.IPProto = val + case nl.TCA_FLOWER_KEY_TCP_SRC, nl.TCA_FLOWER_KEY_UDP_SRC, nl.TCA_FLOWER_KEY_SCTP_SRC: + filter.SrcPort = ntohs(datum.Value) + case nl.TCA_FLOWER_KEY_TCP_DST, nl.TCA_FLOWER_KEY_UDP_DST, nl.TCA_FLOWER_KEY_SCTP_DST: + filter.DestPort = ntohs(datum.Value) case nl.TCA_FLOWER_ACT: tables, err := nl.ParseRouteAttr(datum.Value) if err != nil { @@ -171,6 +218,16 @@ func (filter *Flower) decode(data []syscall.NetlinkRouteAttr) error { if err != nil { return err } + case nl.TCA_FLOWER_FLAGS: + attr := nl.DeserializeUint32Bitfield(datum.Value) + skipSw := attr.Value & nl.TCA_CLS_FLAGS_SKIP_HW + skipHw := attr.Value & nl.TCA_CLS_FLAGS_SKIP_SW + if skipSw != 0 { + filter.SkipSw = true + } + if skipHw != 0 { + filter.SkipHw = true + } } } return nil @@ -185,19 +242,7 @@ func FilterDel(filter Filter) error { // FilterDel will delete a filter from the system. // Equivalent to: `tc filter del $filter` func (h *Handle) FilterDel(filter Filter) error { - req := h.newNetlinkRequest(unix.RTM_DELTFILTER, unix.NLM_F_ACK) - base := filter.Attrs() - msg := &nl.TcMsg{ - Family: nl.FAMILY_ALL, - Ifindex: int32(base.LinkIndex), - Handle: base.Handle, - Parent: base.Parent, - Info: MakeHandle(base.Priority, nl.Swap16(base.Protocol)), - } - req.AddData(msg) - - _, err := req.Execute(unix.NETLINK_ROUTE, 0) - return err + return h.filterModify(filter, unix.RTM_DELTFILTER, 0) } // FilterAdd will add a filter to the system. @@ -209,7 +254,7 @@ func FilterAdd(filter Filter) error { // FilterAdd will add a filter to the system. // Equivalent to: `tc filter add $filter` func (h *Handle) FilterAdd(filter Filter) error { - return h.filterModify(filter, unix.NLM_F_CREATE|unix.NLM_F_EXCL) + return h.filterModify(filter, unix.RTM_NEWTFILTER, unix.NLM_F_CREATE|unix.NLM_F_EXCL) } // FilterReplace will replace a filter. @@ -221,11 +266,11 @@ func FilterReplace(filter Filter) error { // FilterReplace will replace a filter. // Equivalent to: `tc filter replace $filter` func (h *Handle) FilterReplace(filter Filter) error { - return h.filterModify(filter, unix.NLM_F_CREATE) + return h.filterModify(filter, unix.RTM_NEWTFILTER, unix.NLM_F_CREATE) } -func (h *Handle) filterModify(filter Filter, flags int) error { - req := h.newNetlinkRequest(unix.RTM_NEWTFILTER, flags|unix.NLM_F_ACK) +func (h *Handle) filterModify(filter Filter, proto, flags int) error { + req := h.newNetlinkRequest(proto, flags|unix.NLM_F_ACK) base := filter.Attrs() msg := &nl.TcMsg{ Family: nl.FAMILY_ALL, @@ -235,6 +280,9 @@ func (h *Handle) filterModify(filter Filter, flags int) error { Info: MakeHandle(base.Priority, nl.Swap16(base.Protocol)), } req.AddData(msg) + if filter.Attrs().Chain != nil { + req.AddData(nl.NewRtAttr(nl.TCA_CHAIN, nl.Uint32Attr(*filter.Attrs().Chain))) + } req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(filter.Type()))) options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) @@ -284,6 +332,12 @@ func (h *Handle) filterModify(filter Filter, flags int) error { if filter.Link != 0 { options.AddRtAttr(nl.TCA_U32_LINK, nl.Uint32Attr(filter.Link)) } + if filter.Police != nil { + police := options.AddRtAttr(nl.TCA_U32_POLICE, nil) + if err := encodePolice(police, filter.Police); err != nil { + return err + } + } actionsAttr := options.AddRtAttr(nl.TCA_U32_ACT, nil) // backwards compatibility if filter.RedirIndex != 0 { @@ -312,6 +366,10 @@ func (h *Handle) filterModify(filter Filter, flags int) error { native.PutUint32(b, filter.ClassId) options.AddRtAttr(nl.TCA_FW_CLASSID, b) } + actionsAttr := options.AddRtAttr(nl.TCA_FW_ACT, nil) + if err := EncodeActions(actionsAttr, filter.Actions); err != nil { + return err + } case *BpfFilter: var bpfFlags uint32 if filter.ClassId != 0 { @@ -340,7 +398,6 @@ func (h *Handle) filterModify(filter Filter, flags int) error { return err } } - req.AddData(options) _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err @@ -446,6 +503,10 @@ func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) { default: detailed = true } + case nl.TCA_CHAIN: + val := new(uint32) + *val = native.Uint32(attr.Value) + base.Chain = val } } // only return the detailed version of the filter @@ -474,6 +535,14 @@ func toAttrs(tcgen *nl.TcGen, attrs *ActionAttrs) { attrs.Bindcnt = int(tcgen.Bindcnt) } +func toTimeStamp(tcf *nl.Tcf) *ActionTimestamp { + return &ActionTimestamp{ + Installed: tcf.Install, + LastUsed: tcf.LastUse, + Expires: tcf.Expires, + FirstUsed: tcf.FirstUse} +} + func encodePolice(attr *nl.RtAttr, action *PoliceAction) error { var rtab [256]uint32 var ptab [256]uint32 @@ -597,6 +666,9 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error { if action.Mark != nil { aopts.AddRtAttr(nl.TCA_SKBEDIT_MARK, nl.Uint32Attr(*action.Mark)) } + if action.Mask != nil { + aopts.AddRtAttr(nl.TCA_SKBEDIT_MASK, nl.Uint32Attr(*action.Mask)) + } case *ConnmarkAction: table := attr.AddRtAttr(tabIndex, nil) tabIndex++ @@ -635,6 +707,29 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error { gen := nl.TcGen{} toTcGen(action.Attrs(), &gen) aopts.AddRtAttr(nl.TCA_GACT_PARMS, gen.Serialize()) + case *PeditAction: + table := attr.AddRtAttr(tabIndex, nil) + tabIndex++ + pedit := nl.TcPedit{} + if action.SrcMacAddr != nil { + pedit.SetEthSrc(action.SrcMacAddr) + } + if action.DstMacAddr != nil { + pedit.SetEthDst(action.DstMacAddr) + } + if action.SrcIP != nil { + pedit.SetSrcIP(action.SrcIP) + } + if action.DstIP != nil { + pedit.SetDstIP(action.DstIP) + } + if action.SrcPort != 0 { + pedit.SetSrcPort(action.SrcPort, action.Proto) + } + if action.DstPort != 0 { + pedit.SetDstPort(action.DstPort, action.Proto) + } + pedit.Encode(table) } } return nil @@ -668,6 +763,8 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { for _, table := range tables { var action Action var actionType string + var actionnStatistic *ActionStatistic + var actionTimestamp *ActionTimestamp aattrs, err := nl.ParseRouteAttr(table.Value) if err != nil { return nil, err @@ -695,6 +792,8 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { action = &SkbEditAction{} case "police": action = &PoliceAction{} + case "pedit": + action = &PeditAction{} default: break nextattr } @@ -713,7 +812,11 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { toAttrs(&mirred.TcGen, action.Attrs()) action.(*MirredAction).Ifindex = int(mirred.Ifindex) action.(*MirredAction).MirredAction = MirredAct(mirred.Eaction) + case nl.TCA_MIRRED_TM: + tcTs := nl.DeserializeTcf(adatum.Value) + actionTimestamp = toTimeStamp(tcTs) } + case "tunnel_key": switch adatum.Attr.Type { case nl.TCA_TUNNEL_KEY_PARMS: @@ -729,6 +832,9 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { action.(*TunnelKeyAction).DstAddr = adatum.Value[:] case nl.TCA_TUNNEL_KEY_ENC_DST_PORT: action.(*TunnelKeyAction).DestPort = ntohs(adatum.Value) + case nl.TCA_TUNNEL_KEY_TM: + tcTs := nl.DeserializeTcf(adatum.Value) + actionTimestamp = toTimeStamp(tcTs) } case "skbedit": switch adatum.Attr.Type { @@ -739,6 +845,9 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { case nl.TCA_SKBEDIT_MARK: mark := native.Uint32(adatum.Value[0:4]) action.(*SkbEditAction).Mark = &mark + case nl.TCA_SKBEDIT_MASK: + mask := native.Uint32(adatum.Value[0:4]) + action.(*SkbEditAction).Mask = &mask case nl.TCA_SKBEDIT_PRIORITY: priority := native.Uint32(adatum.Value[0:4]) action.(*SkbEditAction).Priority = &priority @@ -748,6 +857,9 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { case nl.TCA_SKBEDIT_QUEUE_MAPPING: mapping := native.Uint16(adatum.Value[0:2]) action.(*SkbEditAction).QueueMapping = &mapping + case nl.TCA_SKBEDIT_TM: + tcTs := nl.DeserializeTcf(adatum.Value) + actionTimestamp = toTimeStamp(tcTs) } case "bpf": switch adatum.Attr.Type { @@ -758,6 +870,9 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { action.(*BpfAction).Fd = int(native.Uint32(adatum.Value[0:4])) case nl.TCA_ACT_BPF_NAME: action.(*BpfAction).Name = string(adatum.Value[:len(adatum.Value)-1]) + case nl.TCA_ACT_BPF_TM: + tcTs := nl.DeserializeTcf(adatum.Value) + actionTimestamp = toTimeStamp(tcTs) } case "connmark": switch adatum.Attr.Type { @@ -766,6 +881,9 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { action.(*ConnmarkAction).ActionAttrs = ActionAttrs{} toAttrs(&connmark.TcGen, action.Attrs()) action.(*ConnmarkAction).Zone = connmark.Zone + case nl.TCA_CONNMARK_TM: + tcTs := nl.DeserializeTcf(adatum.Value) + actionTimestamp = toTimeStamp(tcTs) } case "csum": switch adatum.Attr.Type { @@ -774,19 +892,36 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { action.(*CsumAction).ActionAttrs = ActionAttrs{} toAttrs(&csum.TcGen, action.Attrs()) action.(*CsumAction).UpdateFlags = CsumUpdateFlags(csum.UpdateFlags) + case nl.TCA_CSUM_TM: + tcTs := nl.DeserializeTcf(adatum.Value) + actionTimestamp = toTimeStamp(tcTs) } case "gact": switch adatum.Attr.Type { case nl.TCA_GACT_PARMS: gen := *nl.DeserializeTcGen(adatum.Value) toAttrs(&gen, action.Attrs()) + if action.Attrs().Action.String() == "goto" { + action.(*GenericAction).Chain = TC_ACT_EXT_VAL_MASK & gen.Action + } + case nl.TCA_GACT_TM: + tcTs := nl.DeserializeTcf(adatum.Value) + actionTimestamp = toTimeStamp(tcTs) } case "police": parsePolice(adatum, action.(*PoliceAction)) } } + case nl.TCA_ACT_STATS: + s, err := parseTcStats2(aattr.Value) + if err != nil { + return nil, err + } + actionnStatistic = (*ActionStatistic)(s) } } + action.Attrs().Statistics = actionnStatistic + action.Attrs().Timestamp = actionTimestamp actions = append(actions, action) } return actions, nil @@ -824,6 +959,13 @@ func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) u32.RedirIndex = int(action.Ifindex) } } + case nl.TCA_U32_POLICE: + var police PoliceAction + adata, _ := nl.ParseRouteAttr(datum.Value) + for _, aattr := range adata { + parsePolice(aattr, &police) + } + u32.Police = &police case nl.TCA_U32_CLASSID: u32.ClassId = native.Uint32(datum.Value) case nl.TCA_U32_DIVISOR: @@ -855,6 +997,15 @@ func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { parsePolice(aattr, &police) } fw.Police = &police + case nl.TCA_FW_ACT: + tables, err := nl.ParseRouteAttr(datum.Value) + if err != nil { + return detailed, err + } + fw.Actions, err = parseActions(tables) + if err != nil { + return detailed, err + } } } return detailed, nil diff --git a/vendor/github.com/vishvananda/netlink/handle_unspecified.go b/vendor/github.com/vishvananda/netlink/handle_unspecified.go index cc94a4e0..3fe03642 100644 --- a/vendor/github.com/vishvananda/netlink/handle_unspecified.go +++ b/vendor/github.com/vishvananda/netlink/handle_unspecified.go @@ -79,6 +79,10 @@ func (h *Handle) LinkSetVfVlanQos(link Link, vf, vlan, qos int) error { return ErrNotImplemented } +func (h *Handle) LinkSetVfVlanQosProto(link Link, vf, vlan, qos, proto int) error { + return ErrNotImplemented +} + func (h *Handle) LinkSetVfTxRate(link Link, vf, rate int) error { return ErrNotImplemented } @@ -163,6 +167,22 @@ func (h *Handle) LinkSetGroup(link Link, group int) error { return ErrNotImplemented } +func (h *Handle) LinkSetGSOMaxSize(link Link, maxSize int) error { + return ErrNotImplemented +} + +func (h *Handle) LinkSetGROMaxSize(link Link, maxSize int) error { + return ErrNotImplemented +} + +func (h *Handle) LinkSetGSOIPv4MaxSize(link Link, maxSize int) error { + return ErrNotImplemented +} + +func (h *Handle) LinkSetGROIPv4MaxSize(link Link, maxSize int) error { + return ErrNotImplemented +} + func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error { return ErrNotImplemented } @@ -243,6 +263,10 @@ func (h *Handle) RouteAppend(route *Route) error { return ErrNotImplemented } +func (h *Handle) RouteChange(route *Route) error { + return ErrNotImplemented +} + func (h *Handle) RouteDel(route *Route) error { return ErrNotImplemented } diff --git a/vendor/github.com/vishvananda/netlink/inet_diag.go b/vendor/github.com/vishvananda/netlink/inet_diag.go index bee391a8..2904d964 100644 --- a/vendor/github.com/vishvananda/netlink/inet_diag.go +++ b/vendor/github.com/vishvananda/netlink/inet_diag.go @@ -21,6 +21,10 @@ const ( INET_DIAG_BBRINFO INET_DIAG_CLASS_ID INET_DIAG_MD5SIG + INET_DIAG_ULP_INFO + INET_DIAG_SK_BPF_STORAGES + INET_DIAG_CGROUP_ID + INET_DIAG_SOCKOPT INET_DIAG_MAX ) @@ -29,3 +33,8 @@ type InetDiagTCPInfoResp struct { TCPInfo *TCPInfo TCPBBRInfo *TCPBBRInfo } + +type InetDiagUDPInfoResp struct { + InetDiagMsg *Socket + Memory *MemInfo +} diff --git a/vendor/github.com/vishvananda/netlink/ipset_linux.go b/vendor/github.com/vishvananda/netlink/ipset_linux.go index 1f4eae81..f4c05229 100644 --- a/vendor/github.com/vishvananda/netlink/ipset_linux.go +++ b/vendor/github.com/vishvananda/netlink/ipset_linux.go @@ -67,11 +67,13 @@ type IpsetCreateOptions struct { Comments bool Skbinfo bool - Revision uint8 - IPFrom net.IP - IPTo net.IP - PortFrom uint16 - PortTo uint16 + Family uint8 + Revision uint8 + IPFrom net.IP + IPTo net.IP + PortFrom uint16 + PortTo uint16 + MaxElements uint32 } // IpsetProtocol returns the ipset protocol version from the kernel @@ -94,6 +96,11 @@ func IpsetFlush(setname string) error { return pkgHandle.IpsetFlush(setname) } +// IpsetSwap swaps two ipsets. +func IpsetSwap(setname, othersetname string) error { + return pkgHandle.IpsetSwap(setname, othersetname) +} + // IpsetList dumps an specific ipset. func IpsetList(setname string) (*IPSetResult, error) { return pkgHandle.IpsetList(setname) @@ -114,6 +121,11 @@ func IpsetDel(setname string, entry *IPSetEntry) error { return pkgHandle.IpsetDel(setname, entry) } +// IpsetTest tests whether an entry is in a set or not. +func IpsetTest(setname string, entry *IPSetEntry) (bool, error) { + return pkgHandle.IpsetTest(setname, entry) +} + func (h *Handle) IpsetProtocol() (protocol uint8, minVersion uint8, err error) { req := h.newIpsetRequest(nl.IPSET_CMD_PROTOCOL) msgs, err := req.Execute(unix.NETLINK_NETFILTER, 0) @@ -153,11 +165,18 @@ func (h *Handle) IpsetCreate(setname, typename string, options IpsetCreateOption data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_PORT_FROM|int(nl.NLA_F_NET_BYTEORDER), buf[:2])) data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_PORT_TO|int(nl.NLA_F_NET_BYTEORDER), buf[2:])) default: - family = unix.AF_INET + family = options.Family + if family == 0 { + family = unix.AF_INET + } } req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_FAMILY, nl.Uint8Attr(family))) + if options.MaxElements != 0 { + data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_MAXELEM | nl.NLA_F_NET_BYTEORDER, Value: options.MaxElements}) + } + if timeout := options.Timeout; timeout != nil { data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_TIMEOUT | nl.NLA_F_NET_BYTEORDER, Value: *timeout}) } @@ -197,6 +216,14 @@ func (h *Handle) IpsetFlush(setname string) error { return err } +func (h *Handle) IpsetSwap(setname, othersetname string) error { + req := h.newIpsetRequest(nl.IPSET_CMD_SWAP) + req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(setname))) + req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_TYPENAME, nl.ZeroTerminated(othersetname))) + _, err := ipsetExecute(req) + return err +} + func (h *Handle) IpsetList(name string) (*IPSetResult, error) { req := h.newIpsetRequest(nl.IPSET_CMD_LIST) req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(name))) @@ -236,18 +263,23 @@ func (h *Handle) IpsetDel(setname string, entry *IPSetEntry) error { return h.ipsetAddDel(nl.IPSET_CMD_DEL, setname, entry) } -func (h *Handle) ipsetAddDel(nlCmd int, setname string, entry *IPSetEntry) error { - req := h.newIpsetRequest(nlCmd) - req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(setname))) - - if entry.Comment != "" { - req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_COMMENT, nl.ZeroTerminated(entry.Comment))) +func encodeIP(ip net.IP) (*nl.RtAttr, error) { + typ := int(nl.NLA_F_NET_BYTEORDER) + if ip4 := ip.To4(); ip4 != nil { + typ |= nl.IPSET_ATTR_IPADDR_IPV4 + ip = ip4 + } else { + typ |= nl.IPSET_ATTR_IPADDR_IPV6 } + return nl.NewRtAttr(typ, ip), nil +} + +func buildEntryData(entry *IPSetEntry) (*nl.RtAttr, error) { data := nl.NewRtAttr(nl.IPSET_ATTR_DATA|int(nl.NLA_F_NESTED), nil) - if !entry.Replace { - req.Flags |= unix.NLM_F_EXCL + if entry.Comment != "" { + data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_COMMENT, nl.ZeroTerminated(entry.Comment))) } if entry.Timeout != nil { @@ -255,7 +287,10 @@ func (h *Handle) ipsetAddDel(nlCmd int, setname string, entry *IPSetEntry) error } if entry.IP != nil { - nestedData := nl.NewRtAttr(nl.IPSET_ATTR_IP|int(nl.NLA_F_NET_BYTEORDER), entry.IP) + nestedData, err := encodeIP(entry.IP) + if err != nil { + return nil, err + } data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_IP|int(nl.NLA_F_NESTED), nestedData.Serialize())) } @@ -268,7 +303,10 @@ func (h *Handle) ipsetAddDel(nlCmd int, setname string, entry *IPSetEntry) error } if entry.IP2 != nil { - nestedData := nl.NewRtAttr(nl.IPSET_ATTR_IP|int(nl.NLA_F_NET_BYTEORDER), entry.IP2) + nestedData, err := encodeIP(entry.IP2) + if err != nil { + return nil, err + } data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_IP2|int(nl.NLA_F_NESTED), nestedData.Serialize())) } @@ -295,14 +333,53 @@ func (h *Handle) ipsetAddDel(nlCmd int, setname string, entry *IPSetEntry) error if entry.Mark != nil { data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_MARK | nl.NLA_F_NET_BYTEORDER, Value: *entry.Mark}) } + return data, nil +} +func (h *Handle) ipsetAddDel(nlCmd int, setname string, entry *IPSetEntry) error { + req := h.newIpsetRequest(nlCmd) + req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(setname))) + + if !entry.Replace { + req.Flags |= unix.NLM_F_EXCL + } + + data, err := buildEntryData(entry) + if err != nil { + return err + } data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_LINENO | nl.NLA_F_NET_BYTEORDER, Value: 0}) req.AddData(data) - _, err := ipsetExecute(req) + _, err = ipsetExecute(req) return err } +func (h *Handle) IpsetTest(setname string, entry *IPSetEntry) (bool, error) { + req := h.newIpsetRequest(nl.IPSET_CMD_TEST) + req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(setname))) + + if !entry.Replace { + req.Flags |= unix.NLM_F_EXCL + } + + data, err := buildEntryData(entry) + if err != nil { + return false, err + } + req.AddData(data) + + _, err = ipsetExecute(req) + if err != nil { + if err == nl.IPSetError(nl.IPSET_ERR_EXIST) { + // not exist + return false, nil + } + return false, err + } + return true, nil +} + func (h *Handle) newIpsetRequest(cmd int) *nl.NetlinkRequest { req := h.newNetlinkRequest(cmd|(unix.NFNL_SUBSYS_IPSET<<8), nl.GetIpsetFlags(cmd)) @@ -466,7 +543,7 @@ func parseIPSetEntry(data []byte) (entry IPSetEntry) { case nl.IPSET_ATTR_IP | nl.NLA_F_NESTED: for attr := range nl.ParseAttributes(attr.Value) { switch attr.Type { - case nl.IPSET_ATTR_IP: + case nl.IPSET_ATTR_IPADDR_IPV4, nl.IPSET_ATTR_IPADDR_IPV6: entry.IP = net.IP(attr.Value) default: log.Printf("unknown nested ADT attribute from kernel: %+v", attr) @@ -475,7 +552,7 @@ func parseIPSetEntry(data []byte) (entry IPSetEntry) { case nl.IPSET_ATTR_IP2 | nl.NLA_F_NESTED: for attr := range nl.ParseAttributes(attr.Value) { switch attr.Type { - case nl.IPSET_ATTR_IP: + case nl.IPSET_ATTR_IPADDR_IPV4, nl.IPSET_ATTR_IPADDR_IPV6: entry.IP2 = net.IP(attr.Value) default: log.Printf("unknown nested ADT attribute from kernel: %+v", attr) diff --git a/vendor/github.com/vishvananda/netlink/link.go b/vendor/github.com/vishvananda/netlink/link.go index 33c87233..f820cdb6 100644 --- a/vendor/github.com/vishvananda/netlink/link.go +++ b/vendor/github.com/vishvananda/netlink/link.go @@ -22,34 +22,41 @@ type ( // LinkAttrs represents data shared by most link types type LinkAttrs struct { - Index int - MTU int - TxQLen int // Transmit Queue Length - Name string - HardwareAddr net.HardwareAddr - Flags net.Flags - RawFlags uint32 - ParentIndex int // index of the parent link device - MasterIndex int // must be the index of a bridge - Namespace interface{} // nil | NsPid | NsFd - Alias string - Statistics *LinkStatistics - Promisc int - Allmulti int - Multi int - Xdp *LinkXdp - EncapType string - Protinfo *Protinfo - OperState LinkOperState - PhysSwitchID int - NetNsID int - NumTxQueues int - NumRxQueues int - GSOMaxSize uint32 - GSOMaxSegs uint32 - Vfs []VfInfo // virtual functions available on link - Group uint32 - Slave LinkSlave + Index int + MTU int + TxQLen int // Transmit Queue Length + Name string + HardwareAddr net.HardwareAddr + Flags net.Flags + RawFlags uint32 + ParentIndex int // index of the parent link device + MasterIndex int // must be the index of a bridge + Namespace interface{} // nil | NsPid | NsFd + Alias string + AltNames []string + Statistics *LinkStatistics + Promisc int + Allmulti int + Multi int + Xdp *LinkXdp + EncapType string + Protinfo *Protinfo + OperState LinkOperState + PhysSwitchID int + NetNsID int + NumTxQueues int + NumRxQueues int + TSOMaxSegs uint32 + TSOMaxSize uint32 + GSOMaxSegs uint32 + GSOMaxSize uint32 + GROMaxSize uint32 + GSOIPv4MaxSize uint32 + GROIPv4MaxSize uint32 + Vfs []VfInfo // virtual functions available on link + Group uint32 + PermHWAddr net.HardwareAddr + Slave LinkSlave } // LinkSlave represents a slave device. @@ -63,6 +70,7 @@ type VfInfo struct { Mac net.HardwareAddr Vlan int Qos int + VlanProto int TxRate int // IFLA_VF_TX_RATE Max TxRate Spoofchk bool LinkState uint32 @@ -265,6 +273,8 @@ type Bridge struct { AgeingTime *uint32 HelloTime *uint32 VlanFiltering *bool + VlanDefaultPVID *uint16 + GroupFwdMask *uint16 } func (bridge *Bridge) Attrs() *LinkAttrs { @@ -308,6 +318,9 @@ type Macvlan struct { // MACAddrs is only populated for Macvlan SOURCE links MACAddrs []net.HardwareAddr + + BCQueueLen uint32 + UsedBCQueueLen uint32 } func (macvlan *Macvlan) Attrs() *LinkAttrs { @@ -350,6 +363,46 @@ func (tuntap *Tuntap) Type() string { return "tuntap" } +type NetkitMode uint32 + +const ( + NETKIT_MODE_L2 NetkitMode = iota + NETKIT_MODE_L3 +) + +type NetkitPolicy int + +const ( + NETKIT_POLICY_FORWARD NetkitPolicy = 0 + NETKIT_POLICY_BLACKHOLE NetkitPolicy = 2 +) + +func (n *Netkit) IsPrimary() bool { + return n.isPrimary +} + +// SetPeerAttrs will not take effect if trying to modify an existing netkit device +func (n *Netkit) SetPeerAttrs(Attrs *LinkAttrs) { + n.peerLinkAttrs = *Attrs +} + +type Netkit struct { + LinkAttrs + Mode NetkitMode + Policy NetkitPolicy + PeerPolicy NetkitPolicy + isPrimary bool + peerLinkAttrs LinkAttrs +} + +func (n *Netkit) Attrs() *LinkAttrs { + return &n.LinkAttrs +} + +func (n *Netkit) Type() string { + return "netkit" +} + // Veth devices must specify PeerName on create type Veth struct { LinkAttrs @@ -703,6 +756,7 @@ const ( BOND_XMIT_HASH_POLICY_LAYER2_3 BOND_XMIT_HASH_POLICY_ENCAP2_3 BOND_XMIT_HASH_POLICY_ENCAP3_4 + BOND_XMIT_HASH_POLICY_VLAN_SRCMAC BOND_XMIT_HASH_POLICY_UNKNOWN ) @@ -712,6 +766,7 @@ var bondXmitHashPolicyToString = map[BondXmitHashPolicy]string{ BOND_XMIT_HASH_POLICY_LAYER2_3: "layer2+3", BOND_XMIT_HASH_POLICY_ENCAP2_3: "encap2+3", BOND_XMIT_HASH_POLICY_ENCAP3_4: "encap3+4", + BOND_XMIT_HASH_POLICY_VLAN_SRCMAC: "vlan+srcmac", } var StringToBondXmitHashPolicyMap = map[string]BondXmitHashPolicy{ "layer2": BOND_XMIT_HASH_POLICY_LAYER2, @@ -719,6 +774,7 @@ var StringToBondXmitHashPolicyMap = map[string]BondXmitHashPolicy{ "layer2+3": BOND_XMIT_HASH_POLICY_LAYER2_3, "encap2+3": BOND_XMIT_HASH_POLICY_ENCAP2_3, "encap3+4": BOND_XMIT_HASH_POLICY_ENCAP3_4, + "vlan+srcmac": BOND_XMIT_HASH_POLICY_VLAN_SRCMAC, } // BondLacpRate type @@ -974,16 +1030,18 @@ func (v *VrfSlave) SlaveType() string { // https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/drivers/net/geneve.c#L1209-L1223 type Geneve struct { LinkAttrs - ID uint32 // vni - Remote net.IP - Ttl uint8 - Tos uint8 - Dport uint16 - UdpCsum uint8 - UdpZeroCsum6Tx uint8 - UdpZeroCsum6Rx uint8 - Link uint32 - FlowBased bool + ID uint32 // vni + Remote net.IP + Ttl uint8 + Tos uint8 + Dport uint16 + UdpCsum uint8 + UdpZeroCsum6Tx uint8 + UdpZeroCsum6Rx uint8 + Link uint32 + FlowBased bool + InnerProtoInherit bool + Df GeneveDf } func (geneve *Geneve) Attrs() *LinkAttrs { @@ -994,6 +1052,15 @@ func (geneve *Geneve) Type() string { return "geneve" } +type GeneveDf uint8 + +const ( + GENEVE_DF_UNSET GeneveDf = iota + GENEVE_DF_SET + GENEVE_DF_INHERIT + GENEVE_DF_MAX +) + // Gretap devices must specify LocalIP and RemoteIP on create type Gretap struct { LinkAttrs @@ -1064,6 +1131,7 @@ type Ip6tnl struct { EncapFlags uint16 EncapSport uint16 EncapDport uint16 + FlowBased bool } func (ip6tnl *Ip6tnl) Attrs() *LinkAttrs { @@ -1165,6 +1233,7 @@ type Gretun struct { EncapFlags uint16 EncapSport uint16 EncapDport uint16 + FlowBased bool } func (gretun *Gretun) Attrs() *LinkAttrs { @@ -1208,6 +1277,7 @@ func (gtp *GTP) Type() string { } // Virtual XFRM Interfaces +// // Named "xfrmi" to prevent confusion with XFRM objects type Xfrmi struct { LinkAttrs diff --git a/vendor/github.com/vishvananda/netlink/link_linux.go b/vendor/github.com/vishvananda/netlink/link_linux.go index 276947a0..d713612a 100644 --- a/vendor/github.com/vishvananda/netlink/link_linux.go +++ b/vendor/github.com/vishvananda/netlink/link_linux.go @@ -345,6 +345,16 @@ func (h *Handle) BridgeSetVlanFiltering(link Link, on bool) error { return h.linkModify(bridge, unix.NLM_F_ACK) } +func BridgeSetVlanDefaultPVID(link Link, pvid uint16) error { + return pkgHandle.BridgeSetVlanDefaultPVID(link, pvid) +} + +func (h *Handle) BridgeSetVlanDefaultPVID(link Link, pvid uint16) error { + bridge := link.(*Bridge) + bridge.VlanDefaultPVID = &pvid + return h.linkModify(bridge, unix.NLM_F_ACK) +} + func SetPromiscOn(link Link) error { return pkgHandle.SetPromiscOn(link) } @@ -487,6 +497,58 @@ func (h *Handle) LinkSetAlias(link Link, name string) error { return err } +// LinkAddAltName adds a new alternative name for the link device. +// Equivalent to: `ip link property add $link altname $name` +func LinkAddAltName(link Link, name string) error { + return pkgHandle.LinkAddAltName(link, name) +} + +// LinkAddAltName adds a new alternative name for the link device. +// Equivalent to: `ip link property add $link altname $name` +func (h *Handle) LinkAddAltName(link Link, name string) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINKPROP, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_PROP_LIST|unix.NLA_F_NESTED, nil) + data.AddRtAttr(unix.IFLA_ALT_IFNAME, []byte(name)) + + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkDelAltName delete an alternative name for the link device. +// Equivalent to: `ip link property del $link altname $name` +func LinkDelAltName(link Link, name string) error { + return pkgHandle.LinkDelAltName(link, name) +} + +// LinkDelAltName delete an alternative name for the link device. +// Equivalent to: `ip link property del $link altname $name` +func (h *Handle) LinkDelAltName(link Link, name string) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_DELLINKPROP, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_PROP_LIST|unix.NLA_F_NESTED, nil) + data.AddRtAttr(unix.IFLA_ALT_IFNAME, []byte(name)) + + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + // LinkSetHardwareAddr sets the hardware address of the link device. // Equivalent to: `ip link set $link address $hwaddr` func LinkSetHardwareAddr(link Link, hwaddr net.HardwareAddr) error { @@ -602,6 +664,43 @@ func (h *Handle) LinkSetVfVlanQos(link Link, vf, vlan, qos int) error { return err } +// LinkSetVfVlanQosProto sets the vlan, qos and protocol of a vf for the link. +// Equivalent to: `ip link set $link vf $vf vlan $vlan qos $qos proto $proto` +func LinkSetVfVlanQosProto(link Link, vf, vlan, qos, proto int) error { + return pkgHandle.LinkSetVfVlanQosProto(link, vf, vlan, qos, proto) +} + +// LinkSetVfVlanQosProto sets the vlan, qos and protocol of a vf for the link. +// Equivalent to: `ip link set $link vf $vf vlan $vlan qos $qos proto $proto` +func (h *Handle) LinkSetVfVlanQosProto(link Link, vf, vlan, qos, proto int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) + vfInfo := data.AddRtAttr(nl.IFLA_VF_INFO, nil) + vfVlanList := vfInfo.AddRtAttr(nl.IFLA_VF_VLAN_LIST, nil) + + vfmsg := nl.VfVlanInfo{ + VfVlan: nl.VfVlan{ + Vf: uint32(vf), + Vlan: uint32(vlan), + Qos: uint32(qos), + }, + VlanProto: (uint16(proto)>>8)&0xFF | (uint16(proto)&0xFF)<<8, + } + + vfVlanList.AddRtAttr(nl.IFLA_VF_VLAN_INFO, vfmsg.Serialize()) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + // LinkSetVfTxRate sets the tx rate of a vf for the link. // Equivalent to: `ip link set $link vf $vf rate $rate` func LinkSetVfTxRate(link Link, vf, rate int) error { @@ -946,6 +1045,141 @@ func LinkSetXdpFdWithFlags(link Link, fd, flags int) error { return err } +// LinkSetGSOMaxSegs sets the GSO maximum segment count of the link device. +// Equivalent to: `ip link set $link gso_max_segs $maxSegs` +func LinkSetGSOMaxSegs(link Link, maxSegs int) error { + return pkgHandle.LinkSetGSOMaxSegs(link, maxSegs) +} + +// LinkSetGSOMaxSegs sets the GSO maximum segment count of the link device. +// Equivalent to: `ip link set $link gso_max_segs $maxSegs` +func (h *Handle) LinkSetGSOMaxSegs(link Link, maxSize int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(maxSize)) + + data := nl.NewRtAttr(unix.IFLA_GSO_MAX_SEGS, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetGSOMaxSize sets the IPv6 GSO maximum size of the link device. +// Equivalent to: `ip link set $link gso_max_size $maxSize` +func LinkSetGSOMaxSize(link Link, maxSize int) error { + return pkgHandle.LinkSetGSOMaxSize(link, maxSize) +} + +// LinkSetGSOMaxSize sets the IPv6 GSO maximum size of the link device. +// Equivalent to: `ip link set $link gso_max_size $maxSize` +func (h *Handle) LinkSetGSOMaxSize(link Link, maxSize int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(maxSize)) + + data := nl.NewRtAttr(unix.IFLA_GSO_MAX_SIZE, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetGROMaxSize sets the IPv6 GRO maximum size of the link device. +// Equivalent to: `ip link set $link gro_max_size $maxSize` +func LinkSetGROMaxSize(link Link, maxSize int) error { + return pkgHandle.LinkSetGROMaxSize(link, maxSize) +} + +// LinkSetGROMaxSize sets the IPv6 GRO maximum size of the link device. +// Equivalent to: `ip link set $link gro_max_size $maxSize` +func (h *Handle) LinkSetGROMaxSize(link Link, maxSize int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(maxSize)) + + data := nl.NewRtAttr(unix.IFLA_GRO_MAX_SIZE, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetGSOIPv4MaxSize sets the IPv4 GSO maximum size of the link device. +// Equivalent to: `ip link set $link gso_ipv4_max_size $maxSize` +func LinkSetGSOIPv4MaxSize(link Link, maxSize int) error { + return pkgHandle.LinkSetGSOIPv4MaxSize(link, maxSize) +} + +// LinkSetGSOIPv4MaxSize sets the IPv4 GSO maximum size of the link device. +// Equivalent to: `ip link set $link gso_ipv4_max_size $maxSize` +func (h *Handle) LinkSetGSOIPv4MaxSize(link Link, maxSize int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(maxSize)) + + data := nl.NewRtAttr(unix.IFLA_GSO_IPV4_MAX_SIZE, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetGROIPv4MaxSize sets the IPv4 GRO maximum size of the link device. +// Equivalent to: `ip link set $link gro_ipv4_max_size $maxSize` +func LinkSetGROIPv4MaxSize(link Link, maxSize int) error { + return pkgHandle.LinkSetGROIPv4MaxSize(link, maxSize) +} + +// LinkSetGROIPv4MaxSize sets the IPv4 GRO maximum size of the link device. +// Equivalent to: `ip link set $link gro_ipv4_max_size $maxSize` +func (h *Handle) LinkSetGROIPv4MaxSize(link Link, maxSize int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(maxSize)) + + data := nl.NewRtAttr(unix.IFLA_GRO_IPV4_MAX_SIZE, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + func boolAttr(val bool) []byte { var v uint8 if val { @@ -1401,6 +1635,21 @@ func (h *Handle) linkModify(link Link, flags int) error { req.AddData(gsoAttr) } + if base.GROMaxSize > 0 { + groAttr := nl.NewRtAttr(unix.IFLA_GRO_MAX_SIZE, nl.Uint32Attr(base.GROMaxSize)) + req.AddData(groAttr) + } + + if base.GSOIPv4MaxSize > 0 { + gsoAttr := nl.NewRtAttr(unix.IFLA_GSO_IPV4_MAX_SIZE, nl.Uint32Attr(base.GSOIPv4MaxSize)) + req.AddData(gsoAttr) + } + + if base.GROIPv4MaxSize > 0 { + groAttr := nl.NewRtAttr(unix.IFLA_GRO_IPV4_MAX_SIZE, nl.Uint32Attr(base.GROIPv4MaxSize)) + req.AddData(groAttr) + } + if base.Group > 0 { groupAttr := nl.NewRtAttr(unix.IFLA_GROUP, nl.Uint32Attr(base.Group)) req.AddData(groupAttr) @@ -1437,6 +1686,10 @@ func (h *Handle) linkModify(link Link, flags int) error { if link.VlanProtocol != VLAN_PROTOCOL_UNKNOWN { data.AddRtAttr(nl.IFLA_VLAN_PROTOCOL, htons(uint16(link.VlanProtocol))) } + case *Netkit: + if err := addNetkitAttrs(link, linkInfo, flags); err != nil { + return err + } case *Veth: data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) peer := data.AddRtAttr(nl.VETH_INFO_PEER, nil) @@ -1480,15 +1733,9 @@ func (h *Handle) linkModify(link Link, flags int) error { data.AddRtAttr(nl.IFLA_IPVLAN_MODE, nl.Uint16Attr(uint16(link.Mode))) data.AddRtAttr(nl.IFLA_IPVLAN_FLAG, nl.Uint16Attr(uint16(link.Flag))) case *Macvlan: - if link.Mode != MACVLAN_MODE_DEFAULT { - data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) - data.AddRtAttr(nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[link.Mode])) - } + addMacvlanAttrs(link, linkInfo) case *Macvtap: - if link.Mode != MACVLAN_MODE_DEFAULT { - data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) - data.AddRtAttr(nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[link.Mode])) - } + addMacvtapAttrs(link, linkInfo) case *Geneve: addGeneveAttrs(link, linkInfo) case *Gretap: @@ -1569,6 +1816,13 @@ func (h *Handle) linkByNameDump(name string) (Link, error) { if link.Attrs().Name == name { return link, nil } + + // support finding interfaces also via altnames + for _, altName := range link.Attrs().AltNames { + if altName == name { + return link, nil + } + } } return nil, LinkNotFoundError{fmt.Errorf("Link %s not found", name)} } @@ -1607,6 +1861,9 @@ func (h *Handle) LinkByName(name string) (Link, error) { req.AddData(attr) nameData := nl.NewRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(name)) + if len(name) > 15 { + nameData = nl.NewRtAttr(unix.IFLA_ALT_IFNAME, nl.ZeroTerminated(name)) + } req.AddData(nameData) link, err := execGetLink(req) @@ -1712,9 +1969,6 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { base.Flags = linkFlags(msg.Flags) base.EncapType = msg.EncapType() base.NetNsID = -1 - if msg.Flags&unix.IFF_PROMISC != 0 { - base.Promisc = 1 - } if msg.Flags&unix.IFF_ALLMULTI != 0 { base.Allmulti = 1 } @@ -1750,6 +2004,8 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { link = &Bridge{} case "vlan": link = &Vlan{} + case "netkit": + link = &Netkit{} case "veth": link = &Veth{} case "wireguard": @@ -1807,6 +2063,8 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { return nil, err } switch linkType { + case "netkit": + parseNetkitData(link, data) case "vlan": parseVlanData(link, data) case "vxlan": @@ -1897,6 +2155,8 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { base.Name = string(attr.Value[:len(attr.Value)-1]) case unix.IFLA_MTU: base.MTU = int(native.Uint32(attr.Value[0:4])) + case unix.IFLA_PROMISCUITY: + base.Promisc = int(native.Uint32(attr.Value[0:4])) case unix.IFLA_LINK: base.ParentIndex = int(native.Uint32(attr.Value[0:4])) case unix.IFLA_MASTER: @@ -1931,16 +2191,38 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { protinfo := parseProtinfo(attrs) base.Protinfo = &protinfo } + case unix.IFLA_PROP_LIST | unix.NLA_F_NESTED: + attrs, err := nl.ParseRouteAttr(attr.Value[:]) + if err != nil { + return nil, err + } + + base.AltNames = []string{} + for _, attr := range attrs { + if attr.Attr.Type == unix.IFLA_ALT_IFNAME { + base.AltNames = append(base.AltNames, nl.BytesToString(attr.Value)) + } + } case unix.IFLA_OPERSTATE: base.OperState = LinkOperState(uint8(attr.Value[0])) case unix.IFLA_PHYS_SWITCH_ID: base.PhysSwitchID = int(native.Uint32(attr.Value[0:4])) case unix.IFLA_LINK_NETNSID: base.NetNsID = int(native.Uint32(attr.Value[0:4])) - case unix.IFLA_GSO_MAX_SIZE: - base.GSOMaxSize = native.Uint32(attr.Value[0:4]) + case unix.IFLA_TSO_MAX_SEGS: + base.TSOMaxSegs = native.Uint32(attr.Value[0:4]) + case unix.IFLA_TSO_MAX_SIZE: + base.TSOMaxSize = native.Uint32(attr.Value[0:4]) case unix.IFLA_GSO_MAX_SEGS: base.GSOMaxSegs = native.Uint32(attr.Value[0:4]) + case unix.IFLA_GSO_MAX_SIZE: + base.GSOMaxSize = native.Uint32(attr.Value[0:4]) + case unix.IFLA_GRO_MAX_SIZE: + base.GROMaxSize = native.Uint32(attr.Value[0:4]) + case unix.IFLA_GSO_IPV4_MAX_SIZE: + base.GSOIPv4MaxSize = native.Uint32(attr.Value[0:4]) + case unix.IFLA_GRO_IPV4_MAX_SIZE: + base.GROIPv4MaxSize = native.Uint32(attr.Value[0:4]) case unix.IFLA_VFINFO_LIST: data, err := nl.ParseRouteAttr(attr.Value) if err != nil { @@ -1957,6 +2239,13 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { base.NumRxQueues = int(native.Uint32(attr.Value[0:4])) case unix.IFLA_GROUP: base.Group = native.Uint32(attr.Value[0:4]) + case unix.IFLA_PERM_ADDRESS: + for _, b := range attr.Value { + if b != 0 { + base.PermHWAddr = attr.Value[:] + break + } + } } } @@ -2069,21 +2358,24 @@ type LinkUpdate struct { // LinkSubscribe takes a chan down which notifications will be sent // when links change. Close the 'done' chan to stop subscription. func LinkSubscribe(ch chan<- LinkUpdate, done <-chan struct{}) error { - return linkSubscribeAt(netns.None(), netns.None(), ch, done, nil, false) + return linkSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0, nil, false) } // LinkSubscribeAt works like LinkSubscribe plus it allows the caller // to choose the network namespace in which to subscribe (ns). func LinkSubscribeAt(ns netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}) error { - return linkSubscribeAt(ns, netns.None(), ch, done, nil, false) + return linkSubscribeAt(ns, netns.None(), ch, done, nil, false, 0, nil, false) } // LinkSubscribeOptions contains a set of options to use with // LinkSubscribeWithOptions. type LinkSubscribeOptions struct { - Namespace *netns.NsHandle - ErrorCallback func(error) - ListExisting bool + Namespace *netns.NsHandle + ErrorCallback func(error) + ListExisting bool + ReceiveBufferSize int + ReceiveBufferForceSize bool + ReceiveTimeout *unix.Timeval } // LinkSubscribeWithOptions work like LinkSubscribe but enable to @@ -2094,14 +2386,27 @@ func LinkSubscribeWithOptions(ch chan<- LinkUpdate, done <-chan struct{}, option none := netns.None() options.Namespace = &none } - return linkSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting) + return linkSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, + options.ReceiveBufferSize, options.ReceiveTimeout, options.ReceiveBufferForceSize) } -func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}, cberr func(error), listExisting bool) error { +func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}, cberr func(error), listExisting bool, + rcvbuf int, rcvTimeout *unix.Timeval, rcvbufForce bool) error { s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_LINK) if err != nil { return err } + if rcvTimeout != nil { + if err := s.SetReceiveTimeout(rcvTimeout); err != nil { + return err + } + } + if rcvbuf != 0 { + err = s.SetReceiveBufferSize(rcvbuf, rcvbufForce) + if err != nil { + return err + } + } if done != nil { go func() { <-done @@ -2182,6 +2487,16 @@ func (h *Handle) LinkSetGuard(link Link, mode bool) error { return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_GUARD) } +// LinkSetBRSlaveGroupFwdMask set the group_fwd_mask of a bridge slave interface +func LinkSetBRSlaveGroupFwdMask(link Link, mask uint16) error { + return pkgHandle.LinkSetBRSlaveGroupFwdMask(link, mask) +} + +// LinkSetBRSlaveGroupFwdMask set the group_fwd_mask of a bridge slave interface +func (h *Handle) LinkSetBRSlaveGroupFwdMask(link Link, mask uint16) error { + return h.setProtinfoAttrRawVal(link, nl.Uint16Attr(mask), nl.IFLA_BRPORT_GROUP_FWD_MASK) +} + func LinkSetFastLeave(link Link, mode bool) error { return pkgHandle.LinkSetFastLeave(link, mode) } @@ -2214,6 +2529,14 @@ func (h *Handle) LinkSetFlood(link Link, mode bool) error { return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_UNICAST_FLOOD) } +func LinkSetIsolated(link Link, mode bool) error { + return pkgHandle.LinkSetIsolated(link, mode) +} + +func (h *Handle) LinkSetIsolated(link Link, mode bool) error { + return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_ISOLATED) +} + func LinkSetBrProxyArp(link Link, mode bool) error { return pkgHandle.LinkSetBrProxyArp(link, mode) } @@ -2230,7 +2553,15 @@ func (h *Handle) LinkSetBrProxyArpWiFi(link Link, mode bool) error { return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_PROXYARP_WIFI) } -func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error { +func LinkSetBrNeighSuppress(link Link, mode bool) error { + return pkgHandle.LinkSetBrNeighSuppress(link, mode) +} + +func (h *Handle) LinkSetBrNeighSuppress(link Link, mode bool) error { + return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_NEIGH_SUPPRESS) +} + +func (h *Handle) setProtinfoAttrRawVal(link Link, val []byte, attr int) error { base := link.Attrs() h.ensureIndex(base) req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) @@ -2240,7 +2571,7 @@ func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error { req.AddData(msg) br := nl.NewRtAttr(unix.IFLA_PROTINFO|unix.NLA_F_NESTED, nil) - br.AddRtAttr(attr, boolToByte(mode)) + br.AddRtAttr(attr, val) req.AddData(br) _, err := req.Execute(unix.NETLINK_ROUTE, 0) if err != nil { @@ -2248,6 +2579,9 @@ func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error { } return nil } +func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error { + return h.setProtinfoAttrRawVal(link, boolToByte(mode), attr) +} // LinkSetTxQLen sets the transaction queue length for the link. // Equivalent to: `ip link set $link txqlen $qlen` @@ -2305,6 +2639,80 @@ func (h *Handle) LinkSetGroup(link Link, group int) error { return err } +func addNetkitAttrs(nk *Netkit, linkInfo *nl.RtAttr, flag int) error { + if nk.peerLinkAttrs.HardwareAddr != nil || nk.HardwareAddr != nil { + return fmt.Errorf("netkit doesn't support setting Ethernet") + } + + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + // Kernel will return error if trying to change the mode of an existing netkit device + data.AddRtAttr(nl.IFLA_NETKIT_MODE, nl.Uint32Attr(uint32(nk.Mode))) + data.AddRtAttr(nl.IFLA_NETKIT_POLICY, nl.Uint32Attr(uint32(nk.Policy))) + data.AddRtAttr(nl.IFLA_NETKIT_PEER_POLICY, nl.Uint32Attr(uint32(nk.PeerPolicy))) + + if (flag & unix.NLM_F_EXCL) == 0 { + // Modifying peer link attributes will not take effect + return nil + } + + peer := data.AddRtAttr(nl.IFLA_NETKIT_PEER_INFO, nil) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + if nk.peerLinkAttrs.Flags&net.FlagUp != 0 { + msg.Change = unix.IFF_UP + msg.Flags = unix.IFF_UP + } + if nk.peerLinkAttrs.Index != 0 { + msg.Index = int32(nk.peerLinkAttrs.Index) + } + peer.AddChild(msg) + if nk.peerLinkAttrs.Name != "" { + peer.AddRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(nk.peerLinkAttrs.Name)) + } + if nk.peerLinkAttrs.MTU > 0 { + peer.AddRtAttr(unix.IFLA_MTU, nl.Uint32Attr(uint32(nk.peerLinkAttrs.MTU))) + } + if nk.peerLinkAttrs.GSOMaxSegs > 0 { + peer.AddRtAttr(unix.IFLA_GSO_MAX_SEGS, nl.Uint32Attr(nk.peerLinkAttrs.GSOMaxSegs)) + } + if nk.peerLinkAttrs.GSOMaxSize > 0 { + peer.AddRtAttr(unix.IFLA_GSO_MAX_SIZE, nl.Uint32Attr(nk.peerLinkAttrs.GSOMaxSize)) + } + if nk.peerLinkAttrs.GSOIPv4MaxSize > 0 { + peer.AddRtAttr(unix.IFLA_GSO_IPV4_MAX_SIZE, nl.Uint32Attr(nk.peerLinkAttrs.GSOIPv4MaxSize)) + } + if nk.peerLinkAttrs.GROIPv4MaxSize > 0 { + peer.AddRtAttr(unix.IFLA_GRO_IPV4_MAX_SIZE, nl.Uint32Attr(nk.peerLinkAttrs.GROIPv4MaxSize)) + } + if nk.peerLinkAttrs.Namespace != nil { + switch ns := nk.peerLinkAttrs.Namespace.(type) { + case NsPid: + peer.AddRtAttr(unix.IFLA_NET_NS_PID, nl.Uint32Attr(uint32(ns))) + case NsFd: + peer.AddRtAttr(unix.IFLA_NET_NS_FD, nl.Uint32Attr(uint32(ns))) + } + } + return nil +} + +func parseNetkitData(link Link, data []syscall.NetlinkRouteAttr) { + netkit := link.(*Netkit) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_NETKIT_PRIMARY: + isPrimary := datum.Value[0:1][0] + if isPrimary != 0 { + netkit.isPrimary = true + } + case nl.IFLA_NETKIT_MODE: + netkit.Mode = NetkitMode(native.Uint32(datum.Value[0:4])) + case nl.IFLA_NETKIT_POLICY: + netkit.Policy = NetkitPolicy(native.Uint32(datum.Value[0:4])) + case nl.IFLA_NETKIT_PEER_POLICY: + netkit.PeerPolicy = NetkitPolicy(native.Uint32(datum.Value[0:4])) + } + } +} + func parseVlanData(link Link, data []syscall.NetlinkRouteAttr) { vlan := link.(*Vlan) for _, datum := range data { @@ -2539,11 +2947,30 @@ func parseIPVtapData(link Link, data []syscall.NetlinkRouteAttr) { } } +func addMacvtapAttrs(macvtap *Macvtap, linkInfo *nl.RtAttr) { + addMacvlanAttrs(&macvtap.Macvlan, linkInfo) +} + func parseMacvtapData(link Link, data []syscall.NetlinkRouteAttr) { macv := link.(*Macvtap) parseMacvlanData(&macv.Macvlan, data) } +func addMacvlanAttrs(macvlan *Macvlan, linkInfo *nl.RtAttr) { + var data *nl.RtAttr + + if macvlan.Mode != MACVLAN_MODE_DEFAULT || macvlan.BCQueueLen > 0 { + data = linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + } + + if macvlan.Mode != MACVLAN_MODE_DEFAULT { + data.AddRtAttr(nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[macvlan.Mode])) + } + if macvlan.BCQueueLen > 0 { + data.AddRtAttr(nl.IFLA_MACVLAN_BC_QUEUE_LEN, nl.Uint32Attr(macvlan.BCQueueLen)) + } +} + func parseMacvlanData(link Link, data []syscall.NetlinkRouteAttr) { macv := link.(*Macvlan) for _, datum := range data { @@ -2571,6 +2998,10 @@ func parseMacvlanData(link Link, data []syscall.NetlinkRouteAttr) { for _, macDatum := range macs { macv.MACAddrs = append(macv.MACAddrs, net.HardwareAddr(macDatum.Value[0:6])) } + case nl.IFLA_MACVLAN_BC_QUEUE_LEN: + macv.BCQueueLen = native.Uint32(datum.Value[0:4]) + case nl.IFLA_MACVLAN_BC_QUEUE_LEN_USED: + macv.UsedBCQueueLen = native.Uint32(datum.Value[0:4]) } } } @@ -2599,10 +3030,13 @@ func linkFlags(rawFlags uint32) net.Flags { func addGeneveAttrs(geneve *Geneve, linkInfo *nl.RtAttr) { data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + if geneve.InnerProtoInherit { + data.AddRtAttr(nl.IFLA_GENEVE_INNER_PROTO_INHERIT, []byte{}) + } + if geneve.FlowBased { - // In flow based mode, no other attributes need to be configured - linkInfo.AddRtAttr(nl.IFLA_GENEVE_COLLECT_METADATA, boolAttr(geneve.FlowBased)) - return + geneve.ID = 0 + data.AddRtAttr(nl.IFLA_GENEVE_COLLECT_METADATA, []byte{}) } if ip := geneve.Remote; ip != nil { @@ -2628,6 +3062,8 @@ func addGeneveAttrs(geneve *Geneve, linkInfo *nl.RtAttr) { if geneve.Tos != 0 { data.AddRtAttr(nl.IFLA_GENEVE_TOS, nl.Uint8Attr(geneve.Tos)) } + + data.AddRtAttr(nl.IFLA_GENEVE_DF, nl.Uint8Attr(uint8(geneve.Df))) } func parseGeneveData(link Link, data []syscall.NetlinkRouteAttr) { @@ -2644,6 +3080,10 @@ func parseGeneveData(link Link, data []syscall.NetlinkRouteAttr) { geneve.Ttl = uint8(datum.Value[0]) case nl.IFLA_GENEVE_TOS: geneve.Tos = uint8(datum.Value[0]) + case nl.IFLA_GENEVE_COLLECT_METADATA: + geneve.FlowBased = true + case nl.IFLA_GENEVE_INNER_PROTO_INHERIT: + geneve.InnerProtoInherit = true } } } @@ -2653,7 +3093,7 @@ func addGretapAttrs(gretap *Gretap, linkInfo *nl.RtAttr) { if gretap.FlowBased { // In flow based mode, no other attributes need to be configured - data.AddRtAttr(nl.IFLA_GRE_COLLECT_METADATA, boolAttr(gretap.FlowBased)) + data.AddRtAttr(nl.IFLA_GRE_COLLECT_METADATA, []byte{}) return } @@ -2736,6 +3176,12 @@ func parseGretapData(link Link, data []syscall.NetlinkRouteAttr) { func addGretunAttrs(gre *Gretun, linkInfo *nl.RtAttr) { data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + if gre.FlowBased { + // In flow based mode, no other attributes need to be configured + data.AddRtAttr(nl.IFLA_GRE_COLLECT_METADATA, []byte{}) + return + } + if ip := gre.Local; ip != nil { if ip.To4() != nil { ip = ip.To4() @@ -2806,6 +3252,8 @@ func parseGretunData(link Link, data []syscall.NetlinkRouteAttr) { gre.EncapSport = ntohs(datum.Value[0:2]) case nl.IFLA_GRE_ENCAP_DPORT: gre.EncapDport = ntohs(datum.Value[0:2]) + case nl.IFLA_GRE_COLLECT_METADATA: + gre.FlowBased = true } } } @@ -2846,14 +3294,14 @@ func parseLinkXdp(data []byte) (*LinkXdp, error) { } func addIptunAttrs(iptun *Iptun, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + if iptun.FlowBased { // In flow based mode, no other attributes need to be configured - linkInfo.AddRtAttr(nl.IFLA_IPTUN_COLLECT_METADATA, boolAttr(iptun.FlowBased)) + data.AddRtAttr(nl.IFLA_IPTUN_COLLECT_METADATA, []byte{}) return } - data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) - ip := iptun.Local.To4() if ip != nil { data.AddRtAttr(nl.IFLA_IPTUN_LOCAL, []byte(ip)) @@ -2880,10 +3328,6 @@ func addIptunAttrs(iptun *Iptun, linkInfo *nl.RtAttr) { func parseIptunData(link Link, data []syscall.NetlinkRouteAttr) { iptun := link.(*Iptun) for _, datum := range data { - // NOTE: same with vxlan, ip tunnel may also has null datum.Value - if len(datum.Value) == 0 { - continue - } switch datum.Attr.Type { case nl.IFLA_IPTUN_LOCAL: iptun.Local = net.IP(datum.Value[0:4]) @@ -2914,6 +3358,12 @@ func parseIptunData(link Link, data []syscall.NetlinkRouteAttr) { func addIp6tnlAttrs(ip6tnl *Ip6tnl, linkInfo *nl.RtAttr) { data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + if ip6tnl.FlowBased { + // In flow based mode, no other attributes need to be configured + data.AddRtAttr(nl.IFLA_IPTUN_COLLECT_METADATA, []byte{}) + return + } + if ip6tnl.Link != 0 { data.AddRtAttr(nl.IFLA_IPTUN_LINK, nl.Uint32Attr(ip6tnl.Link)) } @@ -2968,6 +3418,8 @@ func parseIp6tnlData(link Link, data []syscall.NetlinkRouteAttr) { ip6tnl.EncapSport = ntohs(datum.Value[0:2]) case nl.IFLA_IPTUN_ENCAP_DPORT: ip6tnl.EncapDport = ntohs(datum.Value[0:2]) + case nl.IFLA_IPTUN_COLLECT_METADATA: + ip6tnl.FlowBased = true } } } @@ -3115,6 +3567,12 @@ func addBridgeAttrs(bridge *Bridge, linkInfo *nl.RtAttr) { if bridge.VlanFiltering != nil { data.AddRtAttr(nl.IFLA_BR_VLAN_FILTERING, boolToByte(*bridge.VlanFiltering)) } + if bridge.VlanDefaultPVID != nil { + data.AddRtAttr(nl.IFLA_BR_VLAN_DEFAULT_PVID, nl.Uint16Attr(*bridge.VlanDefaultPVID)) + } + if bridge.GroupFwdMask != nil { + data.AddRtAttr(nl.IFLA_BR_GROUP_FWD_MASK, nl.Uint16Attr(*bridge.GroupFwdMask)) + } } func parseBridgeData(bridge Link, data []syscall.NetlinkRouteAttr) { @@ -3133,6 +3591,12 @@ func parseBridgeData(bridge Link, data []syscall.NetlinkRouteAttr) { case nl.IFLA_BR_VLAN_FILTERING: vlanFiltering := datum.Value[0] == 1 br.VlanFiltering = &vlanFiltering + case nl.IFLA_BR_VLAN_DEFAULT_PVID: + vlanDefaultPVID := native.Uint16(datum.Value[0:2]) + br.VlanDefaultPVID = &vlanDefaultPVID + case nl.IFLA_BR_GROUP_FWD_MASK: + mask := native.Uint16(datum.Value[0:2]) + br.GroupFwdMask = &mask } } } @@ -3174,12 +3638,17 @@ func parseVfInfoList(data []syscall.NetlinkRouteAttr) ([]VfInfo, error) { if err != nil { return nil, err } - vfs = append(vfs, parseVfInfo(vfAttrs, i)) + + vf, err := parseVfInfo(vfAttrs, i) + if err != nil { + return nil, err + } + vfs = append(vfs, vf) } return vfs, nil } -func parseVfInfo(data []syscall.NetlinkRouteAttr, id int) VfInfo { +func parseVfInfo(data []syscall.NetlinkRouteAttr, id int) (VfInfo, error) { vf := VfInfo{ID: id} for _, element := range data { switch element.Attr.Type { @@ -3190,6 +3659,12 @@ func parseVfInfo(data []syscall.NetlinkRouteAttr, id int) VfInfo { vl := nl.DeserializeVfVlan(element.Value[:]) vf.Vlan = int(vl.Vlan) vf.Qos = int(vl.Qos) + case nl.IFLA_VF_VLAN_LIST: + vfVlanInfoList, err := nl.DeserializeVfVlanList(element.Value[:]) + if err != nil { + return vf, err + } + vf.VlanProto = int(vfVlanInfoList[0].VlanProto) case nl.IFLA_VF_TX_RATE: txr := nl.DeserializeVfTxRate(element.Value[:]) vf.TxRate = int(txr.Rate) @@ -3223,7 +3698,7 @@ func parseVfInfo(data []syscall.NetlinkRouteAttr, id int) VfInfo { vf.Trust = result.Setting } } - return vf + return vf, nil } func addXfrmiAttrs(xfrmi *Xfrmi, linkInfo *nl.RtAttr) { @@ -3246,8 +3721,7 @@ func parseXfrmiData(link Link, data []syscall.NetlinkRouteAttr) { } } -// LinkSetBondSlave add slave to bond link via ioctl interface. -func LinkSetBondSlave(link Link, master *Bond) error { +func ioctlBondSlave(cmd uintptr, link Link, master *Bond) error { fd, err := getSocketUDP() if err != nil { return err @@ -3255,10 +3729,38 @@ func LinkSetBondSlave(link Link, master *Bond) error { defer syscall.Close(fd) ifreq := newIocltSlaveReq(link.Attrs().Name, master.Attrs().Name) - - _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), unix.SIOCBONDENSLAVE, uintptr(unsafe.Pointer(ifreq))) + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), cmd, uintptr(unsafe.Pointer(ifreq))) if errno != 0 { - return fmt.Errorf("Failed to enslave %q to %q, errno=%v", link.Attrs().Name, master.Attrs().Name, errno) + return fmt.Errorf("errno=%v", errno) + } + return nil +} + +// LinkSetBondSlaveActive sets specified slave to ACTIVE in an `active-backup` bond link via ioctl interface. +// +// Multiple calls keeps the status unchanged(shown in the unit test). +func LinkSetBondSlaveActive(link Link, master *Bond) error { + err := ioctlBondSlave(unix.SIOCBONDCHANGEACTIVE, link, master) + if err != nil { + return fmt.Errorf("Failed to set slave %q active in %q, %v", link.Attrs().Name, master.Attrs().Name, err) + } + return nil +} + +// LinkSetBondSlave add slave to bond link via ioctl interface. +func LinkSetBondSlave(link Link, master *Bond) error { + err := ioctlBondSlave(unix.SIOCBONDENSLAVE, link, master) + if err != nil { + return fmt.Errorf("Failed to enslave %q to %q, %v", link.Attrs().Name, master.Attrs().Name, err) + } + return nil +} + +// LinkSetBondSlave removes specified slave from bond link via ioctl interface. +func LinkDelBondSlave(link Link, master *Bond) error { + err := ioctlBondSlave(unix.SIOCBONDRELEASE, link, master) + if err != nil { + return fmt.Errorf("Failed to del slave %q from %q, %v", link.Attrs().Name, master.Attrs().Name, err) } return nil } diff --git a/vendor/github.com/vishvananda/netlink/neigh_linux.go b/vendor/github.com/vishvananda/netlink/neigh_linux.go index 4c1e7663..2d93044a 100644 --- a/vendor/github.com/vishvananda/netlink/neigh_linux.go +++ b/vendor/github.com/vishvananda/netlink/neigh_linux.go @@ -339,13 +339,13 @@ func NeighDeserialize(m []byte) (*Neigh, error) { // NeighSubscribe takes a chan down which notifications will be sent // when neighbors are added or deleted. Close the 'done' chan to stop subscription. func NeighSubscribe(ch chan<- NeighUpdate, done <-chan struct{}) error { - return neighSubscribeAt(netns.None(), netns.None(), ch, done, nil, false) + return neighSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0, nil, false) } // NeighSubscribeAt works like NeighSubscribe plus it allows the caller // to choose the network namespace in which to subscribe (ns). func NeighSubscribeAt(ns netns.NsHandle, ch chan<- NeighUpdate, done <-chan struct{}) error { - return neighSubscribeAt(ns, netns.None(), ch, done, nil, false) + return neighSubscribeAt(ns, netns.None(), ch, done, nil, false, 0, nil, false) } // NeighSubscribeOptions contains a set of options to use with @@ -354,6 +354,11 @@ type NeighSubscribeOptions struct { Namespace *netns.NsHandle ErrorCallback func(error) ListExisting bool + + // max size is based on value of /proc/sys/net/core/rmem_max + ReceiveBufferSize int + ReceiveBufferForceSize bool + ReceiveTimeout *unix.Timeval } // NeighSubscribeWithOptions work like NeighSubscribe but enable to @@ -364,16 +369,17 @@ func NeighSubscribeWithOptions(ch chan<- NeighUpdate, done <-chan struct{}, opti none := netns.None() options.Namespace = &none } - return neighSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting) + return neighSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, + options.ReceiveBufferSize, options.ReceiveTimeout, options.ReceiveBufferForceSize) } -func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done <-chan struct{}, cberr func(error), listExisting bool) error { +func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done <-chan struct{}, cberr func(error), listExisting bool, + rcvbuf int, rcvTimeout *unix.Timeval, rcvbufForce bool) error { s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_NEIGH) makeRequest := func(family int) error { - req := pkgHandle.newNetlinkRequest(unix.RTM_GETNEIGH, - unix.NLM_F_DUMP) - infmsg := nl.NewIfInfomsg(family) - req.AddData(infmsg) + req := pkgHandle.newNetlinkRequest(unix.RTM_GETNEIGH, unix.NLM_F_DUMP) + ndmsg := &Ndmsg{Family: uint8(family)} + req.AddData(ndmsg) if err := s.Send(req); err != nil { return err } @@ -382,6 +388,17 @@ func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done < if err != nil { return err } + if rcvTimeout != nil { + if err := s.SetReceiveTimeout(rcvTimeout); err != nil { + return err + } + } + if rcvbuf != 0 { + err = s.SetReceiveBufferSize(rcvbuf, rcvbufForce) + if err != nil { + return err + } + } if done != nil { go func() { <-done @@ -427,12 +444,12 @@ func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done < continue } if m.Header.Type == unix.NLMSG_ERROR { - error := int32(native.Uint32(m.Data[0:4])) - if error == 0 { + nError := int32(native.Uint32(m.Data[0:4])) + if nError == 0 { continue } if cberr != nil { - cberr(syscall.Errno(-error)) + cberr(syscall.Errno(-nError)) } return } diff --git a/vendor/github.com/vishvananda/netlink/netlink_unspecified.go b/vendor/github.com/vishvananda/netlink/netlink_unspecified.go index 98d2c0db..da12c42a 100644 --- a/vendor/github.com/vishvananda/netlink/netlink_unspecified.go +++ b/vendor/github.com/vishvananda/netlink/netlink_unspecified.go @@ -52,6 +52,10 @@ func LinkSetVfVlanQos(link Link, vf, vlan, qos int) error { return ErrNotImplemented } +func LinkSetVfVlanQosProto(link Link, vf, vlan, qos, proto int) error { + return ErrNotImplemented +} + func LinkSetVfTxRate(link Link, vf, rate int) error { return ErrNotImplemented } @@ -124,6 +128,22 @@ func LinkSetTxQLen(link Link, qlen int) error { return ErrNotImplemented } +func LinkSetGSOMaxSize(link Link, maxSize int) error { + return ErrNotImplemented +} + +func LinkSetGROMaxSize(link Link, maxSize int) error { + return ErrNotImplemented +} + +func LinkSetGSOIPv4MaxSize(link Link, maxSize int) error { + return ErrNotImplemented +} + +func LinkSetGROIPv4MaxSize(link Link, maxSize int) error { + return ErrNotImplemented +} + func LinkAdd(link Link) error { return ErrNotImplemented } @@ -184,6 +204,10 @@ func RouteAppend(route *Route) error { return ErrNotImplemented } +func RouteChange(route *Route) error { + return ErrNotImplemented +} + func RouteDel(route *Route) error { return ErrNotImplemented } @@ -216,6 +240,10 @@ func XfrmPolicyList(family int) ([]XfrmPolicy, error) { return nil, ErrNotImplemented } +func XfrmPolicyGet(policy *XfrmPolicy) (*XfrmPolicy, error) { + return nil, ErrNotImplemented +} + func XfrmStateAdd(policy *XfrmState) error { return ErrNotImplemented } @@ -255,3 +283,7 @@ func NeighDeserialize(m []byte) (*Neigh, error) { func SocketGet(local, remote net.Addr) (*Socket, error) { return nil, ErrNotImplemented } + +func SocketDestroy(local, remote net.Addr) (*Socket, error) { + return nil, ErrNotImplemented +} diff --git a/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go b/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go index 18360180..6989d1ed 100644 --- a/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go @@ -15,6 +15,38 @@ var L4ProtoMap = map[uint8]string{ 17: "udp", } +// From https://git.netfilter.org/libnetfilter_conntrack/tree/include/libnetfilter_conntrack/libnetfilter_conntrack_tcp.h +// enum tcp_state { +// TCP_CONNTRACK_NONE, +// TCP_CONNTRACK_SYN_SENT, +// TCP_CONNTRACK_SYN_RECV, +// TCP_CONNTRACK_ESTABLISHED, +// TCP_CONNTRACK_FIN_WAIT, +// TCP_CONNTRACK_CLOSE_WAIT, +// TCP_CONNTRACK_LAST_ACK, +// TCP_CONNTRACK_TIME_WAIT, +// TCP_CONNTRACK_CLOSE, +// TCP_CONNTRACK_LISTEN, /* obsolete */ +// #define TCP_CONNTRACK_SYN_SENT2 TCP_CONNTRACK_LISTEN +// TCP_CONNTRACK_MAX, +// TCP_CONNTRACK_IGNORE +// }; +const ( + TCP_CONNTRACK_NONE = 0 + TCP_CONNTRACK_SYN_SENT = 1 + TCP_CONNTRACK_SYN_RECV = 2 + TCP_CONNTRACK_ESTABLISHED = 3 + TCP_CONNTRACK_FIN_WAIT = 4 + TCP_CONNTRACK_CLOSE_WAIT = 5 + TCP_CONNTRACK_LAST_ACK = 6 + TCP_CONNTRACK_TIME_WAIT = 7 + TCP_CONNTRACK_CLOSE = 8 + TCP_CONNTRACK_LISTEN = 9 + TCP_CONNTRACK_SYN_SENT2 = 9 + TCP_CONNTRACK_MAX = 10 + TCP_CONNTRACK_IGNORE = 11 +) + // All the following constants are coming from: // https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -31,6 +63,7 @@ var L4ProtoMap = map[uint8]string{ // IPCTNL_MSG_MAX // }; const ( + IPCTNL_MSG_CT_NEW = 0 IPCTNL_MSG_CT_GET = 1 IPCTNL_MSG_CT_DELETE = 2 ) @@ -88,7 +121,10 @@ const ( CTA_COUNTERS_REPLY = 10 CTA_USE = 11 CTA_ID = 12 + CTA_ZONE = 18 CTA_TIMESTAMP = 20 + CTA_LABELS = 22 + CTA_LABELS_MASK = 23 ) // enum ctattr_tuple { @@ -149,7 +185,10 @@ const ( // }; // #define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1) const ( + CTA_PROTOINFO_UNSPEC = 0 CTA_PROTOINFO_TCP = 1 + CTA_PROTOINFO_DCCP = 2 + CTA_PROTOINFO_SCTP = 3 ) // enum ctattr_protoinfo_tcp { diff --git a/vendor/github.com/vishvananda/netlink/nl/devlink_linux.go b/vendor/github.com/vishvananda/netlink/nl/devlink_linux.go index 2995da49..956367b2 100644 --- a/vendor/github.com/vishvananda/netlink/nl/devlink_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/devlink_linux.go @@ -9,39 +9,56 @@ const ( ) const ( - DEVLINK_CMD_GET = 1 - DEVLINK_CMD_PORT_GET = 5 - DEVLINK_CMD_PORT_SET = 6 - DEVLINK_CMD_PORT_NEW = 7 - DEVLINK_CMD_PORT_DEL = 8 - DEVLINK_CMD_ESWITCH_GET = 29 - DEVLINK_CMD_ESWITCH_SET = 30 - DEVLINK_CMD_INFO_GET = 51 + DEVLINK_CMD_GET = 1 + DEVLINK_CMD_PORT_GET = 5 + DEVLINK_CMD_PORT_SET = 6 + DEVLINK_CMD_PORT_NEW = 7 + DEVLINK_CMD_PORT_DEL = 8 + DEVLINK_CMD_ESWITCH_GET = 29 + DEVLINK_CMD_ESWITCH_SET = 30 + DEVLINK_CMD_RESOURCE_DUMP = 36 + DEVLINK_CMD_PARAM_GET = 38 + DEVLINK_CMD_PARAM_SET = 39 + DEVLINK_CMD_INFO_GET = 51 ) const ( - DEVLINK_ATTR_BUS_NAME = 1 - DEVLINK_ATTR_DEV_NAME = 2 - DEVLINK_ATTR_PORT_INDEX = 3 - DEVLINK_ATTR_PORT_TYPE = 4 - DEVLINK_ATTR_PORT_NETDEV_IFINDEX = 6 - DEVLINK_ATTR_PORT_NETDEV_NAME = 7 - DEVLINK_ATTR_PORT_IBDEV_NAME = 8 - DEVLINK_ATTR_ESWITCH_MODE = 25 - DEVLINK_ATTR_ESWITCH_INLINE_MODE = 26 - DEVLINK_ATTR_ESWITCH_ENCAP_MODE = 62 - DEVLINK_ATTR_PORT_FLAVOUR = 77 - DEVLINK_ATTR_INFO_DRIVER_NAME = 98 - DEVLINK_ATTR_INFO_SERIAL_NUMBER = 99 - DEVLINK_ATTR_INFO_VERSION_FIXED = 100 - DEVLINK_ATTR_INFO_VERSION_RUNNING = 101 - DEVLINK_ATTR_INFO_VERSION_STORED = 102 - DEVLINK_ATTR_INFO_VERSION_NAME = 103 - DEVLINK_ATTR_INFO_VERSION_VALUE = 104 - DEVLINK_ATTR_PORT_PCI_PF_NUMBER = 127 - DEVLINK_ATTR_PORT_FUNCTION = 145 - DEVLINK_ATTR_PORT_CONTROLLER_NUMBER = 150 - DEVLINK_ATTR_PORT_PCI_SF_NUMBER = 164 + DEVLINK_ATTR_BUS_NAME = 1 + DEVLINK_ATTR_DEV_NAME = 2 + DEVLINK_ATTR_PORT_INDEX = 3 + DEVLINK_ATTR_PORT_TYPE = 4 + DEVLINK_ATTR_PORT_NETDEV_IFINDEX = 6 + DEVLINK_ATTR_PORT_NETDEV_NAME = 7 + DEVLINK_ATTR_PORT_IBDEV_NAME = 8 + DEVLINK_ATTR_ESWITCH_MODE = 25 + DEVLINK_ATTR_ESWITCH_INLINE_MODE = 26 + DEVLINK_ATTR_ESWITCH_ENCAP_MODE = 62 + DEVLINK_ATTR_RESOURCE_LIST = 63 /* nested */ + DEVLINK_ATTR_RESOURCE = 64 /* nested */ + DEVLINK_ATTR_RESOURCE_NAME = 65 /* string */ + DEVLINK_ATTR_RESOURCE_ID = 66 /* u64 */ + DEVLINK_ATTR_RESOURCE_SIZE = 67 /* u64 */ + DEVLINK_ATTR_RESOURCE_SIZE_NEW = 68 /* u64 */ + DEVLINK_ATTR_RESOURCE_SIZE_VALID = 69 /* u8 */ + DEVLINK_ATTR_RESOURCE_SIZE_MIN = 70 /* u64 */ + DEVLINK_ATTR_RESOURCE_SIZE_MAX = 71 /* u64 */ + DEVLINK_ATTR_RESOURCE_SIZE_GRAN = 72 /* u64 */ + DEVLINK_ATTR_RESOURCE_UNIT = 73 /* u8 */ + DEVLINK_ATTR_RESOURCE_OCC = 74 /* u64 */ + DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID = 75 /* u64 */ + DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS = 76 /* u64 */ + DEVLINK_ATTR_PORT_FLAVOUR = 77 + DEVLINK_ATTR_INFO_DRIVER_NAME = 98 + DEVLINK_ATTR_INFO_SERIAL_NUMBER = 99 + DEVLINK_ATTR_INFO_VERSION_FIXED = 100 + DEVLINK_ATTR_INFO_VERSION_RUNNING = 101 + DEVLINK_ATTR_INFO_VERSION_STORED = 102 + DEVLINK_ATTR_INFO_VERSION_NAME = 103 + DEVLINK_ATTR_INFO_VERSION_VALUE = 104 + DEVLINK_ATTR_PORT_PCI_PF_NUMBER = 127 + DEVLINK_ATTR_PORT_FUNCTION = 145 + DEVLINK_ATTR_PORT_CONTROLLER_NUMBER = 150 + DEVLINK_ATTR_PORT_PCI_SF_NUMBER = 164 ) const ( @@ -94,3 +111,32 @@ const ( DEVLINK_PORT_FN_OPSTATE_DETACHED = 0 DEVLINK_PORT_FN_OPSTATE_ATTACHED = 1 ) + +const ( + DEVLINK_RESOURCE_UNIT_ENTRY uint8 = 0 +) + +const ( + DEVLINK_ATTR_PARAM = iota + 80 /* nested */ + DEVLINK_ATTR_PARAM_NAME /* string */ + DEVLINK_ATTR_PARAM_GENERIC /* flag */ + DEVLINK_ATTR_PARAM_TYPE /* u8 */ + DEVLINK_ATTR_PARAM_VALUES_LIST /* nested */ + DEVLINK_ATTR_PARAM_VALUE /* nested */ + DEVLINK_ATTR_PARAM_VALUE_DATA /* dynamic */ + DEVLINK_ATTR_PARAM_VALUE_CMODE /* u8 */ +) + +const ( + DEVLINK_PARAM_TYPE_U8 = 1 + DEVLINK_PARAM_TYPE_U16 = 2 + DEVLINK_PARAM_TYPE_U32 = 3 + DEVLINK_PARAM_TYPE_STRING = 5 + DEVLINK_PARAM_TYPE_BOOL = 6 +) + +const ( + DEVLINK_PARAM_CMODE_RUNTIME = iota + DEVLINK_PARAM_CMODE_DRIVERINIT + DEVLINK_PARAM_CMODE_PERMANENT +) diff --git a/vendor/github.com/vishvananda/netlink/nl/ip6tnl_linux.go b/vendor/github.com/vishvananda/netlink/nl/ip6tnl_linux.go new file mode 100644 index 00000000..d5dd69e0 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/ip6tnl_linux.go @@ -0,0 +1,21 @@ +package nl + +// id's of route attribute from https://elixir.bootlin.com/linux/v5.17.3/source/include/uapi/linux/lwtunnel.h#L38 +// the value's size are specified in https://elixir.bootlin.com/linux/v5.17.3/source/net/ipv4/ip_tunnel_core.c#L928 + +const ( + LWTUNNEL_IP6_UNSPEC = iota + LWTUNNEL_IP6_ID + LWTUNNEL_IP6_DST + LWTUNNEL_IP6_SRC + LWTUNNEL_IP6_HOPLIMIT + LWTUNNEL_IP6_TC + LWTUNNEL_IP6_FLAGS + LWTUNNEL_IP6_PAD // not implemented + LWTUNNEL_IP6_OPTS // not implemented + __LWTUNNEL_IP6_MAX +) + + + + diff --git a/vendor/github.com/vishvananda/netlink/nl/ipset_linux.go b/vendor/github.com/vishvananda/netlink/nl/ipset_linux.go index a60b4b09..89dd009d 100644 --- a/vendor/github.com/vishvananda/netlink/nl/ipset_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/ipset_linux.go @@ -88,6 +88,11 @@ const ( SET_ATTR_CREATE_MAX ) +const ( + IPSET_ATTR_IPADDR_IPV4 = 1 + IPSET_ATTR_IPADDR_IPV6 = 2 +) + /* ADT specific attributes */ const ( IPSET_ATTR_ETHER = IPSET_ATTR_CADT_MAX + iota + 1 diff --git a/vendor/github.com/vishvananda/netlink/nl/link_linux.go b/vendor/github.com/vishvananda/netlink/nl/link_linux.go index e10edbc0..0b5be470 100644 --- a/vendor/github.com/vishvananda/netlink/nl/link_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/link_linux.go @@ -3,6 +3,7 @@ package nl import ( "bytes" "encoding/binary" + "fmt" "unsafe" ) @@ -30,6 +31,16 @@ const ( IFLA_VLAN_MAX = IFLA_VLAN_PROTOCOL ) +const ( + IFLA_NETKIT_UNSPEC = iota + IFLA_NETKIT_PEER_INFO + IFLA_NETKIT_PRIMARY + IFLA_NETKIT_POLICY + IFLA_NETKIT_PEER_POLICY + IFLA_NETKIT_MODE + IFLA_NETKIT_MAX = IFLA_NETKIT_MODE +) + const ( VETH_INFO_UNSPEC = iota VETH_INFO_PEER @@ -85,7 +96,37 @@ const ( IFLA_BRPORT_PROXYARP IFLA_BRPORT_LEARNING_SYNC IFLA_BRPORT_PROXYARP_WIFI - IFLA_BRPORT_MAX = IFLA_BRPORT_PROXYARP_WIFI + IFLA_BRPORT_ROOT_ID + IFLA_BRPORT_BRIDGE_ID + IFLA_BRPORT_DESIGNATED_PORT + IFLA_BRPORT_DESIGNATED_COST + IFLA_BRPORT_ID + IFLA_BRPORT_NO + IFLA_BRPORT_TOPOLOGY_CHANGE_ACK + IFLA_BRPORT_CONFIG_PENDING + IFLA_BRPORT_MESSAGE_AGE_TIMER + IFLA_BRPORT_FORWARD_DELAY_TIMER + IFLA_BRPORT_HOLD_TIMER + IFLA_BRPORT_FLUSH + IFLA_BRPORT_MULTICAST_ROUTER + IFLA_BRPORT_PAD + IFLA_BRPORT_MCAST_FLOOD + IFLA_BRPORT_MCAST_TO_UCAST + IFLA_BRPORT_VLAN_TUNNEL + IFLA_BRPORT_BCAST_FLOOD + IFLA_BRPORT_GROUP_FWD_MASK + IFLA_BRPORT_NEIGH_SUPPRESS + IFLA_BRPORT_ISOLATED + IFLA_BRPORT_BACKUP_PORT + IFLA_BRPORT_MRP_RING_OPEN + IFLA_BRPORT_MRP_IN_OPEN + IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT + IFLA_BRPORT_MCAST_EHT_HOSTS_CNT + IFLA_BRPORT_LOCKED + IFLA_BRPORT_MAB + IFLA_BRPORT_MCAST_N_GROUPS + IFLA_BRPORT_MCAST_MAX_GROUPS + IFLA_BRPORT_MAX = IFLA_BRPORT_MCAST_MAX_GROUPS ) const ( @@ -103,7 +144,9 @@ const ( IFLA_MACVLAN_MACADDR IFLA_MACVLAN_MACADDR_DATA IFLA_MACVLAN_MACADDR_COUNT - IFLA_MACVLAN_MAX = IFLA_MACVLAN_FLAGS + IFLA_MACVLAN_BC_QUEUE_LEN + IFLA_MACVLAN_BC_QUEUE_LEN_USED + IFLA_MACVLAN_MAX = IFLA_MACVLAN_BC_QUEUE_LEN_USED ) const ( @@ -186,7 +229,10 @@ const ( IFLA_GENEVE_UDP_ZERO_CSUM6_TX IFLA_GENEVE_UDP_ZERO_CSUM6_RX IFLA_GENEVE_LABEL - IFLA_GENEVE_MAX = IFLA_GENEVE_LABEL + IFLA_GENEVE_TTL_INHERIT + IFLA_GENEVE_DF + IFLA_GENEVE_INNER_PROTO_INHERIT + IFLA_GENEVE_MAX = IFLA_GENEVE_INNER_PROTO_INHERIT ) const ( @@ -244,7 +290,15 @@ const ( IFLA_VF_TRUST /* Trust state of VF */ IFLA_VF_IB_NODE_GUID /* VF Infiniband node GUID */ IFLA_VF_IB_PORT_GUID /* VF Infiniband port GUID */ - IFLA_VF_MAX = IFLA_VF_IB_PORT_GUID + IFLA_VF_VLAN_LIST /* nested list of vlans, option for QinQ */ + + IFLA_VF_MAX = IFLA_VF_IB_PORT_GUID +) + +const ( + IFLA_VF_VLAN_INFO_UNSPEC = iota + IFLA_VF_VLAN_INFO /* VLAN ID, QoS and VLAN protocol */ + __IFLA_VF_VLAN_INFO_MAX ) const ( @@ -269,6 +323,7 @@ const ( const ( SizeofVfMac = 0x24 SizeofVfVlan = 0x0c + SizeofVfVlanInfo = 0x10 SizeofVfTxRate = 0x08 SizeofVfRate = 0x0c SizeofVfSpoofchk = 0x08 @@ -324,6 +379,49 @@ func (msg *VfVlan) Serialize() []byte { return (*(*[SizeofVfVlan]byte)(unsafe.Pointer(msg)))[:] } +func DeserializeVfVlanList(b []byte) ([]*VfVlanInfo, error) { + var vfVlanInfoList []*VfVlanInfo + attrs, err := ParseRouteAttr(b) + if err != nil { + return nil, err + } + + for _, element := range attrs { + if element.Attr.Type == IFLA_VF_VLAN_INFO { + vfVlanInfoList = append(vfVlanInfoList, DeserializeVfVlanInfo(element.Value)) + } + } + + if len(vfVlanInfoList) == 0 { + return nil, fmt.Errorf("VF vlan list is defined but no vf vlan info elements were found") + } + + return vfVlanInfoList, nil +} + +// struct ifla_vf_vlan_info { +// __u32 vf; +// __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */ +// __u32 qos; +// __be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */ +// }; + +type VfVlanInfo struct { + VfVlan + VlanProto uint16 +} + +func DeserializeVfVlanInfo(b []byte) *VfVlanInfo { + return &VfVlanInfo{ + *(*VfVlan)(unsafe.Pointer(&b[0:SizeofVfVlan][0])), + binary.BigEndian.Uint16(b[SizeofVfVlan:SizeofVfVlanInfo]), + } +} + +func (msg *VfVlanInfo) Serialize() []byte { + return (*(*[SizeofVfVlanInfo]byte)(unsafe.Pointer(msg)))[:] +} + // struct ifla_vf_tx_rate { // __u32 vf; // __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */ diff --git a/vendor/github.com/vishvananda/netlink/nl/nl_linux.go b/vendor/github.com/vishvananda/netlink/nl/nl_linux.go index 600b942b..f4efae39 100644 --- a/vendor/github.com/vishvananda/netlink/nl/nl_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/nl_linux.go @@ -330,6 +330,19 @@ func NewIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg { return msg } +type Uint32Bitfield struct { + Value uint32 + Selector uint32 +} + +func (a *Uint32Bitfield) Serialize() []byte { + return (*(*[SizeofUint32Bitfield]byte)(unsafe.Pointer(a)))[:] +} + +func DeserializeUint32Bitfield(data []byte) *Uint32Bitfield { + return (*Uint32Bitfield)(unsafe.Pointer(&data[0:SizeofUint32Bitfield][0])) +} + type Uint32Attribute struct { Type uint16 Value uint32 @@ -475,10 +488,30 @@ func (req *NetlinkRequest) AddRawData(data []byte) { req.RawData = append(req.RawData, data...) } -// Execute the request against a the given sockType. +// Execute the request against the given sockType. // Returns a list of netlink messages in serialized format, optionally filtered // by resType. func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, error) { + var res [][]byte + err := req.ExecuteIter(sockType, resType, func(msg []byte) bool { + res = append(res, msg) + return true + }) + if err != nil { + return nil, err + } + return res, nil +} + +// ExecuteIter executes the request against the given sockType. +// Calls the provided callback func once for each netlink message. +// If the callback returns false, it is not called again, but +// the remaining messages are consumed/discarded. +// +// Thread safety: ExecuteIter holds a lock on the socket until +// it finishes iteration so the callback must not call back into +// the netlink API. +func (req *NetlinkRequest) ExecuteIter(sockType int, resType uint16, f func(msg []byte) bool) error { var ( s *NetlinkSocket err error @@ -495,18 +528,18 @@ func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, erro if s == nil { s, err = getNetlinkSocket(sockType) if err != nil { - return nil, err + return err } if err := s.SetSendTimeout(&SocketTimeoutTv); err != nil { - return nil, err + return err } if err := s.SetReceiveTimeout(&SocketTimeoutTv); err != nil { - return nil, err + return err } if EnableErrorMessageReporting { if err := s.SetExtAck(true); err != nil { - return nil, err + return err } } @@ -517,36 +550,44 @@ func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, erro } if err := s.Send(req); err != nil { - return nil, err + return err } pid, err := s.GetPid() if err != nil { - return nil, err + return err } - var res [][]byte - done: for { msgs, from, err := s.Receive() if err != nil { - return nil, err + return err } if from.Pid != PidKernel { - return nil, fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, PidKernel) + return fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, PidKernel) } for _, m := range msgs { if m.Header.Seq != req.Seq { if sharedSocket { continue } - return nil, fmt.Errorf("Wrong Seq nr %d, expected %d", m.Header.Seq, req.Seq) + return fmt.Errorf("Wrong Seq nr %d, expected %d", m.Header.Seq, req.Seq) } if m.Header.Pid != pid { continue } + + if m.Header.Flags&unix.NLM_F_DUMP_INTR != 0 { + return syscall.Errno(unix.EINTR) + } + if m.Header.Type == unix.NLMSG_DONE || m.Header.Type == unix.NLMSG_ERROR { + // NLMSG_DONE might have no payload, if so assume no error. + if m.Header.Type == unix.NLMSG_DONE && len(m.Data) == 0 { + break done + } + native := NativeEndian() errno := int32(native.Uint32(m.Data[0:4])) if errno == 0 { @@ -556,7 +597,7 @@ done: err = syscall.Errno(-errno) unreadData := m.Data[4:] - if m.Header.Flags|unix.NLM_F_ACK_TLVS != 0 && len(unreadData) > syscall.SizeofNlMsghdr { + if m.Header.Flags&unix.NLM_F_ACK_TLVS != 0 && len(unreadData) > syscall.SizeofNlMsghdr { // Skip the echoed request message. echoReqH := (*syscall.NlMsghdr)(unsafe.Pointer(&unreadData[0])) unreadData = unreadData[nlmAlignOf(int(echoReqH.Len)):] @@ -568,8 +609,7 @@ done: switch attr.Type { case NLMSGERR_ATTR_MSG: - err = fmt.Errorf("%w: %s", err, string(attrData)) - + err = fmt.Errorf("%w: %s", err, unix.ByteSliceToString(attrData)) default: // TODO: handle other NLMSGERR_ATTR types } @@ -578,18 +618,26 @@ done: } } - return nil, err + return err } if resType != 0 && m.Header.Type != resType { continue } - res = append(res, m.Data) + if cont := f(m.Data); !cont { + // Drain the rest of the messages from the kernel but don't + // pass them to the iterator func. + f = dummyMsgIterFunc + } if m.Header.Flags&unix.NLM_F_MULTI == 0 { break done } } } - return res, nil + return nil +} + +func dummyMsgIterFunc(msg []byte) bool { + return true } // Create a new netlink request from proto and flags @@ -649,12 +697,14 @@ func GetNetlinkSocketAt(newNs, curNs netns.NsHandle, protocol int) (*NetlinkSock // In case of success, the caller is expected to execute the returned function // at the end of the code that needs to be executed in the network namespace. // Example: -// func jobAt(...) error { -// d, err := executeInNetns(...) -// if err != nil { return err} -// defer d() -// < code which needs to be executed in specific netns> -// } +// +// func jobAt(...) error { +// d, err := executeInNetns(...) +// if err != nil { return err} +// defer d() +// < code which needs to be executed in specific netns> +// } +// // TODO: his function probably belongs to netns pkg. func executeInNetns(newNs, curNs netns.NsHandle) (func(), error) { var ( @@ -770,8 +820,9 @@ func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, *unix.SockaddrNetli if nr < unix.NLMSG_HDRLEN { return nil, nil, fmt.Errorf("Got short response from netlink") } - rb2 := make([]byte, nr) - copy(rb2, rb[:nr]) + msgLen := nlmAlignOf(nr) + rb2 := make([]byte, msgLen) + copy(rb2, rb[:msgLen]) nl, err := syscall.ParseNetlinkMessage(rb2) if err != nil { return nil, nil, err @@ -793,6 +844,15 @@ func (s *NetlinkSocket) SetReceiveTimeout(timeout *unix.Timeval) error { return unix.SetsockoptTimeval(int(s.fd), unix.SOL_SOCKET, unix.SO_RCVTIMEO, timeout) } +// SetReceiveBufferSize allows to set a receive buffer size on the socket +func (s *NetlinkSocket) SetReceiveBufferSize(size int, force bool) error { + opt := unix.SO_RCVBUF + if force { + opt = unix.SO_RCVBUFFORCE + } + return unix.SetsockoptInt(int(s.fd), unix.SOL_SOCKET, opt, size) +} + // SetExtAck requests error messages to be reported on the socket func (s *NetlinkSocket) SetExtAck(enable bool) error { var enableN int @@ -849,6 +909,12 @@ func Uint16Attr(v uint16) []byte { return bytes } +func BEUint16Attr(v uint16) []byte { + bytes := make([]byte, 2) + binary.BigEndian.PutUint16(bytes, v) + return bytes +} + func Uint32Attr(v uint32) []byte { native := NativeEndian() bytes := make([]byte, 4) @@ -856,6 +922,12 @@ func Uint32Attr(v uint32) []byte { return bytes } +func BEUint32Attr(v uint32) []byte { + bytes := make([]byte, 4) + binary.BigEndian.PutUint32(bytes, v) + return bytes +} + func Uint64Attr(v uint64) []byte { native := NativeEndian() bytes := make([]byte, 8) @@ -863,6 +935,12 @@ func Uint64Attr(v uint64) []byte { return bytes } +func BEUint64Attr(v uint64) []byte { + bytes := make([]byte, 8) + binary.BigEndian.PutUint64(bytes, v) + return bytes +} + func ParseRouteAttr(b []byte) ([]syscall.NetlinkRouteAttr, error) { var attrs []syscall.NetlinkRouteAttr for len(b) >= unix.SizeofRtAttr { @@ -877,6 +955,22 @@ func ParseRouteAttr(b []byte) ([]syscall.NetlinkRouteAttr, error) { return attrs, nil } +// ParseRouteAttrAsMap parses provided buffer that contains raw RtAttrs and returns a map of parsed +// atttributes indexed by attribute type or error if occured. +func ParseRouteAttrAsMap(b []byte) (map[uint16]syscall.NetlinkRouteAttr, error) { + attrMap := make(map[uint16]syscall.NetlinkRouteAttr) + + attrs, err := ParseRouteAttr(b) + if err != nil { + return nil, err + } + + for _, attr := range attrs { + attrMap[attr.Attr.Type] = attr + } + return attrMap, nil +} + func netlinkRouteAttrAndValue(b []byte) (*unix.RtAttr, []byte, int, error) { a := (*unix.RtAttr)(unsafe.Pointer(&b[0])) if int(a.Len) < unix.SizeofRtAttr || int(a.Len) > len(b) { diff --git a/vendor/github.com/vishvananda/netlink/nl/route_linux.go b/vendor/github.com/vishvananda/netlink/nl/route_linux.go index 03c1900f..c26f3bf9 100644 --- a/vendor/github.com/vishvananda/netlink/nl/route_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/route_linux.go @@ -48,7 +48,9 @@ type RtNexthop struct { } func DeserializeRtNexthop(b []byte) *RtNexthop { - return (*RtNexthop)(unsafe.Pointer(&b[0:unix.SizeofRtNexthop][0])) + return &RtNexthop{ + RtNexthop: *((*unix.RtNexthop)(unsafe.Pointer(&b[0:unix.SizeofRtNexthop][0]))), + } } func (msg *RtNexthop) Len() int { diff --git a/vendor/github.com/vishvananda/netlink/nl/seg6local_linux.go b/vendor/github.com/vishvananda/netlink/nl/seg6local_linux.go index 15001772..8172b847 100644 --- a/vendor/github.com/vishvananda/netlink/nl/seg6local_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/seg6local_linux.go @@ -12,6 +12,7 @@ const ( SEG6_LOCAL_NH6 SEG6_LOCAL_IIF SEG6_LOCAL_OIF + SEG6_LOCAL_BPF __SEG6_LOCAL_MAX ) const ( @@ -34,6 +35,7 @@ const ( SEG6_LOCAL_ACTION_END_S // 12 SEG6_LOCAL_ACTION_END_AS // 13 SEG6_LOCAL_ACTION_END_AM // 14 + SEG6_LOCAL_ACTION_END_BPF // 15 __SEG6_LOCAL_ACTION_MAX ) const ( @@ -71,6 +73,8 @@ func SEG6LocalActionString(action int) string { return "End.AS" case SEG6_LOCAL_ACTION_END_AM: return "End.AM" + case SEG6_LOCAL_ACTION_END_BPF: + return "End.BPF" } return "unknown" } diff --git a/vendor/github.com/vishvananda/netlink/nl/syscall.go b/vendor/github.com/vishvananda/netlink/nl/syscall.go index bdf6ba63..b5ba039a 100644 --- a/vendor/github.com/vishvananda/netlink/nl/syscall.go +++ b/vendor/github.com/vishvananda/netlink/nl/syscall.go @@ -46,6 +46,7 @@ const ( // socket diags related const ( SOCK_DIAG_BY_FAMILY = 20 /* linux.sock_diag.h */ + SOCK_DESTROY = 21 TCPDIAG_NOCOOKIE = 0xFFFFFFFF /* TCPDIAG_NOCOOKIE in net/ipv4/tcp_diag.h*/ ) diff --git a/vendor/github.com/vishvananda/netlink/nl/tc_linux.go b/vendor/github.com/vishvananda/netlink/nl/tc_linux.go index eb05ff1c..0720729a 100644 --- a/vendor/github.com/vishvananda/netlink/nl/tc_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/tc_linux.go @@ -1,8 +1,13 @@ package nl import ( + "bytes" "encoding/binary" + "fmt" + "net" "unsafe" + + "golang.org/x/sys/unix" ) // LinkLayer @@ -42,7 +47,14 @@ const ( TCA_FCNT TCA_STATS2 TCA_STAB - TCA_MAX = TCA_STAB + TCA_PAD + TCA_DUMP_INVISIBLE + TCA_CHAIN + TCA_HW_OFFLOAD + TCA_INGRESS_BLOCK + TCA_EGRESS_BLOCK + TCA_DUMP_FLAGS + TCA_MAX = TCA_DUMP_FLAGS ) const ( @@ -56,6 +68,12 @@ const ( TCA_ACT_OPTIONS TCA_ACT_INDEX TCA_ACT_STATS + TCA_ACT_PAD + TCA_ACT_COOKIE + TCA_ACT_FLAGS + TCA_ACT_HW_STATS + TCA_ACT_USED_HW_STATS + TCA_ACT_IN_HW_COUNT TCA_ACT_MAX ) @@ -71,7 +89,11 @@ const ( TCA_STATS_RATE_EST TCA_STATS_QUEUE TCA_STATS_APP - TCA_STATS_MAX = TCA_STATS_APP + TCA_STATS_RATE_EST64 + TCA_STATS_PAD + TCA_STATS_BASIC_HW + TCA_STATS_PKT64 + TCA_STATS_MAX = TCA_STATS_PKT64 ) const ( @@ -83,12 +105,13 @@ const ( SizeofTcNetemCorr = 0x0c SizeofTcNetemReorder = 0x08 SizeofTcNetemCorrupt = 0x08 + SizeOfTcNetemRate = 0x10 SizeofTcTbfQopt = 2*SizeofTcRateSpec + 0x0c SizeofTcHtbCopt = 2*SizeofTcRateSpec + 0x14 SizeofTcHtbGlob = 0x14 SizeofTcU32Key = 0x10 SizeofTcU32Sel = 0x10 // without keys - SizeofTcGen = 0x14 + SizeofTcGen = 0x16 SizeofTcConnmark = SizeofTcGen + 0x04 SizeofTcCsum = SizeofTcGen + 0x04 SizeofTcMirred = SizeofTcGen + 0x08 @@ -98,6 +121,7 @@ const ( SizeofTcSfqQopt = 0x0b SizeofTcSfqRedStats = 0x18 SizeofTcSfqQoptV1 = SizeofTcSfqQopt + SizeofTcSfqRedStats + 0x1c + SizeofUint32Bitfield = 0x8 ) // struct tcmsg { @@ -131,6 +155,18 @@ func (x *TcMsg) Serialize() []byte { return (*(*[SizeofTcMsg]byte)(unsafe.Pointer(x)))[:] } +type Tcf struct { + Install uint64 + LastUse uint64 + Expires uint64 + FirstUse uint64 +} + +func DeserializeTcf(b []byte) *Tcf { + const size = int(unsafe.Sizeof(Tcf{})) + return (*Tcf)(unsafe.Pointer(&b[0:size][0])) +} + // struct tcamsg { // unsigned char tca_family; // unsigned char tca__pad1; @@ -337,6 +373,26 @@ func (x *TcNetemCorrupt) Serialize() []byte { return (*(*[SizeofTcNetemCorrupt]byte)(unsafe.Pointer(x)))[:] } +// TcNetemRate is a struct that represents the rate of a netem qdisc +type TcNetemRate struct { + Rate uint32 + PacketOverhead int32 + CellSize uint32 + CellOverhead int32 +} + +func (msg *TcNetemRate) Len() int { + return SizeofTcRateSpec +} + +func DeserializeTcNetemRate(b []byte) *TcNetemRate { + return (*TcNetemRate)(unsafe.Pointer(&b[0:SizeofTcRateSpec][0])) +} + +func (msg *TcNetemRate) Serialize() []byte { + return (*(*[SizeOfTcNetemRate]byte)(unsafe.Pointer(msg)))[:] +} + // struct tc_tbf_qopt { // struct tc_ratespec rate; // struct tc_ratespec peakrate; @@ -804,7 +860,8 @@ const ( TCA_SKBEDIT_MARK TCA_SKBEDIT_PAD TCA_SKBEDIT_PTYPE - TCA_SKBEDIT_MAX = TCA_SKBEDIT_MARK + TCA_SKBEDIT_MASK + TCA_SKBEDIT_MAX ) type TcSkbEdit struct { @@ -891,6 +948,10 @@ const ( TCA_FQ_FLOW_REFILL_DELAY // flow credit refill delay in usec TCA_FQ_ORPHAN_MASK // mask applied to orphaned skb hashes TCA_FQ_LOW_RATE_THRESHOLD // per packet delay under this rate + TCA_FQ_CE_THRESHOLD // DCTCP-like CE-marking threshold + TCA_FQ_TIMER_SLACK // timer slack + TCA_FQ_HORIZON // time horizon in us + TCA_FQ_HORIZON_DROP // drop packets beyond horizon, or cap their EDT ) const ( @@ -1018,6 +1079,9 @@ const ( __TCA_FLOWER_MAX ) +const TCA_CLS_FLAGS_SKIP_HW = 1 << 0 /* don't offload filter to HW */ +const TCA_CLS_FLAGS_SKIP_SW = 1 << 1 /* don't use filter in SW */ + // struct tc_sfq_qopt { // unsigned quantum; /* Bytes per round allocated to flow */ // int perturb_period; /* Period of hash perturbation */ @@ -1046,14 +1110,14 @@ func (x *TcSfqQopt) Serialize() []byte { return (*(*[SizeofTcSfqQopt]byte)(unsafe.Pointer(x)))[:] } -// struct tc_sfqred_stats { -// __u32 prob_drop; /* Early drops, below max threshold */ -// __u32 forced_drop; /* Early drops, after max threshold */ -// __u32 prob_mark; /* Marked packets, below max threshold */ -// __u32 forced_mark; /* Marked packets, after max threshold */ -// __u32 prob_mark_head; /* Marked packets, below max threshold */ -// __u32 forced_mark_head;/* Marked packets, after max threshold */ -// }; +// struct tc_sfqred_stats { +// __u32 prob_drop; /* Early drops, below max threshold */ +// __u32 forced_drop; /* Early drops, after max threshold */ +// __u32 prob_mark; /* Marked packets, below max threshold */ +// __u32 forced_mark; /* Marked packets, after max threshold */ +// __u32 prob_mark_head; /* Marked packets, below max threshold */ +// __u32 forced_mark_head;/* Marked packets, after max threshold */ +// }; type TcSfqRedStats struct { ProbDrop uint32 ForcedDrop uint32 @@ -1075,22 +1139,26 @@ func (x *TcSfqRedStats) Serialize() []byte { return (*(*[SizeofTcSfqRedStats]byte)(unsafe.Pointer(x)))[:] } -// struct tc_sfq_qopt_v1 { -// struct tc_sfq_qopt v0; -// unsigned int depth; /* max number of packets per flow */ -// unsigned int headdrop; +// struct tc_sfq_qopt_v1 { +// struct tc_sfq_qopt v0; +// unsigned int depth; /* max number of packets per flow */ +// unsigned int headdrop; +// // /* SFQRED parameters */ -// __u32 limit; /* HARD maximal flow queue length (bytes) */ -// __u32 qth_min; /* Min average length threshold (bytes) */ -// __u32 qth_max; /* Max average length threshold (bytes) */ -// unsigned char Wlog; /* log(W) */ -// unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ -// unsigned char Scell_log; /* cell size for idle damping */ -// unsigned char flags; -// __u32 max_P; /* probability, high resolution */ +// +// __u32 limit; /* HARD maximal flow queue length (bytes) */ +// __u32 qth_min; /* Min average length threshold (bytes) */ +// __u32 qth_max; /* Max average length threshold (bytes) */ +// unsigned char Wlog; /* log(W) */ +// unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ +// unsigned char Scell_log; /* cell size for idle damping */ +// unsigned char flags; +// __u32 max_P; /* probability, high resolution */ +// // /* SFQRED stats */ -// struct tc_sfqred_stats stats; -// }; +// +// struct tc_sfqred_stats stats; +// }; type TcSfqQoptV1 struct { TcSfqQopt Depth uint32 @@ -1117,3 +1185,427 @@ func DeserializeTcSfqQoptV1(b []byte) *TcSfqQoptV1 { func (x *TcSfqQoptV1) Serialize() []byte { return (*(*[SizeofTcSfqQoptV1]byte)(unsafe.Pointer(x)))[:] } + +// IPProto represents Flower ip_proto attribute +type IPProto uint8 + +const ( + IPPROTO_TCP IPProto = unix.IPPROTO_TCP + IPPROTO_UDP IPProto = unix.IPPROTO_UDP + IPPROTO_SCTP IPProto = unix.IPPROTO_SCTP + IPPROTO_ICMP IPProto = unix.IPPROTO_ICMP + IPPROTO_ICMPV6 IPProto = unix.IPPROTO_ICMPV6 +) + +func (i IPProto) Serialize() []byte { + arr := make([]byte, 1) + arr[0] = byte(i) + return arr +} + +func (i IPProto) String() string { + switch i { + case IPPROTO_TCP: + return "tcp" + case IPPROTO_UDP: + return "udp" + case IPPROTO_SCTP: + return "sctp" + case IPPROTO_ICMP: + return "icmp" + case IPPROTO_ICMPV6: + return "icmpv6" + } + return fmt.Sprintf("%d", i) +} + +const ( + MaxOffs = 128 + SizeOfPeditSel = 24 + SizeOfPeditKey = 24 + + TCA_PEDIT_KEY_EX_HTYPE = 1 + TCA_PEDIT_KEY_EX_CMD = 2 +) + +const ( + TCA_PEDIT_UNSPEC = iota + TCA_PEDIT_TM + TCA_PEDIT_PARMS + TCA_PEDIT_PAD + TCA_PEDIT_PARMS_EX + TCA_PEDIT_KEYS_EX + TCA_PEDIT_KEY_EX +) + +// /* TCA_PEDIT_KEY_EX_HDR_TYPE_NETWROK is a special case for legacy users. It +// * means no specific header type - offset is relative to the network layer +// */ +type PeditHeaderType uint16 + +const ( + TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = iota + TCA_PEDIT_KEY_EX_HDR_TYPE_ETH + TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 + TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + TCA_PEDIT_KEY_EX_HDR_TYPE_TCP + TCA_PEDIT_KEY_EX_HDR_TYPE_UDP + __PEDIT_HDR_TYPE_MAX +) + +type PeditCmd uint16 + +const ( + TCA_PEDIT_KEY_EX_CMD_SET = 0 + TCA_PEDIT_KEY_EX_CMD_ADD = 1 +) + +type TcPeditSel struct { + TcGen + NKeys uint8 + Flags uint8 +} + +func DeserializeTcPeditKey(b []byte) *TcPeditKey { + return (*TcPeditKey)(unsafe.Pointer(&b[0:SizeOfPeditKey][0])) +} + +func DeserializeTcPedit(b []byte) (*TcPeditSel, []TcPeditKey) { + x := &TcPeditSel{} + copy((*(*[SizeOfPeditSel]byte)(unsafe.Pointer(x)))[:SizeOfPeditSel], b) + + var keys []TcPeditKey + + next := SizeOfPeditKey + var i uint8 + for i = 0; i < x.NKeys; i++ { + keys = append(keys, *DeserializeTcPeditKey(b[next:])) + next += SizeOfPeditKey + } + + return x, keys +} + +type TcPeditKey struct { + Mask uint32 + Val uint32 + Off uint32 + At uint32 + OffMask uint32 + Shift uint32 +} + +type TcPeditKeyEx struct { + HeaderType PeditHeaderType + Cmd PeditCmd +} + +type TcPedit struct { + Sel TcPeditSel + Keys []TcPeditKey + KeysEx []TcPeditKeyEx + Extend uint8 +} + +func (p *TcPedit) Encode(parent *RtAttr) { + parent.AddRtAttr(TCA_ACT_KIND, ZeroTerminated("pedit")) + actOpts := parent.AddRtAttr(TCA_ACT_OPTIONS, nil) + + bbuf := bytes.NewBuffer(make([]byte, 0, int(unsafe.Sizeof(p.Sel)+unsafe.Sizeof(p.Keys)))) + + bbuf.Write((*(*[SizeOfPeditSel]byte)(unsafe.Pointer(&p.Sel)))[:]) + + for i := uint8(0); i < p.Sel.NKeys; i++ { + bbuf.Write((*(*[SizeOfPeditKey]byte)(unsafe.Pointer(&p.Keys[i])))[:]) + } + actOpts.AddRtAttr(TCA_PEDIT_PARMS_EX, bbuf.Bytes()) + + exAttrs := actOpts.AddRtAttr(int(TCA_PEDIT_KEYS_EX|NLA_F_NESTED), nil) + for i := uint8(0); i < p.Sel.NKeys; i++ { + keyAttr := exAttrs.AddRtAttr(int(TCA_PEDIT_KEY_EX|NLA_F_NESTED), nil) + + htypeBuf := make([]byte, 2) + cmdBuf := make([]byte, 2) + + NativeEndian().PutUint16(htypeBuf, uint16(p.KeysEx[i].HeaderType)) + NativeEndian().PutUint16(cmdBuf, uint16(p.KeysEx[i].Cmd)) + + keyAttr.AddRtAttr(TCA_PEDIT_KEY_EX_HTYPE, htypeBuf) + keyAttr.AddRtAttr(TCA_PEDIT_KEY_EX_CMD, cmdBuf) + } +} + +func (p *TcPedit) SetEthDst(mac net.HardwareAddr) { + u32 := NativeEndian().Uint32(mac) + u16 := NativeEndian().Uint16(mac[4:]) + + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + tKey.Val = u32 + + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ + + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = uint32(u16) + tKey.Mask = 0xffff0000 + tKey.Off = 4 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ +} + +func (p *TcPedit) SetEthSrc(mac net.HardwareAddr) { + u16 := NativeEndian().Uint16(mac) + u32 := NativeEndian().Uint32(mac[2:]) + + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + tKey.Val = uint32(u16) << 16 + tKey.Mask = 0x0000ffff + tKey.Off = 4 + + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ + + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Mask = 0 + tKey.Off = 8 + + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ +} + +func (p *TcPedit) SetIPv6Src(ip6 net.IP) { + u32 := NativeEndian().Uint32(ip6[:4]) + + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 8 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ + + u32 = NativeEndian().Uint32(ip6[4:8]) + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 12 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ + + u32 = NativeEndian().Uint32(ip6[8:12]) + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 16 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ + + u32 = NativeEndian().Uint32(ip6[12:16]) + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 20 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ +} + +func (p *TcPedit) SetDstIP(ip net.IP) { + if ip.To4() != nil { + p.SetIPv4Dst(ip) + } else { + p.SetIPv6Dst(ip) + } +} + +func (p *TcPedit) SetSrcIP(ip net.IP) { + if ip.To4() != nil { + p.SetIPv4Src(ip) + } else { + p.SetIPv6Src(ip) + } +} + +func (p *TcPedit) SetIPv6Dst(ip6 net.IP) { + u32 := NativeEndian().Uint32(ip6[:4]) + + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 24 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ + + u32 = NativeEndian().Uint32(ip6[4:8]) + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 28 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ + + u32 = NativeEndian().Uint32(ip6[8:12]) + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 32 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ + + u32 = NativeEndian().Uint32(ip6[12:16]) + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 36 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ +} + +func (p *TcPedit) SetIPv4Src(ip net.IP) { + u32 := NativeEndian().Uint32(ip[:4]) + + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 12 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ +} + +func (p *TcPedit) SetIPv4Dst(ip net.IP) { + u32 := NativeEndian().Uint32(ip[:4]) + + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 16 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ +} + +// SetDstPort only tcp and udp are supported to set port +func (p *TcPedit) SetDstPort(dstPort uint16, protocol uint8) { + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + switch protocol { + case unix.IPPROTO_TCP: + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP + case unix.IPPROTO_UDP: + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP + default: + return + } + + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + tKey.Val = uint32(Swap16(dstPort)) << 16 + tKey.Mask = 0x0000ffff + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ +} + +// SetSrcPort only tcp and udp are supported to set port +func (p *TcPedit) SetSrcPort(srcPort uint16, protocol uint8) { + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + switch protocol { + case unix.IPPROTO_TCP: + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP + case unix.IPPROTO_UDP: + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP + default: + return + } + + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + tKey.Val = uint32(Swap16(srcPort)) + tKey.Mask = 0xffff0000 + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ +} diff --git a/vendor/github.com/vishvananda/netlink/nl/vdpa_linux.go b/vendor/github.com/vishvananda/netlink/nl/vdpa_linux.go new file mode 100644 index 00000000..f209125d --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/vdpa_linux.go @@ -0,0 +1,41 @@ +package nl + +const ( + VDPA_GENL_NAME = "vdpa" + VDPA_GENL_VERSION = 0x1 +) + +const ( + VDPA_CMD_UNSPEC = iota + VDPA_CMD_MGMTDEV_NEW + VDPA_CMD_MGMTDEV_GET /* can dump */ + VDPA_CMD_DEV_NEW + VDPA_CMD_DEV_DEL + VDPA_CMD_DEV_GET /* can dump */ + VDPA_CMD_DEV_CONFIG_GET /* can dump */ + VDPA_CMD_DEV_VSTATS_GET +) + +const ( + VDPA_ATTR_UNSPEC = iota + VDPA_ATTR_MGMTDEV_BUS_NAME + VDPA_ATTR_MGMTDEV_DEV_NAME + VDPA_ATTR_MGMTDEV_SUPPORTED_CLASSES + VDPA_ATTR_DEV_NAME + VDPA_ATTR_DEV_ID + VDPA_ATTR_DEV_VENDOR_ID + VDPA_ATTR_DEV_MAX_VQS + VDPA_ATTR_DEV_MAX_VQ_SIZE + VDPA_ATTR_DEV_MIN_VQ_SIZE + VDPA_ATTR_DEV_NET_CFG_MACADDR + VDPA_ATTR_DEV_NET_STATUS + VDPA_ATTR_DEV_NET_CFG_MAX_VQP + VDPA_ATTR_DEV_NET_CFG_MTU + VDPA_ATTR_DEV_NEGOTIATED_FEATURES + VDPA_ATTR_DEV_MGMTDEV_MAX_VQS + VDPA_ATTR_DEV_SUPPORTED_FEATURES + VDPA_ATTR_DEV_QUEUE_INDEX + VDPA_ATTR_DEV_VENDOR_ATTR_NAME + VDPA_ATTR_DEV_VENDOR_ATTR_VALUE + VDPA_ATTR_DEV_FEATURES +) diff --git a/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go b/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go index dce9073f..cdb318ba 100644 --- a/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go @@ -131,7 +131,15 @@ func (x *XfrmAddress) ToIP() net.IP { return ip } -func (x *XfrmAddress) ToIPNet(prefixlen uint8) *net.IPNet { +// family is only used when x and prefixlen are both 0 +func (x *XfrmAddress) ToIPNet(prefixlen uint8, family uint16) *net.IPNet { + empty := [SizeofXfrmAddress]byte{} + if bytes.Equal(x[:], empty[:]) && prefixlen == 0 { + if family == FAMILY_V6 { + return &net.IPNet{IP: net.ParseIP("::"), Mask: net.CIDRMask(int(prefixlen), 128)} + } + return &net.IPNet{IP: net.ParseIP("0.0.0.0"), Mask: net.CIDRMask(int(prefixlen), 32)} + } ip := x.ToIP() if GetIPFamily(ip) == FAMILY_V4 { return &net.IPNet{IP: ip, Mask: net.CIDRMask(int(prefixlen), 32)} diff --git a/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go b/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go index 43a947f2..e8920b9a 100644 --- a/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go @@ -15,6 +15,7 @@ const ( SizeofXfrmEncapTmpl = 0x18 SizeofXfrmUsersaFlush = 0x1 SizeofXfrmReplayStateEsn = 0x18 + SizeofXfrmReplayState = 0x0c ) const ( @@ -28,6 +29,11 @@ const ( XFRM_STATE_ESN = 128 ) +const ( + XFRM_SA_XFLAG_DONT_ENCAP_DSCP = 1 + XFRM_SA_XFLAG_OSEQ_MAY_WRAP = 2 +) + // struct xfrm_usersa_id { // xfrm_address_t daddr; // __be32 spi; @@ -103,6 +109,7 @@ func (msg *XfrmStats) Serialize() []byte { // }; // // #define XFRM_SA_XFLAG_DONT_ENCAP_DSCP 1 +// #define XFRM_SA_XFLAG_OSEQ_MAY_WRAP 2 // type XfrmUsersaInfo struct { @@ -332,3 +339,23 @@ func (msg *XfrmReplayStateEsn) Serialize() []byte { // We deliberately do not pass Bmp, as it gets set by the kernel. return (*(*[SizeofXfrmReplayStateEsn]byte)(unsafe.Pointer(msg)))[:] } + +// struct xfrm_replay_state { +// __u32 oseq; +// __u32 seq; +// __u32 bitmap; +// }; + +type XfrmReplayState struct { + OSeq uint32 + Seq uint32 + BitMap uint32 +} + +func DeserializeXfrmReplayState(b []byte) *XfrmReplayState { + return (*XfrmReplayState)(unsafe.Pointer(&b[0:SizeofXfrmReplayState][0])) +} + +func (msg *XfrmReplayState) Serialize() []byte { + return (*(*[SizeofXfrmReplayState]byte)(unsafe.Pointer(msg)))[:] +} diff --git a/vendor/github.com/vishvananda/netlink/proc_event_linux.go b/vendor/github.com/vishvananda/netlink/proc_event_linux.go index 53bc59a6..ac8762bd 100644 --- a/vendor/github.com/vishvananda/netlink/proc_event_linux.go +++ b/vendor/github.com/vishvananda/netlink/proc_event_linux.go @@ -63,15 +63,6 @@ type ExitProcEvent struct { ParentTgid uint32 } -type ExitProcEvent2 struct { - ProcessPid uint32 - ProcessTgid uint32 - ExitCode uint32 - ExitSignal uint32 - ParentPid uint32 - ParentTgid uint32 -} - func (e *ExitProcEvent) Pid() uint32 { return e.ProcessPid } diff --git a/vendor/github.com/vishvananda/netlink/protinfo.go b/vendor/github.com/vishvananda/netlink/protinfo.go index 60b23b37..0163cba3 100644 --- a/vendor/github.com/vishvananda/netlink/protinfo.go +++ b/vendor/github.com/vishvananda/netlink/protinfo.go @@ -6,14 +6,16 @@ import ( // Protinfo represents bridge flags from netlink. type Protinfo struct { - Hairpin bool - Guard bool - FastLeave bool - RootBlock bool - Learning bool - Flood bool - ProxyArp bool - ProxyArpWiFi bool + Hairpin bool + Guard bool + FastLeave bool + RootBlock bool + Learning bool + Flood bool + ProxyArp bool + ProxyArpWiFi bool + Isolated bool + NeighSuppress bool } // String returns a list of enabled flags @@ -47,6 +49,12 @@ func (prot *Protinfo) String() string { if prot.ProxyArpWiFi { boolStrings = append(boolStrings, "ProxyArpWiFi") } + if prot.Isolated { + boolStrings = append(boolStrings, "Isolated") + } + if prot.NeighSuppress { + boolStrings = append(boolStrings, "NeighSuppress") + } return strings.Join(boolStrings, " ") } diff --git a/vendor/github.com/vishvananda/netlink/protinfo_linux.go b/vendor/github.com/vishvananda/netlink/protinfo_linux.go index 15b65123..1ba25d3c 100644 --- a/vendor/github.com/vishvananda/netlink/protinfo_linux.go +++ b/vendor/github.com/vishvananda/netlink/protinfo_linux.go @@ -68,6 +68,10 @@ func parseProtinfo(infos []syscall.NetlinkRouteAttr) (pi Protinfo) { pi.ProxyArp = byteToBool(info.Value[0]) case nl.IFLA_BRPORT_PROXYARP_WIFI: pi.ProxyArpWiFi = byteToBool(info.Value[0]) + case nl.IFLA_BRPORT_ISOLATED: + pi.Isolated = byteToBool(info.Value[0]) + case nl.IFLA_BRPORT_NEIGH_SUPPRESS: + pi.NeighSuppress = byteToBool(info.Value[0]) } } return diff --git a/vendor/github.com/vishvananda/netlink/qdisc.go b/vendor/github.com/vishvananda/netlink/qdisc.go index f594c9c2..067743d3 100644 --- a/vendor/github.com/vishvananda/netlink/qdisc.go +++ b/vendor/github.com/vishvananda/netlink/qdisc.go @@ -17,19 +17,29 @@ const ( HANDLE_MIN_EGRESS = 0xFFFFFFF3 ) +const ( + HORIZON_DROP_POLICY_CAP = 0 + HORIZON_DROP_POLICY_DROP = 1 + HORIZON_DROP_POLICY_DEFAULT = 255 +) + type Qdisc interface { Attrs() *QdiscAttrs Type() string } +type QdiscStatistics ClassStatistics + // QdiscAttrs represents a netlink qdisc. A qdisc is associated with a link, // has a handle, a parent and a refcnt. The root qdisc of a device should // have parent == HANDLE_ROOT. type QdiscAttrs struct { - LinkIndex int - Handle uint32 - Parent uint32 - Refcnt uint32 // read only + LinkIndex int + Handle uint32 + Parent uint32 + Refcnt uint32 // read only + IngressBlock *uint32 + Statistics *QdiscStatistics } func (q QdiscAttrs) String() string { @@ -113,6 +123,7 @@ type Htb struct { Defcls uint32 Debug uint32 DirectPkts uint32 + DirectQlen *uint32 } func NewHtb(attrs QdiscAttrs) *Htb { @@ -123,6 +134,7 @@ func NewHtb(attrs QdiscAttrs) *Htb { Rate2Quantum: 10, Debug: 0, DirectPkts: 0, + DirectQlen: nil, } } @@ -150,6 +162,7 @@ type NetemQdiscAttrs struct { ReorderCorr float32 // in % CorruptProb float32 // in % CorruptCorr float32 // in % + Rate64 uint64 } func (q NetemQdiscAttrs) String() string { @@ -174,6 +187,7 @@ type Netem struct { ReorderCorr uint32 CorruptProb uint32 CorruptCorr uint32 + Rate64 uint64 } func (netem *Netem) String() string { @@ -210,6 +224,19 @@ func (qdisc *Tbf) Type() string { return "tbf" } +// Clsact is a qdisc for adding filters +type Clsact struct { + QdiscAttrs +} + +func (qdisc *Clsact) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Clsact) Type() string { + return "clsact" +} + // Ingress is a qdisc for adding ingress filters type Ingress struct { QdiscAttrs @@ -278,22 +305,25 @@ type Fq struct { FlowDefaultRate uint32 FlowMaxRate uint32 // called BucketsLog under the hood - Buckets uint32 - FlowRefillDelay uint32 - LowRateThreshold uint32 + Buckets uint32 + FlowRefillDelay uint32 + LowRateThreshold uint32 + Horizon uint32 + HorizonDropPolicy uint8 } func (fq *Fq) String() string { return fmt.Sprintf( - "{PacketLimit: %v, FlowPacketLimit: %v, Quantum: %v, InitialQuantum: %v, Pacing: %v, FlowDefaultRate: %v, FlowMaxRate: %v, Buckets: %v, FlowRefillDelay: %v, LowRateThreshold: %v}", - fq.PacketLimit, fq.FlowPacketLimit, fq.Quantum, fq.InitialQuantum, fq.Pacing, fq.FlowDefaultRate, fq.FlowMaxRate, fq.Buckets, fq.FlowRefillDelay, fq.LowRateThreshold, + "{PacketLimit: %v, FlowPacketLimit: %v, Quantum: %v, InitialQuantum: %v, Pacing: %v, FlowDefaultRate: %v, FlowMaxRate: %v, Buckets: %v, FlowRefillDelay: %v, LowRateThreshold: %v, Horizon: %v, HorizonDropPolicy: %v}", + fq.PacketLimit, fq.FlowPacketLimit, fq.Quantum, fq.InitialQuantum, fq.Pacing, fq.FlowDefaultRate, fq.FlowMaxRate, fq.Buckets, fq.FlowRefillDelay, fq.LowRateThreshold, fq.Horizon, fq.HorizonDropPolicy, ) } func NewFq(attrs QdiscAttrs) *Fq { return &Fq{ - QdiscAttrs: attrs, - Pacing: 1, + QdiscAttrs: attrs, + Pacing: 1, + HorizonDropPolicy: HORIZON_DROP_POLICY_DEFAULT, } } diff --git a/vendor/github.com/vishvananda/netlink/qdisc_linux.go b/vendor/github.com/vishvananda/netlink/qdisc_linux.go index e182e1cf..e732ae3b 100644 --- a/vendor/github.com/vishvananda/netlink/qdisc_linux.go +++ b/vendor/github.com/vishvananda/netlink/qdisc_linux.go @@ -5,6 +5,7 @@ import ( "io/ioutil" "strconv" "strings" + "sync" "syscall" "github.com/vishvananda/netlink/nl" @@ -17,6 +18,7 @@ func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem { var lossCorr, delayCorr, duplicateCorr uint32 var reorderProb, reorderCorr uint32 var corruptProb, corruptCorr uint32 + var rate64 uint64 latency := nattrs.Latency loss := Percentage2u32(nattrs.Loss) @@ -57,6 +59,7 @@ func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem { corruptProb = Percentage2u32(nattrs.CorruptProb) corruptCorr = Percentage2u32(nattrs.CorruptCorr) + rate64 = nattrs.Rate64 return &Netem{ QdiscAttrs: attrs, @@ -73,6 +76,7 @@ func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem { ReorderCorr: reorderCorr, CorruptProb: corruptProb, CorruptCorr: corruptCorr, + Rate64: rate64, } } @@ -159,6 +163,9 @@ func (h *Handle) qdiscModify(cmd, flags int, qdisc Qdisc) error { func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error { req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type()))) + if qdisc.Attrs().IngressBlock != nil { + req.AddData(nl.NewRtAttr(nl.TCA_INGRESS_BLOCK, nl.Uint32Attr(*qdisc.Attrs().IngressBlock))) + } options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) @@ -194,7 +201,9 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error { opt.Debug = qdisc.Debug opt.DirectPkts = qdisc.DirectPkts options.AddRtAttr(nl.TCA_HTB_INIT, opt.Serialize()) - // options.AddRtAttr(nl.TCA_HTB_DIRECT_QLEN, opt.Serialize()) + if qdisc.DirectQlen != nil { + options.AddRtAttr(nl.TCA_HTB_DIRECT_QLEN, nl.Uint32Attr(*qdisc.DirectQlen)) + } case *Hfsc: opt := nl.TcHfscOpt{} opt.Defcls = qdisc.Defcls @@ -231,6 +240,19 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error { if reorder.Probability > 0 { options.AddRtAttr(nl.TCA_NETEM_REORDER, reorder.Serialize()) } + // Rate + if qdisc.Rate64 > 0 { + rate := nl.TcNetemRate{} + if qdisc.Rate64 >= uint64(1<<32) { + options.AddRtAttr(nl.TCA_NETEM_RATE64, nl.Uint64Attr(qdisc.Rate64)) + rate.Rate = ^uint32(0) + } else { + rate.Rate = uint32(qdisc.Rate64) + } + options.AddRtAttr(nl.TCA_NETEM_RATE, rate.Serialize()) + } + case *Clsact: + options = nil case *Ingress: // ingress filters must use the proper handle if qdisc.Attrs().Parent != HANDLE_INGRESS { @@ -265,6 +287,9 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error { if qdisc.Buckets > 0 { options.AddRtAttr(nl.TCA_FQ_BUCKETS_LOG, nl.Uint32Attr((uint32(qdisc.Buckets)))) } + if qdisc.PacketLimit > 0 { + options.AddRtAttr(nl.TCA_FQ_PLIMIT, nl.Uint32Attr((uint32(qdisc.PacketLimit)))) + } if qdisc.LowRateThreshold > 0 { options.AddRtAttr(nl.TCA_FQ_LOW_RATE_THRESHOLD, nl.Uint32Attr((uint32(qdisc.LowRateThreshold)))) } @@ -286,6 +311,12 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error { if qdisc.FlowDefaultRate > 0 { options.AddRtAttr(nl.TCA_FQ_FLOW_DEFAULT_RATE, nl.Uint32Attr((uint32(qdisc.FlowDefaultRate)))) } + if qdisc.Horizon > 0 { + options.AddRtAttr(nl.TCA_FQ_HORIZON, nl.Uint32Attr(qdisc.Horizon)) + } + if qdisc.HorizonDropPolicy != HORIZON_DROP_POLICY_DEFAULT { + options.AddRtAttr(nl.TCA_FQ_HORIZON_DROP, nl.Uint8Attr(qdisc.HorizonDropPolicy)) + } case *Sfq: opt := nl.TcSfqQoptV1{} opt.TcSfqQopt.Quantum = qdisc.Quantum @@ -380,6 +411,8 @@ func (h *Handle) QdiscList(link Link) ([]Qdisc, error) { qdisc = &Netem{} case "sfq": qdisc = &Sfq{} + case "clsact": + qdisc = &Clsact{} default: qdisc = &GenericQdisc{QdiscType: qdiscType} } @@ -442,6 +475,22 @@ func (h *Handle) QdiscList(link Link) ([]Qdisc, error) { // no options for ingress } + case nl.TCA_INGRESS_BLOCK: + ingressBlock := new(uint32) + *ingressBlock = native.Uint32(attr.Value) + base.IngressBlock = ingressBlock + case nl.TCA_STATS: + s, err := parseTcStats(attr.Value) + if err != nil { + return nil, err + } + base.Statistics = (*QdiscStatistics)(s) + case nl.TCA_STATS2: + s, err := parseTcStats2(attr.Value) + if err != nil { + return nil, err + } + base.Statistics = (*QdiscStatistics)(s) } } *qdisc.Attrs() = base @@ -479,8 +528,8 @@ func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { htb.Debug = opt.Debug htb.DirectPkts = opt.DirectPkts case nl.TCA_HTB_DIRECT_QLEN: - // TODO - //htb.DirectQlen = native.uint32(datum.Value) + directQlen := native.Uint32(datum.Value) + htb.DirectQlen = &directQlen } } return nil @@ -546,6 +595,11 @@ func parseFqData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { fq.FlowMaxRate = native.Uint32(datum.Value) case nl.TCA_FQ_FLOW_DEFAULT_RATE: fq.FlowDefaultRate = native.Uint32(datum.Value) + case nl.TCA_FQ_HORIZON: + fq.Horizon = native.Uint32(datum.Value) + case nl.TCA_FQ_HORIZON_DROP: + fq.HorizonDropPolicy = datum.Value[0] + } } return nil @@ -564,6 +618,8 @@ func parseNetemData(qdisc Qdisc, value []byte) error { if err != nil { return err } + var rate *nl.TcNetemRate + var rate64 uint64 for _, datum := range data { switch datum.Attr.Type { case nl.TCA_NETEM_CORR: @@ -579,8 +635,19 @@ func parseNetemData(qdisc Qdisc, value []byte) error { opt := nl.DeserializeTcNetemReorder(datum.Value) netem.ReorderProb = opt.Probability netem.ReorderCorr = opt.Correlation + case nl.TCA_NETEM_RATE: + rate = nl.DeserializeTcNetemRate(datum.Value) + case nl.TCA_NETEM_RATE64: + rate64 = native.Uint64(datum.Value) } } + if rate != nil { + netem.Rate64 = uint64(rate.Rate) + if rate64 > 0 { + netem.Rate64 = rate64 + } + } + return nil } @@ -624,6 +691,9 @@ var ( tickInUsec float64 clockFactor float64 hz float64 + + // Without this, the go race detector may report races. + initClockMutex sync.Mutex ) func initClock() { @@ -658,6 +728,8 @@ func initClock() { } func TickInUsec() float64 { + initClockMutex.Lock() + defer initClockMutex.Unlock() if tickInUsec == 0.0 { initClock() } @@ -665,6 +737,8 @@ func TickInUsec() float64 { } func ClockFactor() float64 { + initClockMutex.Lock() + defer initClockMutex.Unlock() if clockFactor == 0.0 { initClock() } @@ -672,6 +746,8 @@ func ClockFactor() float64 { } func Hz() float64 { + initClockMutex.Lock() + defer initClockMutex.Unlock() if hz == 0.0 { initClock() } diff --git a/vendor/github.com/vishvananda/netlink/route.go b/vendor/github.com/vishvananda/netlink/route.go index 79cc218e..1b4555d5 100644 --- a/vendor/github.com/vishvananda/netlink/route.go +++ b/vendor/github.com/vishvananda/netlink/route.go @@ -154,8 +154,15 @@ type flagString struct { } // RouteUpdate is sent when a route changes - type is RTM_NEWROUTE or RTM_DELROUTE + +// NlFlags is only non-zero for RTM_NEWROUTE, the following flags can be set: +// - unix.NLM_F_REPLACE - Replace existing matching config object with this request +// - unix.NLM_F_EXCL - Don't replace the config object if it already exists +// - unix.NLM_F_CREATE - Create config object if it doesn't already exist +// - unix.NLM_F_APPEND - Add to the end of the object list type RouteUpdate struct { - Type uint16 + Type uint16 + NlFlags uint16 Route } diff --git a/vendor/github.com/vishvananda/netlink/route_linux.go b/vendor/github.com/vishvananda/netlink/route_linux.go index 8da88665..0cd4f836 100644 --- a/vendor/github.com/vishvananda/netlink/route_linux.go +++ b/vendor/github.com/vishvananda/netlink/route_linux.go @@ -41,7 +41,6 @@ func (s Scope) String() string { } } - const ( FLAG_ONLINK NextHopFlag = unix.RTNH_F_ONLINK FLAG_PERVASIVE NextHopFlag = unix.RTNH_F_PERVASIVE @@ -274,6 +273,16 @@ type SEG6LocalEncap struct { In6Addr net.IP Iif int Oif int + bpf bpfObj +} + +func (e *SEG6LocalEncap) SetProg(progFd int, progName string) error { + if progFd <= 0 { + return fmt.Errorf("seg6local bpf SetProg: invalid fd") + } + e.bpf.progFd = progFd + e.bpf.progName = progName + return nil } func (e *SEG6LocalEncap) Type() int { @@ -307,6 +316,22 @@ func (e *SEG6LocalEncap) Decode(buf []byte) error { case nl.SEG6_LOCAL_OIF: e.Oif = int(native.Uint32(attr.Value[0:4])) e.Flags[nl.SEG6_LOCAL_OIF] = true + case nl.SEG6_LOCAL_BPF: + var bpfAttrs []syscall.NetlinkRouteAttr + bpfAttrs, err = nl.ParseRouteAttr(attr.Value) + bpfobj := bpfObj{} + for _, bpfAttr := range bpfAttrs { + switch bpfAttr.Attr.Type { + case nl.LWT_BPF_PROG_FD: + bpfobj.progFd = int(native.Uint32(bpfAttr.Value)) + case nl.LWT_BPF_PROG_NAME: + bpfobj.progName = string(bpfAttr.Value) + default: + err = fmt.Errorf("seg6local bpf decode: unknown attribute: Type %d", bpfAttr.Attr) + } + } + e.bpf = bpfobj + e.Flags[nl.SEG6_LOCAL_BPF] = true } } return err @@ -368,6 +393,16 @@ func (e *SEG6LocalEncap) Encode() ([]byte, error) { native.PutUint32(attr[4:], uint32(e.Oif)) res = append(res, attr...) } + if e.Flags[nl.SEG6_LOCAL_BPF] { + attr := nl.NewRtAttr(nl.SEG6_LOCAL_BPF, []byte{}) + if e.bpf.progFd != 0 { + attr.AddRtAttr(nl.LWT_BPF_PROG_FD, nl.Uint32Attr(uint32(e.bpf.progFd))) + } + if e.bpf.progName != "" { + attr.AddRtAttr(nl.LWT_BPF_PROG_NAME, nl.ZeroTerminated(e.bpf.progName)) + } + res = append(res, attr.Serialize()...) + } return res, err } func (e *SEG6LocalEncap) String() string { @@ -401,12 +436,15 @@ func (e *SEG6LocalEncap) String() string { } if e.Flags[nl.SEG6_LOCAL_SRH] { segs := make([]string, 0, len(e.Segments)) - //append segment backwards (from n to 0) since seg#0 is the last segment. + // append segment backwards (from n to 0) since seg#0 is the last segment. for i := len(e.Segments); i > 0; i-- { segs = append(segs, e.Segments[i-1].String()) } strs = append(strs, fmt.Sprintf("segs %d [ %s ]", len(e.Segments), strings.Join(segs, " "))) } + if e.Flags[nl.SEG6_LOCAL_BPF] { + strs = append(strs, fmt.Sprintf("bpf %s[%d]", e.bpf.progName, e.bpf.progFd)) + } return strings.Join(strs, " ") } func (e *SEG6LocalEncap) Equal(x Encap) bool { @@ -438,7 +476,7 @@ func (e *SEG6LocalEncap) Equal(x Encap) bool { if !e.InAddr.Equal(o.InAddr) || !e.In6Addr.Equal(o.In6Addr) { return false } - if e.Action != o.Action || e.Table != o.Table || e.Iif != o.Iif || e.Oif != o.Oif { + if e.Action != o.Action || e.Table != o.Table || e.Iif != o.Iif || e.Oif != o.Oif || e.bpf != o.bpf { return false } return true @@ -590,6 +628,109 @@ func (e *BpfEncap) Equal(x Encap) bool { return true } +// IP6tnlEncap definition +type IP6tnlEncap struct { + ID uint64 + Dst net.IP + Src net.IP + Hoplimit uint8 + TC uint8 + Flags uint16 +} + +func (e *IP6tnlEncap) Type() int { + return nl.LWTUNNEL_ENCAP_IP6 +} + +func (e *IP6tnlEncap) Decode(buf []byte) error { + attrs, err := nl.ParseRouteAttr(buf) + if err != nil { + return err + } + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.LWTUNNEL_IP6_ID: + e.ID = uint64(native.Uint64(attr.Value[0:4])) + case nl.LWTUNNEL_IP6_DST: + e.Dst = net.IP(attr.Value[:]) + case nl.LWTUNNEL_IP6_SRC: + e.Src = net.IP(attr.Value[:]) + case nl.LWTUNNEL_IP6_HOPLIMIT: + e.Hoplimit = attr.Value[0] + case nl.LWTUNNEL_IP6_TC: + // e.TC = attr.Value[0] + err = fmt.Errorf("decoding TC in IP6tnlEncap is not supported") + case nl.LWTUNNEL_IP6_FLAGS: + // e.Flags = uint16(native.Uint16(attr.Value[0:2])) + err = fmt.Errorf("decoding FLAG in IP6tnlEncap is not supported") + case nl.LWTUNNEL_IP6_PAD: + err = fmt.Errorf("decoding PAD in IP6tnlEncap is not supported") + case nl.LWTUNNEL_IP6_OPTS: + err = fmt.Errorf("decoding OPTS in IP6tnlEncap is not supported") + } + } + return err +} + +func (e *IP6tnlEncap) Encode() ([]byte, error) { + + final := []byte{} + + resID := make([]byte, 12) + native.PutUint16(resID, 12) // 2+2+8 + native.PutUint16(resID[2:], nl.LWTUNNEL_IP6_ID) + native.PutUint64(resID[4:], 0) + final = append(final, resID...) + + resDst := make([]byte, 4) + native.PutUint16(resDst, 20) // 2+2+16 + native.PutUint16(resDst[2:], nl.LWTUNNEL_IP6_DST) + resDst = append(resDst, e.Dst...) + final = append(final, resDst...) + + resSrc := make([]byte, 4) + native.PutUint16(resSrc, 20) + native.PutUint16(resSrc[2:], nl.LWTUNNEL_IP6_SRC) + resSrc = append(resSrc, e.Src...) + final = append(final, resSrc...) + + // resTc := make([]byte, 5) + // native.PutUint16(resTc, 5) + // native.PutUint16(resTc[2:], nl.LWTUNNEL_IP6_TC) + // resTc[4] = e.TC + // final = append(final,resTc...) + + resHops := make([]byte, 5) + native.PutUint16(resHops, 5) + native.PutUint16(resHops[2:], nl.LWTUNNEL_IP6_HOPLIMIT) + resHops[4] = e.Hoplimit + final = append(final, resHops...) + + // resFlags := make([]byte, 6) + // native.PutUint16(resFlags, 6) + // native.PutUint16(resFlags[2:], nl.LWTUNNEL_IP6_FLAGS) + // native.PutUint16(resFlags[4:], e.Flags) + // final = append(final,resFlags...) + + return final, nil +} + +func (e *IP6tnlEncap) String() string { + return fmt.Sprintf("id %d src %s dst %s hoplimit %d tc %d flags 0x%.4x", e.ID, e.Src, e.Dst, e.Hoplimit, e.TC, e.Flags) +} + +func (e *IP6tnlEncap) Equal(x Encap) bool { + o, ok := x.(*IP6tnlEncap) + if !ok { + return false + } + + if e.ID != o.ID || e.Flags != o.Flags || e.Hoplimit != o.Hoplimit || e.Src.Equal(o.Src) || e.Dst.Equal(o.Dst) || e.TC != o.TC { + return false + } + return true +} + type Via struct { AddrFamily int Addr net.IP @@ -656,7 +797,8 @@ func RouteAdd(route *Route) error { func (h *Handle) RouteAdd(route *Route) error { flags := unix.NLM_F_CREATE | unix.NLM_F_EXCL | unix.NLM_F_ACK req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) - return h.routeHandle(route, req, nl.NewRtMsg()) + _, err := h.routeHandle(route, req, nl.NewRtMsg()) + return err } // RouteAppend will append a route to the system. @@ -670,7 +812,8 @@ func RouteAppend(route *Route) error { func (h *Handle) RouteAppend(route *Route) error { flags := unix.NLM_F_CREATE | unix.NLM_F_APPEND | unix.NLM_F_ACK req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) - return h.routeHandle(route, req, nl.NewRtMsg()) + _, err := h.routeHandle(route, req, nl.NewRtMsg()) + return err } // RouteAddEcmp will add a route to the system. @@ -682,7 +825,23 @@ func RouteAddEcmp(route *Route) error { func (h *Handle) RouteAddEcmp(route *Route) error { flags := unix.NLM_F_CREATE | unix.NLM_F_ACK req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) - return h.routeHandle(route, req, nl.NewRtMsg()) + _, err := h.routeHandle(route, req, nl.NewRtMsg()) + return err +} + +// RouteChange will change an existing route in the system. +// Equivalent to: `ip route change $route` +func RouteChange(route *Route) error { + return pkgHandle.RouteChange(route) +} + +// RouteChange will change an existing route in the system. +// Equivalent to: `ip route change $route` +func (h *Handle) RouteChange(route *Route) error { + flags := unix.NLM_F_REPLACE | unix.NLM_F_ACK + req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) + _, err := h.routeHandle(route, req, nl.NewRtMsg()) + return err } // RouteReplace will add a route to the system. @@ -696,7 +855,8 @@ func RouteReplace(route *Route) error { func (h *Handle) RouteReplace(route *Route) error { flags := unix.NLM_F_CREATE | unix.NLM_F_REPLACE | unix.NLM_F_ACK req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) - return h.routeHandle(route, req, nl.NewRtMsg()) + _, err := h.routeHandle(route, req, nl.NewRtMsg()) + return err } // RouteDel will delete a route from the system. @@ -709,12 +869,27 @@ func RouteDel(route *Route) error { // Equivalent to: `ip route del $route` func (h *Handle) RouteDel(route *Route) error { req := h.newNetlinkRequest(unix.RTM_DELROUTE, unix.NLM_F_ACK) - return h.routeHandle(route, req, nl.NewRtDelMsg()) + _, err := h.routeHandle(route, req, nl.NewRtDelMsg()) + return err } -func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) error { - if (route.Dst == nil || route.Dst.IP == nil) && route.Src == nil && route.Gw == nil && route.MPLSDst == nil { - return fmt.Errorf("one of Dst.IP, Src, or Gw must not be nil") +func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) ([][]byte, error) { + if err := h.prepareRouteReq(route, req, msg); err != nil { + return nil, err + } + return req.Execute(unix.NETLINK_ROUTE, 0) +} + +func (h *Handle) routeHandleIter(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg, f func(msg []byte) bool) error { + if err := h.prepareRouteReq(route, req, msg); err != nil { + return err + } + return req.ExecuteIter(unix.NETLINK_ROUTE, 0, f) +} + +func (h *Handle) prepareRouteReq(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) error { + if req.NlMsghdr.Type != unix.RTM_GETROUTE && (route.Dst == nil || route.Dst.IP == nil) && route.Src == nil && route.Gw == nil && route.MPLSDst == nil { + return fmt.Errorf("either Dst.IP, Src.IP or Gw must be set") } family := -1 @@ -968,19 +1143,21 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg msg.Flags = uint32(route.Flags) msg.Scope = uint8(route.Scope) - msg.Family = uint8(family) + // only overwrite family if it was not set in msg + if msg.Family == 0 { + msg.Family = uint8(family) + } req.AddData(msg) for _, attr := range rtAttrs { req.AddData(attr) } - b := make([]byte, 4) - native.PutUint32(b, uint32(route.LinkIndex)) - - req.AddData(nl.NewRtAttr(unix.RTA_OIF, b)) - - _, err := req.Execute(unix.NETLINK_ROUTE, 0) - return err + if (req.NlMsghdr.Type != unix.RTM_GETROUTE) || (req.NlMsghdr.Type == unix.RTM_GETROUTE && route.LinkIndex > 0) { + b := make([]byte, 4) + native.PutUint32(b, uint32(route.LinkIndex)) + req.AddData(nl.NewRtAttr(unix.RTA_OIF, b)) + } + return nil } // RouteList gets a list of routes in the system. @@ -994,13 +1171,13 @@ func RouteList(link Link, family int) ([]Route, error) { // Equivalent to: `ip route show`. // The list can be filtered by link and ip family. func (h *Handle) RouteList(link Link, family int) ([]Route, error) { - var routeFilter *Route + routeFilter := &Route{} if link != nil { - routeFilter = &Route{ - LinkIndex: link.Attrs().Index, - } + routeFilter.LinkIndex = link.Attrs().Index + + return h.RouteListFiltered(family, routeFilter, RT_FILTER_OIF) } - return h.RouteListFiltered(family, routeFilter, RT_FILTER_OIF) + return h.RouteListFiltered(family, routeFilter, 0) } // RouteListFiltered gets a list of routes in the system filtered with specified rules. @@ -1012,68 +1189,94 @@ func RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, e // RouteListFiltered gets a list of routes in the system filtered with specified rules. // All rules must be defined in RouteFilter struct func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, error) { - req := h.newNetlinkRequest(unix.RTM_GETROUTE, unix.NLM_F_DUMP) - rtmsg := nl.NewRtMsg() - rtmsg.Family = uint8(family) - req.AddData(rtmsg) - - msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWROUTE) + var res []Route + err := h.RouteListFilteredIter(family, filter, filterMask, func(route Route) (cont bool) { + res = append(res, route) + return true + }) if err != nil { return nil, err } + return res, nil +} - var res []Route - for _, m := range msgs { +// RouteListFilteredIter passes each route that matches the filter to the given iterator func. Iteration continues +// until all routes are loaded or the func returns false. +func RouteListFilteredIter(family int, filter *Route, filterMask uint64, f func(Route) (cont bool)) error { + return pkgHandle.RouteListFilteredIter(family, filter, filterMask, f) +} + +func (h *Handle) RouteListFilteredIter(family int, filter *Route, filterMask uint64, f func(Route) (cont bool)) error { + req := h.newNetlinkRequest(unix.RTM_GETROUTE, unix.NLM_F_DUMP) + rtmsg := &nl.RtMsg{} + rtmsg.Family = uint8(family) + + var parseErr error + err := h.routeHandleIter(filter, req, rtmsg, func(m []byte) bool { msg := nl.DeserializeRtMsg(m) + if family != FAMILY_ALL && msg.Family != uint8(family) { + // Ignore routes not matching requested family + return true + } if msg.Flags&unix.RTM_F_CLONED != 0 { // Ignore cloned routes - continue + return true } if msg.Table != unix.RT_TABLE_MAIN { - if filter == nil || filter != nil && filterMask&RT_FILTER_TABLE == 0 { + if filter == nil || filterMask&RT_FILTER_TABLE == 0 { // Ignore non-main tables - continue + return true } } route, err := deserializeRoute(m) if err != nil { - return nil, err + parseErr = err + return false } if filter != nil { switch { case filterMask&RT_FILTER_TABLE != 0 && filter.Table != unix.RT_TABLE_UNSPEC && route.Table != filter.Table: - continue + return true case filterMask&RT_FILTER_PROTOCOL != 0 && route.Protocol != filter.Protocol: - continue + return true case filterMask&RT_FILTER_SCOPE != 0 && route.Scope != filter.Scope: - continue + return true case filterMask&RT_FILTER_TYPE != 0 && route.Type != filter.Type: - continue + return true case filterMask&RT_FILTER_TOS != 0 && route.Tos != filter.Tos: - continue + return true case filterMask&RT_FILTER_REALM != 0 && route.Realm != filter.Realm: - continue + return true case filterMask&RT_FILTER_OIF != 0 && route.LinkIndex != filter.LinkIndex: - continue + return true case filterMask&RT_FILTER_IIF != 0 && route.ILinkIndex != filter.ILinkIndex: - continue + return true case filterMask&RT_FILTER_GW != 0 && !route.Gw.Equal(filter.Gw): - continue + return true case filterMask&RT_FILTER_SRC != 0 && !route.Src.Equal(filter.Src): - continue + return true case filterMask&RT_FILTER_DST != 0: if filter.MPLSDst == nil || route.MPLSDst == nil || (*filter.MPLSDst) != (*route.MPLSDst) { + if filter.Dst == nil { + filter.Dst = genZeroIPNet(family) + } if !ipNetEqual(route.Dst, filter.Dst) { - continue + return true } } case filterMask&RT_FILTER_HOPLIMIT != 0 && route.Hoplimit != filter.Hoplimit: - continue + return true } } - res = append(res, route) + return f(route) + }) + if err != nil { + return err } - return res, nil + if parseErr != nil { + return parseErr + } + return nil } // deserializeRoute decodes a binary netlink message into a Route struct @@ -1257,6 +1460,27 @@ func deserializeRoute(m []byte) (Route, error) { } } + // Same logic to generate "default" dst with iproute2 implementation + if route.Dst == nil { + var addLen int + var ip net.IP + switch msg.Family { + case FAMILY_V4: + addLen = net.IPv4len + ip = net.IPv4zero + case FAMILY_V6: + addLen = net.IPv6len + ip = net.IPv6zero + } + + if addLen != 0 { + route.Dst = &net.IPNet{ + IP: ip, + Mask: net.CIDRMask(int(msg.Dst_len), 8*addLen), + } + } + } + if len(encap.Value) != 0 && len(encapType.Value) != 0 { typ := int(native.Uint16(encapType.Value[0:2])) var e Encap @@ -1291,10 +1515,14 @@ func deserializeRoute(m []byte) (Route, error) { // RouteGetOptions contains a set of options to use with // RouteGetWithOptions type RouteGetOptions struct { - Iif string - Oif string - VrfName string - SrcAddr net.IP + Iif string + IifIndex int + Oif string + VrfName string + SrcAddr net.IP + UID *uint32 + Mark uint32 + FIBMatch bool } // RouteGetWithOptions gets a route to a specific destination from the host system. @@ -1330,6 +1558,9 @@ func (h *Handle) RouteGetWithOptions(destination net.IP, options *RouteGetOption msg.Src_len = bitlen } msg.Flags = unix.RTM_F_LOOKUP_TABLE + if options != nil && options.FIBMatch { + msg.Flags |= unix.RTM_F_FIB_MATCH + } req.AddData(msg) rtaDst := nl.NewRtAttr(unix.RTA_DST, destinationData) @@ -1337,7 +1568,7 @@ func (h *Handle) RouteGetWithOptions(destination net.IP, options *RouteGetOption if options != nil { if options.VrfName != "" { - link, err := LinkByName(options.VrfName) + link, err := h.LinkByName(options.VrfName) if err != nil { return nil, err } @@ -1347,20 +1578,27 @@ func (h *Handle) RouteGetWithOptions(destination net.IP, options *RouteGetOption req.AddData(nl.NewRtAttr(unix.RTA_OIF, b)) } + iifIndex := 0 if len(options.Iif) > 0 { - link, err := LinkByName(options.Iif) + link, err := h.LinkByName(options.Iif) if err != nil { return nil, err } + iifIndex = link.Attrs().Index + } else if options.IifIndex > 0 { + iifIndex = options.IifIndex + } + + if iifIndex > 0 { b := make([]byte, 4) - native.PutUint32(b, uint32(link.Attrs().Index)) + native.PutUint32(b, uint32(iifIndex)) req.AddData(nl.NewRtAttr(unix.RTA_IIF, b)) } if len(options.Oif) > 0 { - link, err := LinkByName(options.Oif) + link, err := h.LinkByName(options.Oif) if err != nil { return nil, err } @@ -1381,6 +1619,21 @@ func (h *Handle) RouteGetWithOptions(destination net.IP, options *RouteGetOption req.AddData(nl.NewRtAttr(unix.RTA_SRC, srcAddr)) } + + if options.UID != nil { + uid := *options.UID + b := make([]byte, 4) + native.PutUint32(b, uid) + + req.AddData(nl.NewRtAttr(unix.RTA_UID, b)) + } + + if options.Mark > 0 { + b := make([]byte, 4) + native.PutUint32(b, options.Mark) + + req.AddData(nl.NewRtAttr(unix.RTA_MARK, b)) + } } msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWROUTE) @@ -1408,21 +1661,24 @@ func (h *Handle) RouteGet(destination net.IP) ([]Route, error) { // RouteSubscribe takes a chan down which notifications will be sent // when routes are added or deleted. Close the 'done' chan to stop subscription. func RouteSubscribe(ch chan<- RouteUpdate, done <-chan struct{}) error { - return routeSubscribeAt(netns.None(), netns.None(), ch, done, nil, false) + return routeSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0, nil, false) } // RouteSubscribeAt works like RouteSubscribe plus it allows the caller // to choose the network namespace in which to subscribe (ns). func RouteSubscribeAt(ns netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}) error { - return routeSubscribeAt(ns, netns.None(), ch, done, nil, false) + return routeSubscribeAt(ns, netns.None(), ch, done, nil, false, 0, nil, false) } // RouteSubscribeOptions contains a set of options to use with // RouteSubscribeWithOptions. type RouteSubscribeOptions struct { - Namespace *netns.NsHandle - ErrorCallback func(error) - ListExisting bool + Namespace *netns.NsHandle + ErrorCallback func(error) + ListExisting bool + ReceiveBufferSize int + ReceiveBufferForceSize bool + ReceiveTimeout *unix.Timeval } // RouteSubscribeWithOptions work like RouteSubscribe but enable to @@ -1433,14 +1689,27 @@ func RouteSubscribeWithOptions(ch chan<- RouteUpdate, done <-chan struct{}, opti none := netns.None() options.Namespace = &none } - return routeSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting) + return routeSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, + options.ReceiveBufferSize, options.ReceiveTimeout, options.ReceiveBufferForceSize) } -func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}, cberr func(error), listExisting bool) error { +func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}, cberr func(error), listExisting bool, + rcvbuf int, rcvTimeout *unix.Timeval, rcvbufForce bool) error { s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_IPV4_ROUTE, unix.RTNLGRP_IPV6_ROUTE) if err != nil { return err } + if rcvTimeout != nil { + if err := s.SetReceiveTimeout(rcvTimeout); err != nil { + return err + } + } + if rcvbuf != 0 { + err = s.SetReceiveBufferSize(rcvbuf, rcvbufForce) + if err != nil { + return err + } + } if done != nil { go func() { <-done @@ -1495,7 +1764,11 @@ func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done < } continue } - ch <- RouteUpdate{Type: m.Header.Type, Route: route} + ch <- RouteUpdate{ + Type: m.Header.Type, + NlFlags: m.Header.Flags & (unix.NLM_F_REPLACE | unix.NLM_F_EXCL | unix.NLM_F_CREATE | unix.NLM_F_APPEND), + Route: route, + } } } }() @@ -1523,7 +1796,7 @@ func (p RouteProtocol) String() string { return "gated" case unix.RTPROT_ISIS: return "isis" - //case unix.RTPROT_KEEPALIVED: + // case unix.RTPROT_KEEPALIVED: // return "keepalived" case unix.RTPROT_KERNEL: return "kernel" @@ -1553,3 +1826,24 @@ func (p RouteProtocol) String() string { return strconv.Itoa(int(p)) } } + +// genZeroIPNet returns 0.0.0.0/0 or ::/0 for IPv4 or IPv6, otherwise nil +func genZeroIPNet(family int) *net.IPNet { + var addLen int + var ip net.IP + switch family { + case FAMILY_V4: + addLen = net.IPv4len + ip = net.IPv4zero + case FAMILY_V6: + addLen = net.IPv6len + ip = net.IPv6zero + } + if addLen != 0 { + return &net.IPNet{ + IP: ip, + Mask: net.CIDRMask(0, 8*addLen), + } + } + return nil +} diff --git a/vendor/github.com/vishvananda/netlink/rule.go b/vendor/github.com/vishvananda/netlink/rule.go index 53cd3d4f..cc739458 100644 --- a/vendor/github.com/vishvananda/netlink/rule.go +++ b/vendor/github.com/vishvananda/netlink/rule.go @@ -10,8 +10,8 @@ type Rule struct { Priority int Family int Table int - Mark int - Mask int + Mark uint32 + Mask *uint32 Tos uint TunID uint Goto int @@ -26,6 +26,8 @@ type Rule struct { Dport *RulePortRange Sport *RulePortRange IPProto int + UIDRange *RuleUIDRange + Protocol uint8 } func (r Rule) String() string { @@ -49,8 +51,8 @@ func NewRule() *Rule { SuppressIfgroup: -1, SuppressPrefixlen: -1, Priority: -1, - Mark: -1, - Mask: -1, + Mark: 0, + Mask: nil, Goto: -1, Flow: -1, } @@ -66,3 +68,14 @@ type RulePortRange struct { Start uint16 End uint16 } + +// NewRuleUIDRange creates rule uid range. +func NewRuleUIDRange(start, end uint32) *RuleUIDRange { + return &RuleUIDRange{Start: start, End: end} +} + +// RuleUIDRange represents rule uid range. +type RuleUIDRange struct { + Start uint32 + End uint32 +} diff --git a/vendor/github.com/vishvananda/netlink/rule_linux.go b/vendor/github.com/vishvananda/netlink/rule_linux.go index 3ae21388..18c03a3e 100644 --- a/vendor/github.com/vishvananda/netlink/rule_linux.go +++ b/vendor/github.com/vishvananda/netlink/rule_linux.go @@ -102,14 +102,14 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error { native.PutUint32(b, uint32(rule.Priority)) req.AddData(nl.NewRtAttr(nl.FRA_PRIORITY, b)) } - if rule.Mark >= 0 { + if rule.Mark != 0 || rule.Mask != nil { b := make([]byte, 4) - native.PutUint32(b, uint32(rule.Mark)) + native.PutUint32(b, rule.Mark) req.AddData(nl.NewRtAttr(nl.FRA_FWMARK, b)) } - if rule.Mask >= 0 { + if rule.Mask != nil { b := make([]byte, 4) - native.PutUint32(b, uint32(rule.Mask)) + native.PutUint32(b, *rule.Mask) req.AddData(nl.NewRtAttr(nl.FRA_FWMASK, b)) } if rule.Flow >= 0 { @@ -168,6 +168,15 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error { req.AddData(nl.NewRtAttr(nl.FRA_SPORT_RANGE, b)) } + if rule.UIDRange != nil { + b := rule.UIDRange.toRtAttrData() + req.AddData(nl.NewRtAttr(nl.FRA_UID_RANGE, b)) + } + + if rule.Protocol > 0 { + req.AddData(nl.NewRtAttr(nl.FRA_PROTOCOL, nl.Uint8Attr(rule.Protocol))) + } + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -212,8 +221,10 @@ func (h *Handle) RuleListFiltered(family int, filter *Rule, filterMask uint64) ( } rule := NewRule() + rule.Priority = 0 // The default priority from kernel rule.Invert = msg.Flags&FibRuleInvert > 0 + rule.Family = int(msg.Family) rule.Tos = uint(msg.Tos) for j := range attrs { @@ -231,9 +242,10 @@ func (h *Handle) RuleListFiltered(family int, filter *Rule, filterMask uint64) ( Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attrs[j].Value)), } case nl.FRA_FWMARK: - rule.Mark = int(native.Uint32(attrs[j].Value[0:4])) + rule.Mark = native.Uint32(attrs[j].Value[0:4]) case nl.FRA_FWMASK: - rule.Mask = int(native.Uint32(attrs[j].Value[0:4])) + mask := native.Uint32(attrs[j].Value[0:4]) + rule.Mask = &mask case nl.FRA_TUN_ID: rule.TunID = uint(native.Uint64(attrs[j].Value[0:8])) case nl.FRA_IIFNAME: @@ -262,6 +274,10 @@ func (h *Handle) RuleListFiltered(family int, filter *Rule, filterMask uint64) ( rule.Dport = NewRulePortRange(native.Uint16(attrs[j].Value[0:2]), native.Uint16(attrs[j].Value[2:4])) case nl.FRA_SPORT_RANGE: rule.Sport = NewRulePortRange(native.Uint16(attrs[j].Value[0:2]), native.Uint16(attrs[j].Value[2:4])) + case nl.FRA_UID_RANGE: + rule.UIDRange = NewRuleUIDRange(native.Uint32(attrs[j].Value[0:4]), native.Uint32(attrs[j].Value[4:8])) + case nl.FRA_PROTOCOL: + rule.Protocol = uint8(attrs[j].Value[0]) } } @@ -282,7 +298,7 @@ func (h *Handle) RuleListFiltered(family int, filter *Rule, filterMask uint64) ( continue case filterMask&RT_FILTER_MARK != 0 && rule.Mark != filter.Mark: continue - case filterMask&RT_FILTER_MASK != 0 && rule.Mask != filter.Mask: + case filterMask&RT_FILTER_MASK != 0 && !ptrEqual(rule.Mask, filter.Mask): continue } } @@ -299,3 +315,20 @@ func (pr *RulePortRange) toRtAttrData() []byte { native.PutUint16(b[1], pr.End) return bytes.Join(b, []byte{}) } + +func (pr *RuleUIDRange) toRtAttrData() []byte { + b := [][]byte{make([]byte, 4), make([]byte, 4)} + native.PutUint32(b[0], pr.Start) + native.PutUint32(b[1], pr.End) + return bytes.Join(b, []byte{}) +} + +func ptrEqual(a, b *uint32) bool { + if a == b { + return true + } + if (a == nil) || (b == nil) { + return false + } + return *a == *b +} diff --git a/vendor/github.com/vishvananda/netlink/socket.go b/vendor/github.com/vishvananda/netlink/socket.go index 41aa7262..e65efb13 100644 --- a/vendor/github.com/vishvananda/netlink/socket.go +++ b/vendor/github.com/vishvananda/netlink/socket.go @@ -25,3 +25,80 @@ type Socket struct { UID uint32 INode uint32 } + +// UnixSocket represents a netlink unix socket. +type UnixSocket struct { + Type uint8 + Family uint8 + State uint8 + pad uint8 + INode uint32 + Cookie [2]uint32 +} + +// XDPSocket represents an XDP socket (and the common diagnosis part in +// particular). Please note that in contrast to [UnixSocket] the XDPSocket type +// does not feature “State” information. +type XDPSocket struct { + // xdp_diag_msg + // https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L21 + Family uint8 + Type uint8 + pad uint16 + Ino uint32 + Cookie [2]uint32 +} + +type XDPInfo struct { + // XDP_DIAG_INFO/xdp_diag_info + // https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L51 + Ifindex uint32 + QueueID uint32 + + // XDP_DIAG_UID + UID uint32 + + // XDP_RX_RING + // https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L56 + RxRingEntries uint32 + TxRingEntries uint32 + UmemFillRingEntries uint32 + UmemCompletionRingEntries uint32 + + // XDR_DIAG_UMEM + Umem *XDPDiagUmem + + // XDR_DIAG_STATS + Stats *XDPDiagStats +} + +const ( + XDP_DU_F_ZEROCOPY = 1 << iota +) + +// XDPDiagUmem describes the umem attached to an XDP socket. +// +// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L62 +type XDPDiagUmem struct { + Size uint64 + ID uint32 + NumPages uint32 + ChunkSize uint32 + Headroom uint32 + Ifindex uint32 + QueueID uint32 + Flags uint32 + Refs uint32 +} + +// XDPDiagStats contains ring statistics for an XDP socket. +// +// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L74 +type XDPDiagStats struct { + RxDropped uint64 + RxInvalid uint64 + RxFull uint64 + FillRingEmpty uint64 + TxInvalid uint64 + TxRingEmpty uint64 +} diff --git a/vendor/github.com/vishvananda/netlink/socket_linux.go b/vendor/github.com/vishvananda/netlink/socket_linux.go index b881fe49..4eb4aeaf 100644 --- a/vendor/github.com/vishvananda/netlink/socket_linux.go +++ b/vendor/github.com/vishvananda/netlink/socket_linux.go @@ -11,9 +11,11 @@ import ( ) const ( - sizeofSocketID = 0x30 - sizeofSocketRequest = sizeofSocketID + 0x8 - sizeofSocket = sizeofSocketID + 0x18 + sizeofSocketID = 0x30 + sizeofSocketRequest = sizeofSocketID + 0x8 + sizeofSocket = sizeofSocketID + 0x18 + sizeofUnixSocketRequest = 0x18 // 24 byte + sizeofUnixSocket = 0x10 // 16 byte ) type socketRequest struct { @@ -54,10 +56,8 @@ func (r *socketRequest) Serialize() []byte { copy(b.Next(16), r.ID.Source) copy(b.Next(16), r.ID.Destination) } else { - copy(b.Next(4), r.ID.Source.To4()) - b.Next(12) - copy(b.Next(4), r.ID.Destination.To4()) - b.Next(12) + copy(b.Next(16), r.ID.Source.To4()) + copy(b.Next(16), r.ID.Destination.To4()) } native.PutUint32(b.Next(4), r.ID.Interface) native.PutUint32(b.Next(4), r.ID.Cookie[0]) @@ -67,6 +67,32 @@ func (r *socketRequest) Serialize() []byte { func (r *socketRequest) Len() int { return sizeofSocketRequest } +// According to linux/include/uapi/linux/unix_diag.h +type unixSocketRequest struct { + Family uint8 + Protocol uint8 + pad uint16 + States uint32 + INode uint32 + Show uint32 + Cookie [2]uint32 +} + +func (r *unixSocketRequest) Serialize() []byte { + b := writeBuffer{Bytes: make([]byte, sizeofUnixSocketRequest)} + b.Write(r.Family) + b.Write(r.Protocol) + native.PutUint16(b.Next(2), r.pad) + native.PutUint32(b.Next(4), r.States) + native.PutUint32(b.Next(4), r.INode) + native.PutUint32(b.Next(4), r.Show) + native.PutUint32(b.Next(4), r.Cookie[0]) + native.PutUint32(b.Next(4), r.Cookie[1]) + return b.Bytes +} + +func (r *unixSocketRequest) Len() int { return sizeofUnixSocketRequest } + type readBuffer struct { Bytes []byte pos int @@ -115,31 +141,126 @@ func (s *Socket) deserialize(b []byte) error { return nil } +func (u *UnixSocket) deserialize(b []byte) error { + if len(b) < sizeofUnixSocket { + return fmt.Errorf("unix diag data short read (%d); want %d", len(b), sizeofUnixSocket) + } + rb := readBuffer{Bytes: b} + u.Type = rb.Read() + u.Family = rb.Read() + u.State = rb.Read() + u.pad = rb.Read() + u.INode = native.Uint32(rb.Next(4)) + u.Cookie[0] = native.Uint32(rb.Next(4)) + u.Cookie[1] = native.Uint32(rb.Next(4)) + return nil +} + +// SocketGet returns the Socket identified by its local and remote addresses. +func (h *Handle) SocketGet(local, remote net.Addr) (*Socket, error) { + var protocol uint8 + var localIP, remoteIP net.IP + var localPort, remotePort uint16 + switch l := local.(type) { + case *net.TCPAddr: + r, ok := remote.(*net.TCPAddr) + if !ok { + return nil, ErrNotImplemented + } + localIP = l.IP + localPort = uint16(l.Port) + remoteIP = r.IP + remotePort = uint16(r.Port) + protocol = unix.IPPROTO_TCP + case *net.UDPAddr: + r, ok := remote.(*net.UDPAddr) + if !ok { + return nil, ErrNotImplemented + } + localIP = l.IP + localPort = uint16(l.Port) + remoteIP = r.IP + remotePort = uint16(r.Port) + protocol = unix.IPPROTO_UDP + default: + return nil, ErrNotImplemented + } + + var family uint8 + if localIP.To4() != nil && remoteIP.To4() != nil { + family = unix.AF_INET + } + + if family == 0 && localIP.To16() != nil && remoteIP.To16() != nil { + family = unix.AF_INET6 + } + + if family == 0 { + return nil, ErrNotImplemented + } + + req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&socketRequest{ + Family: family, + Protocol: protocol, + States: 0xffffffff, + ID: SocketID{ + SourcePort: localPort, + DestinationPort: remotePort, + Source: localIP, + Destination: remoteIP, + Cookie: [2]uint32{nl.TCPDIAG_NOCOOKIE, nl.TCPDIAG_NOCOOKIE}, + }, + }) + + msgs, err := req.Execute(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY) + if err != nil { + return nil, err + } + if len(msgs) == 0 { + return nil, errors.New("no message nor error from netlink") + } + if len(msgs) > 2 { + return nil, fmt.Errorf("multiple (%d) matching sockets", len(msgs)) + } + + sock := &Socket{} + if err := sock.deserialize(msgs[0]); err != nil { + return nil, err + } + return sock, nil +} + // SocketGet returns the Socket identified by its local and remote addresses. func SocketGet(local, remote net.Addr) (*Socket, error) { + return pkgHandle.SocketGet(local, remote) +} + +// SocketDestroy kills the Socket identified by its local and remote addresses. +func (h *Handle) SocketDestroy(local, remote net.Addr) error { localTCP, ok := local.(*net.TCPAddr) if !ok { - return nil, ErrNotImplemented + return ErrNotImplemented } remoteTCP, ok := remote.(*net.TCPAddr) if !ok { - return nil, ErrNotImplemented + return ErrNotImplemented } localIP := localTCP.IP.To4() if localIP == nil { - return nil, ErrNotImplemented + return ErrNotImplemented } remoteIP := remoteTCP.IP.To4() if remoteIP == nil { - return nil, ErrNotImplemented + return ErrNotImplemented } s, err := nl.Subscribe(unix.NETLINK_INET_DIAG) if err != nil { - return nil, err + return err } defer s.Close() - req := nl.NewNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, 0) + req := h.newNetlinkRequest(nl.SOCK_DESTROY, unix.NLM_F_ACK) req.AddData(&socketRequest{ Family: unix.AF_INET, Protocol: unix.IPPROTO_TCP, @@ -151,64 +272,81 @@ func SocketGet(local, remote net.Addr) (*Socket, error) { Cookie: [2]uint32{nl.TCPDIAG_NOCOOKIE, nl.TCPDIAG_NOCOOKIE}, }, }) - s.Send(req) - msgs, from, err := s.Receive() - if err != nil { - return nil, err - } - if from.Pid != nl.PidKernel { - return nil, fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel) - } - if len(msgs) == 0 { - return nil, errors.New("no message nor error from netlink") - } - if len(msgs) > 2 { - return nil, fmt.Errorf("multiple (%d) matching sockets", len(msgs)) - } - sock := &Socket{} - if err := sock.deserialize(msgs[0].Data); err != nil { - return nil, err - } - return sock, nil + + _, err = req.Execute(unix.NETLINK_INET_DIAG, 0) + return err +} + +// SocketDestroy kills the Socket identified by its local and remote addresses. +func SocketDestroy(local, remote net.Addr) error { + return pkgHandle.SocketDestroy(local, remote) } // SocketDiagTCPInfo requests INET_DIAG_INFO for TCP protocol for specified family type and return with extension TCP info. -func SocketDiagTCPInfo(family uint8) ([]*InetDiagTCPInfoResp, error) { +func (h *Handle) SocketDiagTCPInfo(family uint8) ([]*InetDiagTCPInfoResp, error) { + // Construct the request + req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&socketRequest{ + Family: family, + Protocol: unix.IPPROTO_TCP, + Ext: (1 << (INET_DIAG_VEGASINFO - 1)) | (1 << (INET_DIAG_INFO - 1)), + States: uint32(0xfff), // all states + }) + + // Do the query and parse the result var result []*InetDiagTCPInfoResp - err := socketDiagTCPExecutor(family, func(m syscall.NetlinkMessage) error { + var err error + err = req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool { sockInfo := &Socket{} - if err := sockInfo.deserialize(m.Data); err != nil { - return err + if err = sockInfo.deserialize(msg); err != nil { + return false } - attrs, err := nl.ParseRouteAttr(m.Data[sizeofSocket:]) - if err != nil { - return err + var attrs []syscall.NetlinkRouteAttr + if attrs, err = nl.ParseRouteAttr(msg[sizeofSocket:]); err != nil { + return false } - res, err := attrsToInetDiagTCPInfoResp(attrs, sockInfo) - if err != nil { - return err + var res *InetDiagTCPInfoResp + if res, err = attrsToInetDiagTCPInfoResp(attrs, sockInfo); err != nil { + return false } result = append(result, res) - return nil + return true }) + if err != nil { return nil, err } return result, nil } +// SocketDiagTCPInfo requests INET_DIAG_INFO for TCP protocol for specified family type and return with extension TCP info. +func SocketDiagTCPInfo(family uint8) ([]*InetDiagTCPInfoResp, error) { + return pkgHandle.SocketDiagTCPInfo(family) +} + // SocketDiagTCP requests INET_DIAG_INFO for TCP protocol for specified family type and return related socket. -func SocketDiagTCP(family uint8) ([]*Socket, error) { +func (h *Handle) SocketDiagTCP(family uint8) ([]*Socket, error) { + // Construct the request + req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&socketRequest{ + Family: family, + Protocol: unix.IPPROTO_TCP, + Ext: (1 << (INET_DIAG_VEGASINFO - 1)) | (1 << (INET_DIAG_INFO - 1)), + States: uint32(0xfff), // all states + }) + + // Do the query and parse the result var result []*Socket - err := socketDiagTCPExecutor(family, func(m syscall.NetlinkMessage) error { + var err error + err = req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool { sockInfo := &Socket{} - if err := sockInfo.deserialize(m.Data); err != nil { - return err + if err = sockInfo.deserialize(msg); err != nil { + return false } result = append(result, sockInfo) - return nil + return true }) if err != nil { return nil, err @@ -216,76 +354,237 @@ func SocketDiagTCP(family uint8) ([]*Socket, error) { return result, nil } -// socketDiagTCPExecutor requests INET_DIAG_INFO for TCP protocol for specified family type. -func socketDiagTCPExecutor(family uint8, receiver func(syscall.NetlinkMessage) error) error { - s, err := nl.Subscribe(unix.NETLINK_INET_DIAG) +// SocketDiagTCP requests INET_DIAG_INFO for TCP protocol for specified family type and return related socket. +func SocketDiagTCP(family uint8) ([]*Socket, error) { + return pkgHandle.SocketDiagTCP(family) +} + +// SocketDiagUDPInfo requests INET_DIAG_INFO for UDP protocol for specified family type and return with extension info. +func (h *Handle) SocketDiagUDPInfo(family uint8) ([]*InetDiagUDPInfoResp, error) { + // Construct the request + var extensions uint8 + extensions = 1 << (INET_DIAG_VEGASINFO - 1) + extensions |= 1 << (INET_DIAG_INFO - 1) + extensions |= 1 << (INET_DIAG_MEMINFO - 1) + + req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&socketRequest{ + Family: family, + Protocol: unix.IPPROTO_UDP, + Ext: extensions, + States: uint32(0xfff), // all states + }) + + // Do the query and parse the result + var result []*InetDiagUDPInfoResp + var err error + err = req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool { + sockInfo := &Socket{} + if err = sockInfo.deserialize(msg); err != nil { + return false + } + + var attrs []syscall.NetlinkRouteAttr + if attrs, err = nl.ParseRouteAttr(msg[sizeofSocket:]); err != nil { + return false + } + + var res *InetDiagUDPInfoResp + if res, err = attrsToInetDiagUDPInfoResp(attrs, sockInfo); err != nil { + return false + } + + result = append(result, res) + return true + }) if err != nil { - return err + return nil, err } - defer s.Close() + return result, nil +} - req := nl.NewNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) +// SocketDiagUDPInfo requests INET_DIAG_INFO for UDP protocol for specified family type and return with extension info. +func SocketDiagUDPInfo(family uint8) ([]*InetDiagUDPInfoResp, error) { + return pkgHandle.SocketDiagUDPInfo(family) +} + +// SocketDiagUDP requests INET_DIAG_INFO for UDP protocol for specified family type and return related socket. +func (h *Handle) SocketDiagUDP(family uint8) ([]*Socket, error) { + // Construct the request + req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) req.AddData(&socketRequest{ Family: family, - Protocol: unix.IPPROTO_TCP, + Protocol: unix.IPPROTO_UDP, Ext: (1 << (INET_DIAG_VEGASINFO - 1)) | (1 << (INET_DIAG_INFO - 1)), - States: uint32(0xfff), // All TCP states + States: uint32(0xfff), // all states }) - s.Send(req) -loop: - for { - msgs, from, err := s.Receive() - if err != nil { - return err + // Do the query and parse the result + var result []*Socket + var err error + err = req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool { + sockInfo := &Socket{} + if err = sockInfo.deserialize(msg); err != nil { + return false } - if from.Pid != nl.PidKernel { - return fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel) + result = append(result, sockInfo) + return true + }) + if err != nil { + return nil, err + } + return result, nil +} + +// SocketDiagUDP requests INET_DIAG_INFO for UDP protocol for specified family type and return related socket. +func SocketDiagUDP(family uint8) ([]*Socket, error) { + return pkgHandle.SocketDiagUDP(family) +} + +// UnixSocketDiagInfo requests UNIX_DIAG_INFO for unix sockets and return with extension info. +func (h *Handle) UnixSocketDiagInfo() ([]*UnixDiagInfoResp, error) { + // Construct the request + var extensions uint8 + extensions = 1 << UNIX_DIAG_NAME + extensions |= 1 << UNIX_DIAG_PEER + extensions |= 1 << UNIX_DIAG_RQLEN + req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&unixSocketRequest{ + Family: unix.AF_UNIX, + States: ^uint32(0), // all states + Show: uint32(extensions), + }) + + var result []*UnixDiagInfoResp + var err error + err = req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool { + sockInfo := &UnixSocket{} + if err = sockInfo.deserialize(msg); err != nil { + return false } - if len(msgs) == 0 { - return errors.New("no message nor error from netlink") + + // Diagnosis also delivers sockets with AF_INET family, filter those + if sockInfo.Family != unix.AF_UNIX { + return false } - for _, m := range msgs { - switch m.Header.Type { - case unix.NLMSG_DONE: - break loop - case unix.NLMSG_ERROR: - error := int32(native.Uint32(m.Data[0:4])) - return syscall.Errno(-error) - } - if err := receiver(m); err != nil { - return err - } + var attrs []syscall.NetlinkRouteAttr + if attrs, err = nl.ParseRouteAttr(msg[sizeofSocket:]); err != nil { + return false } + + var res *UnixDiagInfoResp + if res, err = attrsToUnixDiagInfoResp(attrs, sockInfo); err != nil { + return false + } + result = append(result, res) + return true + }) + if err != nil { + return nil, err } - return nil + return result, nil +} + +// UnixSocketDiagInfo requests UNIX_DIAG_INFO for unix sockets and return with extension info. +func UnixSocketDiagInfo() ([]*UnixDiagInfoResp, error) { + return pkgHandle.UnixSocketDiagInfo() +} + +// UnixSocketDiag requests UNIX_DIAG_INFO for unix sockets. +func (h *Handle) UnixSocketDiag() ([]*UnixSocket, error) { + // Construct the request + req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&unixSocketRequest{ + Family: unix.AF_UNIX, + States: ^uint32(0), // all states + }) + + var result []*UnixSocket + var err error + err = req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool { + sockInfo := &UnixSocket{} + if err = sockInfo.deserialize(msg); err != nil { + return false + } + + // Diagnosis also delivers sockets with AF_INET family, filter those + if sockInfo.Family == unix.AF_UNIX { + result = append(result, sockInfo) + } + return true + }) + if err != nil { + return nil, err + } + return result, nil +} + +// UnixSocketDiag requests UNIX_DIAG_INFO for unix sockets. +func UnixSocketDiag() ([]*UnixSocket, error) { + return pkgHandle.UnixSocketDiag() } func attrsToInetDiagTCPInfoResp(attrs []syscall.NetlinkRouteAttr, sockInfo *Socket) (*InetDiagTCPInfoResp, error) { - var tcpInfo *TCPInfo - var tcpBBRInfo *TCPBBRInfo + info := &InetDiagTCPInfoResp{ + InetDiagMsg: sockInfo, + } for _, a := range attrs { - if a.Attr.Type == INET_DIAG_INFO { - tcpInfo = &TCPInfo{} - if err := tcpInfo.deserialize(a.Value); err != nil { + switch a.Attr.Type { + case INET_DIAG_INFO: + info.TCPInfo = &TCPInfo{} + if err := info.TCPInfo.deserialize(a.Value); err != nil { + return nil, err + } + case INET_DIAG_BBRINFO: + info.TCPBBRInfo = &TCPBBRInfo{} + if err := info.TCPBBRInfo.deserialize(a.Value); err != nil { return nil, err } - continue } + } - if a.Attr.Type == INET_DIAG_BBRINFO { - tcpBBRInfo = &TCPBBRInfo{} - if err := tcpBBRInfo.deserialize(a.Value); err != nil { + return info, nil +} + +func attrsToInetDiagUDPInfoResp(attrs []syscall.NetlinkRouteAttr, sockInfo *Socket) (*InetDiagUDPInfoResp, error) { + info := &InetDiagUDPInfoResp{ + InetDiagMsg: sockInfo, + } + for _, a := range attrs { + switch a.Attr.Type { + case INET_DIAG_MEMINFO: + info.Memory = &MemInfo{} + if err := info.Memory.deserialize(a.Value); err != nil { return nil, err } - continue } } - return &InetDiagTCPInfoResp{ - InetDiagMsg: sockInfo, - TCPInfo: tcpInfo, - TCPBBRInfo: tcpBBRInfo, - }, nil + return info, nil +} + +func attrsToUnixDiagInfoResp(attrs []syscall.NetlinkRouteAttr, sockInfo *UnixSocket) (*UnixDiagInfoResp, error) { + info := &UnixDiagInfoResp{ + DiagMsg: sockInfo, + } + for _, a := range attrs { + switch a.Attr.Type { + case UNIX_DIAG_NAME: + name := string(a.Value[:a.Attr.Len]) + info.Name = &name + case UNIX_DIAG_PEER: + peer := native.Uint32(a.Value) + info.Peer = &peer + case UNIX_DIAG_RQLEN: + info.Queue = &QueueInfo{ + RQueue: native.Uint32(a.Value[:4]), + WQueue: native.Uint32(a.Value[4:]), + } + // default: + // fmt.Println("unknown unix attribute type", a.Attr.Type, "with data", a.Value) + } + } + + return info, nil } diff --git a/vendor/github.com/vishvananda/netlink/socket_xdp_linux.go b/vendor/github.com/vishvananda/netlink/socket_xdp_linux.go new file mode 100644 index 00000000..20c82f9c --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/socket_xdp_linux.go @@ -0,0 +1,195 @@ +package netlink + +import ( + "errors" + "fmt" + "syscall" + + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +const ( + sizeofXDPSocketRequest = 1 + 1 + 2 + 4 + 4 + 2*4 + sizeofXDPSocket = 0x10 +) + +// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L12 +type xdpSocketRequest struct { + Family uint8 + Protocol uint8 + pad uint16 + Ino uint32 + Show uint32 + Cookie [2]uint32 +} + +func (r *xdpSocketRequest) Serialize() []byte { + b := writeBuffer{Bytes: make([]byte, sizeofSocketRequest)} + b.Write(r.Family) + b.Write(r.Protocol) + native.PutUint16(b.Next(2), r.pad) + native.PutUint32(b.Next(4), r.Ino) + native.PutUint32(b.Next(4), r.Show) + native.PutUint32(b.Next(4), r.Cookie[0]) + native.PutUint32(b.Next(4), r.Cookie[1]) + return b.Bytes +} + +func (r *xdpSocketRequest) Len() int { return sizeofXDPSocketRequest } + +func (s *XDPSocket) deserialize(b []byte) error { + if len(b) < sizeofXDPSocket { + return fmt.Errorf("XDP socket data short read (%d); want %d", len(b), sizeofXDPSocket) + } + rb := readBuffer{Bytes: b} + s.Family = rb.Read() + s.Type = rb.Read() + s.pad = native.Uint16(rb.Next(2)) + s.Ino = native.Uint32(rb.Next(4)) + s.Cookie[0] = native.Uint32(rb.Next(4)) + s.Cookie[1] = native.Uint32(rb.Next(4)) + return nil +} + +// XDPSocketGet returns the XDP socket identified by its inode number and/or +// socket cookie. Specify the cookie as SOCK_ANY_COOKIE if +func SocketXDPGetInfo(ino uint32, cookie uint64) (*XDPDiagInfoResp, error) { + // We have a problem here: dumping AF_XDP sockets currently does not support + // filtering. We thus need to dump all XSKs and then only filter afterwards + // :( + xsks, err := SocketDiagXDP() + if err != nil { + return nil, err + } + checkCookie := cookie != SOCK_ANY_COOKIE && cookie != 0 + crumblingCookie := [2]uint32{uint32(cookie), uint32(cookie >> 32)} + checkIno := ino != 0 + var xskinfo *XDPDiagInfoResp + for _, xsk := range xsks { + if checkIno && xsk.XDPDiagMsg.Ino != ino { + continue + } + if checkCookie && xsk.XDPDiagMsg.Cookie != crumblingCookie { + continue + } + if xskinfo != nil { + return nil, errors.New("multiple matching XDP sockets") + } + xskinfo = xsk + } + if xskinfo == nil { + return nil, errors.New("no matching XDP socket") + } + return xskinfo, nil +} + +// SocketDiagXDP requests XDP_DIAG_INFO for XDP family sockets. +func SocketDiagXDP() ([]*XDPDiagInfoResp, error) { + var result []*XDPDiagInfoResp + err := socketDiagXDPExecutor(func(m syscall.NetlinkMessage) error { + sockInfo := &XDPSocket{} + if err := sockInfo.deserialize(m.Data); err != nil { + return err + } + attrs, err := nl.ParseRouteAttr(m.Data[sizeofXDPSocket:]) + if err != nil { + return err + } + + res, err := attrsToXDPDiagInfoResp(attrs, sockInfo) + if err != nil { + return err + } + + result = append(result, res) + return nil + }) + if err != nil { + return nil, err + } + return result, nil +} + +// socketDiagXDPExecutor requests XDP_DIAG_INFO for XDP family sockets. +func socketDiagXDPExecutor(receiver func(syscall.NetlinkMessage) error) error { + s, err := nl.Subscribe(unix.NETLINK_INET_DIAG) + if err != nil { + return err + } + defer s.Close() + + req := nl.NewNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&xdpSocketRequest{ + Family: unix.AF_XDP, + Show: XDP_SHOW_INFO | XDP_SHOW_RING_CFG | XDP_SHOW_UMEM | XDP_SHOW_STATS, + }) + if err := s.Send(req); err != nil { + return err + } + +loop: + for { + msgs, from, err := s.Receive() + if err != nil { + return err + } + if from.Pid != nl.PidKernel { + return fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel) + } + if len(msgs) == 0 { + return errors.New("no message nor error from netlink") + } + + for _, m := range msgs { + switch m.Header.Type { + case unix.NLMSG_DONE: + break loop + case unix.NLMSG_ERROR: + error := int32(native.Uint32(m.Data[0:4])) + return syscall.Errno(-error) + } + if err := receiver(m); err != nil { + return err + } + } + } + return nil +} + +func attrsToXDPDiagInfoResp(attrs []syscall.NetlinkRouteAttr, sockInfo *XDPSocket) (*XDPDiagInfoResp, error) { + resp := &XDPDiagInfoResp{ + XDPDiagMsg: sockInfo, + XDPInfo: &XDPInfo{}, + } + for _, a := range attrs { + switch a.Attr.Type { + case XDP_DIAG_INFO: + resp.XDPInfo.Ifindex = native.Uint32(a.Value[0:4]) + resp.XDPInfo.QueueID = native.Uint32(a.Value[4:8]) + case XDP_DIAG_UID: + resp.XDPInfo.UID = native.Uint32(a.Value[0:4]) + case XDP_DIAG_RX_RING: + resp.XDPInfo.RxRingEntries = native.Uint32(a.Value[0:4]) + case XDP_DIAG_TX_RING: + resp.XDPInfo.TxRingEntries = native.Uint32(a.Value[0:4]) + case XDP_DIAG_UMEM_FILL_RING: + resp.XDPInfo.UmemFillRingEntries = native.Uint32(a.Value[0:4]) + case XDP_DIAG_UMEM_COMPLETION_RING: + resp.XDPInfo.UmemCompletionRingEntries = native.Uint32(a.Value[0:4]) + case XDP_DIAG_UMEM: + umem := &XDPDiagUmem{} + if err := umem.deserialize(a.Value); err != nil { + return nil, err + } + resp.XDPInfo.Umem = umem + case XDP_DIAG_STATS: + stats := &XDPDiagStats{} + if err := stats.deserialize(a.Value); err != nil { + return nil, err + } + resp.XDPInfo.Stats = stats + } + } + return resp, nil +} diff --git a/vendor/github.com/vishvananda/netlink/tcp.go b/vendor/github.com/vishvananda/netlink/tcp.go index 23ca014d..43f80a0f 100644 --- a/vendor/github.com/vishvananda/netlink/tcp.go +++ b/vendor/github.com/vishvananda/netlink/tcp.go @@ -82,3 +82,11 @@ type TCPBBRInfo struct { BBRPacingGain uint32 BBRCwndGain uint32 } + +// According to https://man7.org/linux/man-pages/man7/sock_diag.7.html +type MemInfo struct { + RMem uint32 + WMem uint32 + FMem uint32 + TMem uint32 +} diff --git a/vendor/github.com/vishvananda/netlink/tcp_linux.go b/vendor/github.com/vishvananda/netlink/tcp_linux.go index 29385873..e98036da 100644 --- a/vendor/github.com/vishvananda/netlink/tcp_linux.go +++ b/vendor/github.com/vishvananda/netlink/tcp_linux.go @@ -8,6 +8,7 @@ import ( const ( tcpBBRInfoLen = 20 + memInfoLen = 16 ) func checkDeserErr(err error) error { @@ -351,3 +352,17 @@ func (t *TCPBBRInfo) deserialize(b []byte) error { return nil } + +func (m *MemInfo) deserialize(b []byte) error { + if len(b) != memInfoLen { + return errors.New("Invalid length") + } + + rb := bytes.NewBuffer(b) + m.RMem = native.Uint32(rb.Next(4)) + m.WMem = native.Uint32(rb.Next(4)) + m.FMem = native.Uint32(rb.Next(4)) + m.TMem = native.Uint32(rb.Next(4)) + + return nil +} diff --git a/vendor/github.com/vishvananda/netlink/unix_diag.go b/vendor/github.com/vishvananda/netlink/unix_diag.go new file mode 100644 index 00000000..d81776f3 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/unix_diag.go @@ -0,0 +1,27 @@ +package netlink + +// According to linux/include/uapi/linux/unix_diag.h +const ( + UNIX_DIAG_NAME = iota + UNIX_DIAG_VFS + UNIX_DIAG_PEER + UNIX_DIAG_ICONS + UNIX_DIAG_RQLEN + UNIX_DIAG_MEMINFO + UNIX_DIAG_SHUTDOWN + UNIX_DIAG_UID + UNIX_DIAG_MAX +) + +type UnixDiagInfoResp struct { + DiagMsg *UnixSocket + Name *string + Peer *uint32 + Queue *QueueInfo + Shutdown *uint8 +} + +type QueueInfo struct { + RQueue uint32 + WQueue uint32 +} diff --git a/vendor/github.com/vishvananda/netlink/vdpa_linux.go b/vendor/github.com/vishvananda/netlink/vdpa_linux.go new file mode 100644 index 00000000..7c15986d --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/vdpa_linux.go @@ -0,0 +1,463 @@ +package netlink + +import ( + "fmt" + "net" + "syscall" + + "golang.org/x/sys/unix" + + "github.com/vishvananda/netlink/nl" +) + +type vdpaDevID struct { + Name string + ID uint32 +} + +// VDPADev contains info about VDPA device +type VDPADev struct { + vdpaDevID + VendorID uint32 + MaxVQS uint32 + MaxVQSize uint16 + MinVQSize uint16 +} + +// VDPADevConfig contains configuration of the VDPA device +type VDPADevConfig struct { + vdpaDevID + Features uint64 + NegotiatedFeatures uint64 + Net VDPADevConfigNet +} + +// VDPADevVStats conatins vStats for the VDPA device +type VDPADevVStats struct { + vdpaDevID + QueueIndex uint32 + Vendor []VDPADevVStatsVendor + NegotiatedFeatures uint64 +} + +// VDPADevVStatsVendor conatins name and value for vendor specific vstat option +type VDPADevVStatsVendor struct { + Name string + Value uint64 +} + +// VDPADevConfigNet conatins status and net config for the VDPA device +type VDPADevConfigNet struct { + Status VDPADevConfigNetStatus + Cfg VDPADevConfigNetCfg +} + +// VDPADevConfigNetStatus contains info about net status +type VDPADevConfigNetStatus struct { + LinkUp bool + Announce bool +} + +// VDPADevConfigNetCfg contains net config for the VDPA device +type VDPADevConfigNetCfg struct { + MACAddr net.HardwareAddr + MaxVQP uint16 + MTU uint16 +} + +// VDPAMGMTDev conatins info about VDPA management device +type VDPAMGMTDev struct { + BusName string + DevName string + SupportedClasses uint64 + SupportedFeatures uint64 + MaxVQS uint32 +} + +// VDPANewDevParams contains parameters for new VDPA device +// use SetBits to configure requried features for the device +// example: +// +// VDPANewDevParams{Features: SetBits(0, VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_MAC_ADDR)} +type VDPANewDevParams struct { + MACAddr net.HardwareAddr + MaxVQP uint16 + MTU uint16 + Features uint64 +} + +// SetBits set provided bits in the uint64 input value +// usage example: +// features := SetBits(0, VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_MAC_ADDR) +func SetBits(input uint64, pos ...int) uint64 { + for _, p := range pos { + input |= 1 << uint64(p) + } + return input +} + +// IsBitSet check if specific bit is set in the uint64 input value +// usage example: +// hasNetClass := IsBitSet(mgmtDev, VIRTIO_ID_NET) +func IsBitSet(input uint64, pos int) bool { + val := input & (1 << uint64(pos)) + return val > 0 +} + +// VDPANewDev adds new VDPA device +// Equivalent to: `vdpa dev add name mgmtdev /mgmtName [params]` +func VDPANewDev(name, mgmtBus, mgmtName string, params VDPANewDevParams) error { + return pkgHandle.VDPANewDev(name, mgmtBus, mgmtName, params) +} + +// VDPADelDev removes VDPA device +// Equivalent to: `vdpa dev del ` +func VDPADelDev(name string) error { + return pkgHandle.VDPADelDev(name) +} + +// VDPAGetDevList returns list of VDPA devices +// Equivalent to: `vdpa dev show` +func VDPAGetDevList() ([]*VDPADev, error) { + return pkgHandle.VDPAGetDevList() +} + +// VDPAGetDevByName returns VDPA device selected by name +// Equivalent to: `vdpa dev show ` +func VDPAGetDevByName(name string) (*VDPADev, error) { + return pkgHandle.VDPAGetDevByName(name) +} + +// VDPAGetDevConfigList returns list of VDPA devices configurations +// Equivalent to: `vdpa dev config show` +func VDPAGetDevConfigList() ([]*VDPADevConfig, error) { + return pkgHandle.VDPAGetDevConfigList() +} + +// VDPAGetDevConfigByName returns VDPA device configuration selected by name +// Equivalent to: `vdpa dev config show ` +func VDPAGetDevConfigByName(name string) (*VDPADevConfig, error) { + return pkgHandle.VDPAGetDevConfigByName(name) +} + +// VDPAGetDevVStats returns vstats for VDPA device +// Equivalent to: `vdpa dev vstats show qidx ` +func VDPAGetDevVStats(name string, queueIndex uint32) (*VDPADevVStats, error) { + return pkgHandle.VDPAGetDevVStats(name, queueIndex) +} + +// VDPAGetMGMTDevList returns list of mgmt devices +// Equivalent to: `vdpa mgmtdev show` +func VDPAGetMGMTDevList() ([]*VDPAMGMTDev, error) { + return pkgHandle.VDPAGetMGMTDevList() +} + +// VDPAGetMGMTDevByBusAndName returns mgmt devices selected by bus and name +// Equivalent to: `vdpa mgmtdev show /` +func VDPAGetMGMTDevByBusAndName(bus, name string) (*VDPAMGMTDev, error) { + return pkgHandle.VDPAGetMGMTDevByBusAndName(bus, name) +} + +type vdpaNetlinkMessage []syscall.NetlinkRouteAttr + +func (id *vdpaDevID) parseIDAttribute(attr syscall.NetlinkRouteAttr) { + switch attr.Attr.Type { + case nl.VDPA_ATTR_DEV_NAME: + id.Name = nl.BytesToString(attr.Value) + case nl.VDPA_ATTR_DEV_ID: + id.ID = native.Uint32(attr.Value) + } +} + +func (netStatus *VDPADevConfigNetStatus) parseStatusAttribute(value []byte) { + a := native.Uint16(value) + netStatus.Announce = (a & VIRTIO_NET_S_ANNOUNCE) > 0 + netStatus.LinkUp = (a & VIRTIO_NET_S_LINK_UP) > 0 +} + +func (d *VDPADev) parseAttributes(attrs vdpaNetlinkMessage) { + for _, a := range attrs { + d.parseIDAttribute(a) + switch a.Attr.Type { + case nl.VDPA_ATTR_DEV_VENDOR_ID: + d.VendorID = native.Uint32(a.Value) + case nl.VDPA_ATTR_DEV_MAX_VQS: + d.MaxVQS = native.Uint32(a.Value) + case nl.VDPA_ATTR_DEV_MAX_VQ_SIZE: + d.MaxVQSize = native.Uint16(a.Value) + case nl.VDPA_ATTR_DEV_MIN_VQ_SIZE: + d.MinVQSize = native.Uint16(a.Value) + } + } +} + +func (c *VDPADevConfig) parseAttributes(attrs vdpaNetlinkMessage) { + for _, a := range attrs { + c.parseIDAttribute(a) + switch a.Attr.Type { + case nl.VDPA_ATTR_DEV_NET_CFG_MACADDR: + c.Net.Cfg.MACAddr = a.Value + case nl.VDPA_ATTR_DEV_NET_STATUS: + c.Net.Status.parseStatusAttribute(a.Value) + case nl.VDPA_ATTR_DEV_NET_CFG_MAX_VQP: + c.Net.Cfg.MaxVQP = native.Uint16(a.Value) + case nl.VDPA_ATTR_DEV_NET_CFG_MTU: + c.Net.Cfg.MTU = native.Uint16(a.Value) + case nl.VDPA_ATTR_DEV_FEATURES: + c.Features = native.Uint64(a.Value) + case nl.VDPA_ATTR_DEV_NEGOTIATED_FEATURES: + c.NegotiatedFeatures = native.Uint64(a.Value) + } + } +} + +func (s *VDPADevVStats) parseAttributes(attrs vdpaNetlinkMessage) { + for _, a := range attrs { + s.parseIDAttribute(a) + switch a.Attr.Type { + case nl.VDPA_ATTR_DEV_QUEUE_INDEX: + s.QueueIndex = native.Uint32(a.Value) + case nl.VDPA_ATTR_DEV_VENDOR_ATTR_NAME: + s.Vendor = append(s.Vendor, VDPADevVStatsVendor{Name: nl.BytesToString(a.Value)}) + case nl.VDPA_ATTR_DEV_VENDOR_ATTR_VALUE: + if len(s.Vendor) == 0 { + break + } + s.Vendor[len(s.Vendor)-1].Value = native.Uint64(a.Value) + case nl.VDPA_ATTR_DEV_NEGOTIATED_FEATURES: + s.NegotiatedFeatures = native.Uint64(a.Value) + } + } +} + +func (d *VDPAMGMTDev) parseAttributes(attrs vdpaNetlinkMessage) { + for _, a := range attrs { + switch a.Attr.Type { + case nl.VDPA_ATTR_MGMTDEV_BUS_NAME: + d.BusName = nl.BytesToString(a.Value) + case nl.VDPA_ATTR_MGMTDEV_DEV_NAME: + d.DevName = nl.BytesToString(a.Value) + case nl.VDPA_ATTR_MGMTDEV_SUPPORTED_CLASSES: + d.SupportedClasses = native.Uint64(a.Value) + case nl.VDPA_ATTR_DEV_SUPPORTED_FEATURES: + d.SupportedFeatures = native.Uint64(a.Value) + case nl.VDPA_ATTR_DEV_MGMTDEV_MAX_VQS: + d.MaxVQS = native.Uint32(a.Value) + } + } +} + +func (h *Handle) vdpaRequest(command uint8, extraFlags int, attrs []*nl.RtAttr) ([]vdpaNetlinkMessage, error) { + f, err := h.GenlFamilyGet(nl.VDPA_GENL_NAME) + if err != nil { + return nil, err + } + req := h.newNetlinkRequest(int(f.ID), unix.NLM_F_ACK|extraFlags) + req.AddData(&nl.Genlmsg{ + Command: command, + Version: nl.VDPA_GENL_VERSION, + }) + for _, a := range attrs { + req.AddData(a) + } + + resp, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + messages := make([]vdpaNetlinkMessage, 0, len(resp)) + for _, m := range resp { + attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + messages = append(messages, attrs) + } + return messages, nil +} + +// dump all devices if dev is nil +func (h *Handle) vdpaDevGet(dev *string) ([]*VDPADev, error) { + var extraFlags int + var attrs []*nl.RtAttr + if dev != nil { + attrs = append(attrs, nl.NewRtAttr(nl.VDPA_ATTR_DEV_NAME, nl.ZeroTerminated(*dev))) + } else { + extraFlags = extraFlags | unix.NLM_F_DUMP + } + messages, err := h.vdpaRequest(nl.VDPA_CMD_DEV_GET, extraFlags, attrs) + if err != nil { + return nil, err + } + devs := make([]*VDPADev, 0, len(messages)) + for _, m := range messages { + d := &VDPADev{} + d.parseAttributes(m) + devs = append(devs, d) + } + return devs, nil +} + +// dump all devices if dev is nil +func (h *Handle) vdpaDevConfigGet(dev *string) ([]*VDPADevConfig, error) { + var extraFlags int + var attrs []*nl.RtAttr + if dev != nil { + attrs = append(attrs, nl.NewRtAttr(nl.VDPA_ATTR_DEV_NAME, nl.ZeroTerminated(*dev))) + } else { + extraFlags = extraFlags | unix.NLM_F_DUMP + } + messages, err := h.vdpaRequest(nl.VDPA_CMD_DEV_CONFIG_GET, extraFlags, attrs) + if err != nil { + return nil, err + } + cfgs := make([]*VDPADevConfig, 0, len(messages)) + for _, m := range messages { + cfg := &VDPADevConfig{} + cfg.parseAttributes(m) + cfgs = append(cfgs, cfg) + } + return cfgs, nil +} + +// dump all devices if dev is nil +func (h *Handle) vdpaMGMTDevGet(bus, dev *string) ([]*VDPAMGMTDev, error) { + var extraFlags int + var attrs []*nl.RtAttr + if dev != nil { + attrs = append(attrs, + nl.NewRtAttr(nl.VDPA_ATTR_MGMTDEV_DEV_NAME, nl.ZeroTerminated(*dev)), + ) + if bus != nil { + attrs = append(attrs, + nl.NewRtAttr(nl.VDPA_ATTR_MGMTDEV_BUS_NAME, nl.ZeroTerminated(*bus)), + ) + } + } else { + extraFlags = extraFlags | unix.NLM_F_DUMP + } + messages, err := h.vdpaRequest(nl.VDPA_CMD_MGMTDEV_GET, extraFlags, attrs) + if err != nil { + return nil, err + } + cfgs := make([]*VDPAMGMTDev, 0, len(messages)) + for _, m := range messages { + cfg := &VDPAMGMTDev{} + cfg.parseAttributes(m) + cfgs = append(cfgs, cfg) + } + return cfgs, nil +} + +// VDPANewDev adds new VDPA device +// Equivalent to: `vdpa dev add name mgmtdev /mgmtName [params]` +func (h *Handle) VDPANewDev(name, mgmtBus, mgmtName string, params VDPANewDevParams) error { + attrs := []*nl.RtAttr{ + nl.NewRtAttr(nl.VDPA_ATTR_DEV_NAME, nl.ZeroTerminated(name)), + nl.NewRtAttr(nl.VDPA_ATTR_MGMTDEV_DEV_NAME, nl.ZeroTerminated(mgmtName)), + } + if mgmtBus != "" { + attrs = append(attrs, nl.NewRtAttr(nl.VDPA_ATTR_MGMTDEV_BUS_NAME, nl.ZeroTerminated(mgmtBus))) + } + if len(params.MACAddr) != 0 { + attrs = append(attrs, nl.NewRtAttr(nl.VDPA_ATTR_DEV_NET_CFG_MACADDR, params.MACAddr)) + } + if params.MaxVQP > 0 { + attrs = append(attrs, nl.NewRtAttr(nl.VDPA_ATTR_DEV_NET_CFG_MAX_VQP, nl.Uint16Attr(params.MaxVQP))) + } + if params.MTU > 0 { + attrs = append(attrs, nl.NewRtAttr(nl.VDPA_ATTR_DEV_NET_CFG_MTU, nl.Uint16Attr(params.MTU))) + } + if params.Features > 0 { + attrs = append(attrs, nl.NewRtAttr(nl.VDPA_ATTR_DEV_FEATURES, nl.Uint64Attr(params.Features))) + } + _, err := h.vdpaRequest(nl.VDPA_CMD_DEV_NEW, 0, attrs) + return err +} + +// VDPADelDev removes VDPA device +// Equivalent to: `vdpa dev del ` +func (h *Handle) VDPADelDev(name string) error { + _, err := h.vdpaRequest(nl.VDPA_CMD_DEV_DEL, 0, []*nl.RtAttr{ + nl.NewRtAttr(nl.VDPA_ATTR_DEV_NAME, nl.ZeroTerminated(name))}) + return err +} + +// VDPAGetDevList returns list of VDPA devices +// Equivalent to: `vdpa dev show` +func (h *Handle) VDPAGetDevList() ([]*VDPADev, error) { + return h.vdpaDevGet(nil) +} + +// VDPAGetDevByName returns VDPA device selected by name +// Equivalent to: `vdpa dev show ` +func (h *Handle) VDPAGetDevByName(name string) (*VDPADev, error) { + devs, err := h.vdpaDevGet(&name) + if err != nil { + return nil, err + } + if len(devs) == 0 { + return nil, fmt.Errorf("device not found") + } + return devs[0], nil +} + +// VDPAGetDevConfigList returns list of VDPA devices configurations +// Equivalent to: `vdpa dev config show` +func (h *Handle) VDPAGetDevConfigList() ([]*VDPADevConfig, error) { + return h.vdpaDevConfigGet(nil) +} + +// VDPAGetDevConfigByName returns VDPA device configuration selected by name +// Equivalent to: `vdpa dev config show ` +func (h *Handle) VDPAGetDevConfigByName(name string) (*VDPADevConfig, error) { + cfgs, err := h.vdpaDevConfigGet(&name) + if err != nil { + return nil, err + } + if len(cfgs) == 0 { + return nil, fmt.Errorf("configuration not found") + } + return cfgs[0], nil +} + +// VDPAGetDevVStats returns vstats for VDPA device +// Equivalent to: `vdpa dev vstats show qidx ` +func (h *Handle) VDPAGetDevVStats(name string, queueIndex uint32) (*VDPADevVStats, error) { + messages, err := h.vdpaRequest(nl.VDPA_CMD_DEV_VSTATS_GET, 0, []*nl.RtAttr{ + nl.NewRtAttr(nl.VDPA_ATTR_DEV_NAME, nl.ZeroTerminated(name)), + nl.NewRtAttr(nl.VDPA_ATTR_DEV_QUEUE_INDEX, nl.Uint32Attr(queueIndex)), + }) + if err != nil { + return nil, err + } + if len(messages) == 0 { + return nil, fmt.Errorf("stats not found") + } + stats := &VDPADevVStats{} + stats.parseAttributes(messages[0]) + return stats, nil +} + +// VDPAGetMGMTDevList returns list of mgmt devices +// Equivalent to: `vdpa mgmtdev show` +func (h *Handle) VDPAGetMGMTDevList() ([]*VDPAMGMTDev, error) { + return h.vdpaMGMTDevGet(nil, nil) +} + +// VDPAGetMGMTDevByBusAndName returns mgmt devices selected by bus and name +// Equivalent to: `vdpa mgmtdev show /` +func (h *Handle) VDPAGetMGMTDevByBusAndName(bus, name string) (*VDPAMGMTDev, error) { + var busPtr *string + if bus != "" { + busPtr = &bus + } + devs, err := h.vdpaMGMTDevGet(busPtr, &name) + if err != nil { + return nil, err + } + if len(devs) == 0 { + return nil, fmt.Errorf("mgmtdev not found") + } + return devs[0], nil +} diff --git a/vendor/github.com/vishvananda/netlink/virtio.go b/vendor/github.com/vishvananda/netlink/virtio.go new file mode 100644 index 00000000..78a497bb --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/virtio.go @@ -0,0 +1,132 @@ +package netlink + +// features for virtio net +const ( + VIRTIO_NET_F_CSUM = 0 // Host handles pkts w/ partial csum + VIRTIO_NET_F_GUEST_CSUM = 1 // Guest handles pkts w/ partial csum + VIRTIO_NET_F_CTRL_GUEST_OFFLOADS = 2 // Dynamic offload configuration. + VIRTIO_NET_F_MTU = 3 // Initial MTU advice + VIRTIO_NET_F_MAC = 5 // Host has given MAC address. + VIRTIO_NET_F_GUEST_TSO4 = 7 // Guest can handle TSOv4 in. + VIRTIO_NET_F_GUEST_TSO6 = 8 // Guest can handle TSOv6 in. + VIRTIO_NET_F_GUEST_ECN = 9 // Guest can handle TSO[6] w/ ECN in. + VIRTIO_NET_F_GUEST_UFO = 10 // Guest can handle UFO in. + VIRTIO_NET_F_HOST_TSO4 = 11 // Host can handle TSOv4 in. + VIRTIO_NET_F_HOST_TSO6 = 12 // Host can handle TSOv6 in. + VIRTIO_NET_F_HOST_ECN = 13 // Host can handle TSO[6] w/ ECN in. + VIRTIO_NET_F_HOST_UFO = 14 // Host can handle UFO in. + VIRTIO_NET_F_MRG_RXBUF = 15 // Host can merge receive buffers. + VIRTIO_NET_F_STATUS = 16 // virtio_net_config.status available + VIRTIO_NET_F_CTRL_VQ = 17 // Control channel available + VIRTIO_NET_F_CTRL_RX = 18 // Control channel RX mode support + VIRTIO_NET_F_CTRL_VLAN = 19 // Control channel VLAN filtering + VIRTIO_NET_F_CTRL_RX_EXTRA = 20 // Extra RX mode control support + VIRTIO_NET_F_GUEST_ANNOUNCE = 21 // Guest can announce device on the* network + VIRTIO_NET_F_MQ = 22 // Device supports Receive Flow Steering + VIRTIO_NET_F_CTRL_MAC_ADDR = 23 // Set MAC address + VIRTIO_NET_F_VQ_NOTF_COAL = 52 // Device supports virtqueue notification coalescing + VIRTIO_NET_F_NOTF_COAL = 53 // Device supports notifications coalescing + VIRTIO_NET_F_GUEST_USO4 = 54 // Guest can handle USOv4 in. + VIRTIO_NET_F_GUEST_USO6 = 55 // Guest can handle USOv6 in. + VIRTIO_NET_F_HOST_USO = 56 // Host can handle USO in. + VIRTIO_NET_F_HASH_REPORT = 57 // Supports hash report + VIRTIO_NET_F_GUEST_HDRLEN = 59 // Guest provides the exact hdr_len value. + VIRTIO_NET_F_RSS = 60 // Supports RSS RX steering + VIRTIO_NET_F_RSC_EXT = 61 // extended coalescing info + VIRTIO_NET_F_STANDBY = 62 // Act as standby for another device with the same MAC. + VIRTIO_NET_F_SPEED_DUPLEX = 63 // Device set linkspeed and duplex + VIRTIO_NET_F_GSO = 6 // Host handles pkts any GSO type +) + +// virtio net status +const ( + VIRTIO_NET_S_LINK_UP = 1 // Link is up + VIRTIO_NET_S_ANNOUNCE = 2 // Announcement is needed +) + +// virtio config +const ( + // Do we get callbacks when the ring is completely used, even if we've + // suppressed them? + VIRTIO_F_NOTIFY_ON_EMPTY = 24 + // Can the device handle any descriptor layout? + VIRTIO_F_ANY_LAYOUT = 27 + // v1.0 compliant + VIRTIO_F_VERSION_1 = 32 + // If clear - device has the platform DMA (e.g. IOMMU) bypass quirk feature. + // If set - use platform DMA tools to access the memory. + // Note the reverse polarity (compared to most other features), + // this is for compatibility with legacy systems. + VIRTIO_F_ACCESS_PLATFORM = 33 + // Legacy name for VIRTIO_F_ACCESS_PLATFORM (for compatibility with old userspace) + VIRTIO_F_IOMMU_PLATFORM = VIRTIO_F_ACCESS_PLATFORM + // This feature indicates support for the packed virtqueue layout. + VIRTIO_F_RING_PACKED = 34 + // Inorder feature indicates that all buffers are used by the device + // in the same order in which they have been made available. + VIRTIO_F_IN_ORDER = 35 + // This feature indicates that memory accesses by the driver and the + // device are ordered in a way described by the platform. + VIRTIO_F_ORDER_PLATFORM = 36 + // Does the device support Single Root I/O Virtualization? + VIRTIO_F_SR_IOV = 37 + // This feature indicates that the driver passes extra data (besides + // identifying the virtqueue) in its device notifications. + VIRTIO_F_NOTIFICATION_DATA = 38 + // This feature indicates that the driver uses the data provided by the device + // as a virtqueue identifier in available buffer notifications. + VIRTIO_F_NOTIF_CONFIG_DATA = 39 + // This feature indicates that the driver can reset a queue individually. + VIRTIO_F_RING_RESET = 40 +) + +// virtio device ids +const ( + VIRTIO_ID_NET = 1 // virtio net + VIRTIO_ID_BLOCK = 2 // virtio block + VIRTIO_ID_CONSOLE = 3 // virtio console + VIRTIO_ID_RNG = 4 // virtio rng + VIRTIO_ID_BALLOON = 5 // virtio balloon + VIRTIO_ID_IOMEM = 6 // virtio ioMemory + VIRTIO_ID_RPMSG = 7 // virtio remote processor messaging + VIRTIO_ID_SCSI = 8 // virtio scsi + VIRTIO_ID_9P = 9 // 9p virtio console + VIRTIO_ID_MAC80211_WLAN = 10 // virtio WLAN MAC + VIRTIO_ID_RPROC_SERIAL = 11 // virtio remoteproc serial link + VIRTIO_ID_CAIF = 12 // Virtio caif + VIRTIO_ID_MEMORY_BALLOON = 13 // virtio memory balloon + VIRTIO_ID_GPU = 16 // virtio GPU + VIRTIO_ID_CLOCK = 17 // virtio clock/timer + VIRTIO_ID_INPUT = 18 // virtio input + VIRTIO_ID_VSOCK = 19 // virtio vsock transport + VIRTIO_ID_CRYPTO = 20 // virtio crypto + VIRTIO_ID_SIGNAL_DIST = 21 // virtio signal distribution device + VIRTIO_ID_PSTORE = 22 // virtio pstore device + VIRTIO_ID_IOMMU = 23 // virtio IOMMU + VIRTIO_ID_MEM = 24 // virtio mem + VIRTIO_ID_SOUND = 25 // virtio sound + VIRTIO_ID_FS = 26 // virtio filesystem + VIRTIO_ID_PMEM = 27 // virtio pmem + VIRTIO_ID_RPMB = 28 // virtio rpmb + VIRTIO_ID_MAC80211_HWSIM = 29 // virtio mac80211-hwsim + VIRTIO_ID_VIDEO_ENCODER = 30 // virtio video encoder + VIRTIO_ID_VIDEO_DECODER = 31 // virtio video decoder + VIRTIO_ID_SCMI = 32 // virtio SCMI + VIRTIO_ID_NITRO_SEC_MOD = 33 // virtio nitro secure module + VIRTIO_ID_I2C_ADAPTER = 34 // virtio i2c adapter + VIRTIO_ID_WATCHDOG = 35 // virtio watchdog + VIRTIO_ID_CAN = 36 // virtio can + VIRTIO_ID_DMABUF = 37 // virtio dmabuf + VIRTIO_ID_PARAM_SERV = 38 // virtio parameter server + VIRTIO_ID_AUDIO_POLICY = 39 // virtio audio policy + VIRTIO_ID_BT = 40 // virtio bluetooth + VIRTIO_ID_GPIO = 41 // virtio gpio + // Virtio Transitional IDs + VIRTIO_TRANS_ID_NET = 0x1000 // transitional virtio net + VIRTIO_TRANS_ID_BLOCK = 0x1001 // transitional virtio block + VIRTIO_TRANS_ID_BALLOON = 0x1002 // transitional virtio balloon + VIRTIO_TRANS_ID_CONSOLE = 0x1003 // transitional virtio console + VIRTIO_TRANS_ID_SCSI = 0x1004 // transitional virtio SCSI + VIRTIO_TRANS_ID_RNG = 0x1005 // transitional virtio rng + VIRTIO_TRANS_ID_9P = 0x1009 // transitional virtio 9p console +) diff --git a/vendor/github.com/vishvananda/netlink/xdp_diag.go b/vendor/github.com/vishvananda/netlink/xdp_diag.go new file mode 100644 index 00000000..e88825bf --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/xdp_diag.go @@ -0,0 +1,34 @@ +package netlink + +import "github.com/vishvananda/netlink/nl" + +const SOCK_ANY_COOKIE = uint64(nl.TCPDIAG_NOCOOKIE)<<32 + uint64(nl.TCPDIAG_NOCOOKIE) + +// XDP diagnosis show flag constants to request particular information elements. +const ( + XDP_SHOW_INFO = 1 << iota + XDP_SHOW_RING_CFG + XDP_SHOW_UMEM + XDP_SHOW_MEMINFO + XDP_SHOW_STATS +) + +// XDP diag element constants +const ( + XDP_DIAG_NONE = iota + XDP_DIAG_INFO // when using XDP_SHOW_INFO + XDP_DIAG_UID // when using XDP_SHOW_INFO + XDP_DIAG_RX_RING // when using XDP_SHOW_RING_CFG + XDP_DIAG_TX_RING // when using XDP_SHOW_RING_CFG + XDP_DIAG_UMEM // when using XDP_SHOW_UMEM + XDP_DIAG_UMEM_FILL_RING // when using XDP_SHOW_UMEM + XDP_DIAG_UMEM_COMPLETION_RING // when using XDP_SHOW_UMEM + XDP_DIAG_MEMINFO // when using XDP_SHOW_MEMINFO + XDP_DIAG_STATS // when using XDP_SHOW_STATS +) + +// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L21 +type XDPDiagInfoResp struct { + XDPDiagMsg *XDPSocket + XDPInfo *XDPInfo +} diff --git a/vendor/github.com/vishvananda/netlink/xdp_linux.go b/vendor/github.com/vishvananda/netlink/xdp_linux.go new file mode 100644 index 00000000..896a406d --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/xdp_linux.go @@ -0,0 +1,46 @@ +package netlink + +import ( + "bytes" + "fmt" +) + +const ( + xdrDiagUmemLen = 8 + 8*4 + xdrDiagStatsLen = 6 * 8 +) + +func (x *XDPDiagUmem) deserialize(b []byte) error { + if len(b) < xdrDiagUmemLen { + return fmt.Errorf("XDP umem diagnosis data short read (%d); want %d", len(b), xdrDiagUmemLen) + } + + rb := bytes.NewBuffer(b) + x.Size = native.Uint64(rb.Next(8)) + x.ID = native.Uint32(rb.Next(4)) + x.NumPages = native.Uint32(rb.Next(4)) + x.ChunkSize = native.Uint32(rb.Next(4)) + x.Headroom = native.Uint32(rb.Next(4)) + x.Ifindex = native.Uint32(rb.Next(4)) + x.QueueID = native.Uint32(rb.Next(4)) + x.Flags = native.Uint32(rb.Next(4)) + x.Refs = native.Uint32(rb.Next(4)) + + return nil +} + +func (x *XDPDiagStats) deserialize(b []byte) error { + if len(b) < xdrDiagStatsLen { + return fmt.Errorf("XDP diagnosis statistics short read (%d); want %d", len(b), xdrDiagStatsLen) + } + + rb := bytes.NewBuffer(b) + x.RxDropped = native.Uint64(rb.Next(8)) + x.RxInvalid = native.Uint64(rb.Next(8)) + x.RxFull = native.Uint64(rb.Next(8)) + x.FillRingEmpty = native.Uint64(rb.Next(8)) + x.TxInvalid = native.Uint64(rb.Next(8)) + x.TxRingEmpty = native.Uint64(rb.Next(8)) + + return nil +} diff --git a/vendor/github.com/vishvananda/netlink/xfrm.go b/vendor/github.com/vishvananda/netlink/xfrm_linux.go similarity index 95% rename from vendor/github.com/vishvananda/netlink/xfrm.go rename to vendor/github.com/vishvananda/netlink/xfrm_linux.go index 02b41842..dd38ed8e 100644 --- a/vendor/github.com/vishvananda/netlink/xfrm.go +++ b/vendor/github.com/vishvananda/netlink/xfrm_linux.go @@ -14,7 +14,7 @@ const ( XFRM_PROTO_ESP Proto = unix.IPPROTO_ESP XFRM_PROTO_AH Proto = unix.IPPROTO_AH XFRM_PROTO_HAO Proto = unix.IPPROTO_DSTOPTS - XFRM_PROTO_COMP Proto = 0x6c // NOTE not defined on darwin + XFRM_PROTO_COMP Proto = unix.IPPROTO_COMP XFRM_PROTO_IPSEC_ANY Proto = unix.IPPROTO_RAW ) diff --git a/vendor/github.com/vishvananda/netlink/xfrm_policy.go b/vendor/github.com/vishvananda/netlink/xfrm_policy.go deleted file mode 100644 index b7532b09..00000000 --- a/vendor/github.com/vishvananda/netlink/xfrm_policy.go +++ /dev/null @@ -1,97 +0,0 @@ -package netlink - -import ( - "fmt" - "net" -) - -// Dir is an enum representing an ipsec template direction. -type Dir uint8 - -const ( - XFRM_DIR_IN Dir = iota - XFRM_DIR_OUT - XFRM_DIR_FWD - XFRM_SOCKET_IN - XFRM_SOCKET_OUT - XFRM_SOCKET_FWD -) - -func (d Dir) String() string { - switch d { - case XFRM_DIR_IN: - return "dir in" - case XFRM_DIR_OUT: - return "dir out" - case XFRM_DIR_FWD: - return "dir fwd" - case XFRM_SOCKET_IN: - return "socket in" - case XFRM_SOCKET_OUT: - return "socket out" - case XFRM_SOCKET_FWD: - return "socket fwd" - } - return fmt.Sprintf("socket %d", d-XFRM_SOCKET_IN) -} - -// PolicyAction is an enum representing an ipsec policy action. -type PolicyAction uint8 - -const ( - XFRM_POLICY_ALLOW PolicyAction = 0 - XFRM_POLICY_BLOCK PolicyAction = 1 -) - -func (a PolicyAction) String() string { - switch a { - case XFRM_POLICY_ALLOW: - return "allow" - case XFRM_POLICY_BLOCK: - return "block" - default: - return fmt.Sprintf("action %d", a) - } -} - -// XfrmPolicyTmpl encapsulates a rule for the base addresses of an ipsec -// policy. These rules are matched with XfrmState to determine encryption -// and authentication algorithms. -type XfrmPolicyTmpl struct { - Dst net.IP - Src net.IP - Proto Proto - Mode Mode - Spi int - Reqid int - Optional int -} - -func (t XfrmPolicyTmpl) String() string { - return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, Mode: %s, Spi: 0x%x, Reqid: 0x%x}", - t.Dst, t.Src, t.Proto, t.Mode, t.Spi, t.Reqid) -} - -// XfrmPolicy represents an ipsec policy. It represents the overlay network -// and has a list of XfrmPolicyTmpls representing the base addresses of -// the policy. -type XfrmPolicy struct { - Dst *net.IPNet - Src *net.IPNet - Proto Proto - DstPort int - SrcPort int - Dir Dir - Priority int - Index int - Action PolicyAction - Ifindex int - Ifid int - Mark *XfrmMark - Tmpls []XfrmPolicyTmpl -} - -func (p XfrmPolicy) String() string { - return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, DstPort: %d, SrcPort: %d, Dir: %s, Priority: %d, Index: %d, Action: %s, Ifindex: %d, Ifid: %d, Mark: %s, Tmpls: %s}", - p.Dst, p.Src, p.Proto, p.DstPort, p.SrcPort, p.Dir, p.Priority, p.Index, p.Action, p.Ifindex, p.Ifid, p.Mark, p.Tmpls) -} diff --git a/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go b/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go index 35849680..d526739c 100644 --- a/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go +++ b/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go @@ -1,10 +1,104 @@ package netlink import ( + "fmt" + "net" + "github.com/vishvananda/netlink/nl" "golang.org/x/sys/unix" ) +// Dir is an enum representing an ipsec template direction. +type Dir uint8 + +const ( + XFRM_DIR_IN Dir = iota + XFRM_DIR_OUT + XFRM_DIR_FWD + XFRM_SOCKET_IN + XFRM_SOCKET_OUT + XFRM_SOCKET_FWD +) + +func (d Dir) String() string { + switch d { + case XFRM_DIR_IN: + return "dir in" + case XFRM_DIR_OUT: + return "dir out" + case XFRM_DIR_FWD: + return "dir fwd" + case XFRM_SOCKET_IN: + return "socket in" + case XFRM_SOCKET_OUT: + return "socket out" + case XFRM_SOCKET_FWD: + return "socket fwd" + } + return fmt.Sprintf("socket %d", d-XFRM_SOCKET_IN) +} + +// PolicyAction is an enum representing an ipsec policy action. +type PolicyAction uint8 + +const ( + XFRM_POLICY_ALLOW PolicyAction = 0 + XFRM_POLICY_BLOCK PolicyAction = 1 +) + +func (a PolicyAction) String() string { + switch a { + case XFRM_POLICY_ALLOW: + return "allow" + case XFRM_POLICY_BLOCK: + return "block" + default: + return fmt.Sprintf("action %d", a) + } +} + +// XfrmPolicyTmpl encapsulates a rule for the base addresses of an ipsec +// policy. These rules are matched with XfrmState to determine encryption +// and authentication algorithms. +type XfrmPolicyTmpl struct { + Dst net.IP + Src net.IP + Proto Proto + Mode Mode + Spi int + Reqid int + Optional int +} + +func (t XfrmPolicyTmpl) String() string { + return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, Mode: %s, Spi: 0x%x, Reqid: 0x%x}", + t.Dst, t.Src, t.Proto, t.Mode, t.Spi, t.Reqid) +} + +// XfrmPolicy represents an ipsec policy. It represents the overlay network +// and has a list of XfrmPolicyTmpls representing the base addresses of +// the policy. +type XfrmPolicy struct { + Dst *net.IPNet + Src *net.IPNet + Proto Proto + DstPort int + SrcPort int + Dir Dir + Priority int + Index int + Action PolicyAction + Ifindex int + Ifid int + Mark *XfrmMark + Tmpls []XfrmPolicyTmpl +} + +func (p XfrmPolicy) String() string { + return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, DstPort: %d, SrcPort: %d, Dir: %s, Priority: %d, Index: %d, Action: %s, Ifindex: %d, Ifid: %d, Mark: %s, Tmpls: %s}", + p.Dst, p.Src, p.Proto, p.DstPort, p.SrcPort, p.Dir, p.Priority, p.Index, p.Action, p.Ifindex, p.Ifid, p.Mark, p.Tmpls) +} + func selFromPolicy(sel *nl.XfrmSelector, policy *XfrmPolicy) { sel.Family = uint16(nl.FAMILY_V4) if policy.Dst != nil { @@ -75,6 +169,7 @@ func (h *Handle) xfrmPolicyAddOrUpdate(policy *XfrmPolicy, nlProto int) error { userTmpl := nl.DeserializeXfrmUserTmpl(tmplData[start : start+nl.SizeofXfrmUserTmpl]) userTmpl.XfrmId.Daddr.FromIP(tmpl.Dst) userTmpl.Saddr.FromIP(tmpl.Src) + userTmpl.Family = uint16(nl.GetIPFamily(tmpl.Dst)) userTmpl.XfrmId.Proto = uint8(tmpl.Proto) userTmpl.XfrmId.Spi = nl.Swap32(uint32(tmpl.Spi)) userTmpl.Mode = uint8(tmpl.Mode) @@ -223,8 +318,8 @@ func parseXfrmPolicy(m []byte, family int) (*XfrmPolicy, error) { var policy XfrmPolicy - policy.Dst = msg.Sel.Daddr.ToIPNet(msg.Sel.PrefixlenD) - policy.Src = msg.Sel.Saddr.ToIPNet(msg.Sel.PrefixlenS) + policy.Dst = msg.Sel.Daddr.ToIPNet(msg.Sel.PrefixlenD, uint16(family)) + policy.Src = msg.Sel.Saddr.ToIPNet(msg.Sel.PrefixlenS, uint16(family)) policy.Proto = Proto(msg.Sel.Proto) policy.DstPort = int(nl.Swap16(msg.Sel.Dport)) policy.SrcPort = int(nl.Swap16(msg.Sel.Sport)) diff --git a/vendor/github.com/vishvananda/netlink/xfrm_state.go b/vendor/github.com/vishvananda/netlink/xfrm_state.go deleted file mode 100644 index 19df82c7..00000000 --- a/vendor/github.com/vishvananda/netlink/xfrm_state.go +++ /dev/null @@ -1,131 +0,0 @@ -package netlink - -import ( - "fmt" - "net" - "time" -) - -// XfrmStateAlgo represents the algorithm to use for the ipsec encryption. -type XfrmStateAlgo struct { - Name string - Key []byte - TruncateLen int // Auth only - ICVLen int // AEAD only -} - -func (a XfrmStateAlgo) String() string { - base := fmt.Sprintf("{Name: %s, Key: 0x%x", a.Name, a.Key) - if a.TruncateLen != 0 { - base = fmt.Sprintf("%s, Truncate length: %d", base, a.TruncateLen) - } - if a.ICVLen != 0 { - base = fmt.Sprintf("%s, ICV length: %d", base, a.ICVLen) - } - return fmt.Sprintf("%s}", base) -} - -// EncapType is an enum representing the optional packet encapsulation. -type EncapType uint8 - -const ( - XFRM_ENCAP_ESPINUDP_NONIKE EncapType = iota + 1 - XFRM_ENCAP_ESPINUDP -) - -func (e EncapType) String() string { - switch e { - case XFRM_ENCAP_ESPINUDP_NONIKE: - return "espinudp-non-ike" - case XFRM_ENCAP_ESPINUDP: - return "espinudp" - } - return "unknown" -} - -// XfrmStateEncap represents the encapsulation to use for the ipsec encryption. -type XfrmStateEncap struct { - Type EncapType - SrcPort int - DstPort int - OriginalAddress net.IP -} - -func (e XfrmStateEncap) String() string { - return fmt.Sprintf("{Type: %s, Srcport: %d, DstPort: %d, OriginalAddress: %v}", - e.Type, e.SrcPort, e.DstPort, e.OriginalAddress) -} - -// XfrmStateLimits represents the configured limits for the state. -type XfrmStateLimits struct { - ByteSoft uint64 - ByteHard uint64 - PacketSoft uint64 - PacketHard uint64 - TimeSoft uint64 - TimeHard uint64 - TimeUseSoft uint64 - TimeUseHard uint64 -} - -// XfrmStateStats represents the current number of bytes/packets -// processed by this State, the State's installation and first use -// time and the replay window counters. -type XfrmStateStats struct { - ReplayWindow uint32 - Replay uint32 - Failed uint32 - Bytes uint64 - Packets uint64 - AddTime uint64 - UseTime uint64 -} - -// XfrmState represents the state of an ipsec policy. It optionally -// contains an XfrmStateAlgo for encryption and one for authentication. -type XfrmState struct { - Dst net.IP - Src net.IP - Proto Proto - Mode Mode - Spi int - Reqid int - ReplayWindow int - Limits XfrmStateLimits - Statistics XfrmStateStats - Mark *XfrmMark - OutputMark *XfrmMark - Ifid int - Auth *XfrmStateAlgo - Crypt *XfrmStateAlgo - Aead *XfrmStateAlgo - Encap *XfrmStateEncap - ESN bool -} - -func (sa XfrmState) String() string { - return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, OutputMark: %v, Ifid: %d, Auth: %v, Crypt: %v, Aead: %v, Encap: %v, ESN: %t", - sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.OutputMark, sa.Ifid, sa.Auth, sa.Crypt, sa.Aead, sa.Encap, sa.ESN) -} -func (sa XfrmState) Print(stats bool) string { - if !stats { - return sa.String() - } - at := time.Unix(int64(sa.Statistics.AddTime), 0).Format(time.UnixDate) - ut := "-" - if sa.Statistics.UseTime > 0 { - ut = time.Unix(int64(sa.Statistics.UseTime), 0).Format(time.UnixDate) - } - return fmt.Sprintf("%s, ByteSoft: %s, ByteHard: %s, PacketSoft: %s, PacketHard: %s, TimeSoft: %d, TimeHard: %d, TimeUseSoft: %d, TimeUseHard: %d, Bytes: %d, Packets: %d, "+ - "AddTime: %s, UseTime: %s, ReplayWindow: %d, Replay: %d, Failed: %d", - sa.String(), printLimit(sa.Limits.ByteSoft), printLimit(sa.Limits.ByteHard), printLimit(sa.Limits.PacketSoft), printLimit(sa.Limits.PacketHard), - sa.Limits.TimeSoft, sa.Limits.TimeHard, sa.Limits.TimeUseSoft, sa.Limits.TimeUseHard, sa.Statistics.Bytes, sa.Statistics.Packets, at, ut, - sa.Statistics.ReplayWindow, sa.Statistics.Replay, sa.Statistics.Failed) -} - -func printLimit(lmt uint64) string { - if lmt == ^uint64(0) { - return "(INF)" - } - return fmt.Sprintf("%d", lmt) -} diff --git a/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go b/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go index 61a2d2de..554f2498 100644 --- a/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go +++ b/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go @@ -2,12 +2,154 @@ package netlink import ( "fmt" + "net" + "time" "unsafe" "github.com/vishvananda/netlink/nl" "golang.org/x/sys/unix" ) +// XfrmStateAlgo represents the algorithm to use for the ipsec encryption. +type XfrmStateAlgo struct { + Name string + Key []byte + TruncateLen int // Auth only + ICVLen int // AEAD only +} + +func (a XfrmStateAlgo) String() string { + base := fmt.Sprintf("{Name: %s, Key: 0x%x", a.Name, a.Key) + if a.TruncateLen != 0 { + base = fmt.Sprintf("%s, Truncate length: %d", base, a.TruncateLen) + } + if a.ICVLen != 0 { + base = fmt.Sprintf("%s, ICV length: %d", base, a.ICVLen) + } + return fmt.Sprintf("%s}", base) +} + +// EncapType is an enum representing the optional packet encapsulation. +type EncapType uint8 + +const ( + XFRM_ENCAP_ESPINUDP_NONIKE EncapType = iota + 1 + XFRM_ENCAP_ESPINUDP +) + +func (e EncapType) String() string { + switch e { + case XFRM_ENCAP_ESPINUDP_NONIKE: + return "espinudp-non-ike" + case XFRM_ENCAP_ESPINUDP: + return "espinudp" + } + return "unknown" +} + +// XfrmStateEncap represents the encapsulation to use for the ipsec encryption. +type XfrmStateEncap struct { + Type EncapType + SrcPort int + DstPort int + OriginalAddress net.IP +} + +func (e XfrmStateEncap) String() string { + return fmt.Sprintf("{Type: %s, Srcport: %d, DstPort: %d, OriginalAddress: %v}", + e.Type, e.SrcPort, e.DstPort, e.OriginalAddress) +} + +// XfrmStateLimits represents the configured limits for the state. +type XfrmStateLimits struct { + ByteSoft uint64 + ByteHard uint64 + PacketSoft uint64 + PacketHard uint64 + TimeSoft uint64 + TimeHard uint64 + TimeUseSoft uint64 + TimeUseHard uint64 +} + +// XfrmStateStats represents the current number of bytes/packets +// processed by this State, the State's installation and first use +// time and the replay window counters. +type XfrmStateStats struct { + ReplayWindow uint32 + Replay uint32 + Failed uint32 + Bytes uint64 + Packets uint64 + AddTime uint64 + UseTime uint64 +} + +// XfrmReplayState represents the sequence number states for +// "legacy" anti-replay mode. +type XfrmReplayState struct { + OSeq uint32 + Seq uint32 + BitMap uint32 +} + +func (r XfrmReplayState) String() string { + return fmt.Sprintf("{OSeq: 0x%x, Seq: 0x%x, BitMap: 0x%x}", + r.OSeq, r.Seq, r.BitMap) +} + +// XfrmState represents the state of an ipsec policy. It optionally +// contains an XfrmStateAlgo for encryption and one for authentication. +type XfrmState struct { + Dst net.IP + Src net.IP + Proto Proto + Mode Mode + Spi int + Reqid int + ReplayWindow int + Limits XfrmStateLimits + Statistics XfrmStateStats + Mark *XfrmMark + OutputMark *XfrmMark + Ifid int + Auth *XfrmStateAlgo + Crypt *XfrmStateAlgo + Aead *XfrmStateAlgo + Encap *XfrmStateEncap + ESN bool + DontEncapDSCP bool + OSeqMayWrap bool + Replay *XfrmReplayState + Selector *XfrmPolicy +} + +func (sa XfrmState) String() string { + return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, OutputMark: %v, Ifid: %d, Auth: %v, Crypt: %v, Aead: %v, Encap: %v, ESN: %t, DontEncapDSCP: %t, OSeqMayWrap: %t, Replay: %v", + sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.OutputMark, sa.Ifid, sa.Auth, sa.Crypt, sa.Aead, sa.Encap, sa.ESN, sa.DontEncapDSCP, sa.OSeqMayWrap, sa.Replay) +} +func (sa XfrmState) Print(stats bool) string { + if !stats { + return sa.String() + } + at := time.Unix(int64(sa.Statistics.AddTime), 0).Format(time.UnixDate) + ut := "-" + if sa.Statistics.UseTime > 0 { + ut = time.Unix(int64(sa.Statistics.UseTime), 0).Format(time.UnixDate) + } + return fmt.Sprintf("%s, ByteSoft: %s, ByteHard: %s, PacketSoft: %s, PacketHard: %s, TimeSoft: %d, TimeHard: %d, TimeUseSoft: %d, TimeUseHard: %d, Bytes: %d, Packets: %d, "+ + "AddTime: %s, UseTime: %s, ReplayWindow: %d, Replay: %d, Failed: %d", + sa.String(), printLimit(sa.Limits.ByteSoft), printLimit(sa.Limits.ByteHard), printLimit(sa.Limits.PacketSoft), printLimit(sa.Limits.PacketHard), + sa.Limits.TimeSoft, sa.Limits.TimeHard, sa.Limits.TimeUseSoft, sa.Limits.TimeUseHard, sa.Statistics.Bytes, sa.Statistics.Packets, at, ut, + sa.Statistics.ReplayWindow, sa.Statistics.Replay, sa.Statistics.Failed) +} + +func printLimit(lmt uint64) string { + if lmt == ^uint64(0) { + return "(INF)" + } + return fmt.Sprintf("%d", lmt) +} func writeStateAlgo(a *XfrmStateAlgo) []byte { algo := nl.XfrmAlgo{ AlgKeyLen: uint32(len(a.Key) * 8), @@ -77,6 +219,14 @@ func writeReplayEsn(replayWindow int) []byte { return replayEsn.Serialize() } +func writeReplay(r *XfrmReplayState) []byte { + return (&nl.XfrmReplayState{ + OSeq: r.OSeq, + Seq: r.Seq, + BitMap: r.BitMap, + }).Serialize() +} + // XfrmStateAdd will add an xfrm state to the system. // Equivalent to: `ip xfrm state add $state` func XfrmStateAdd(state *XfrmState) error { @@ -166,6 +316,21 @@ func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error { req.AddData(out) } } + if state.OSeqMayWrap || state.DontEncapDSCP { + var flags uint32 + if state.DontEncapDSCP { + flags |= nl.XFRM_SA_XFLAG_DONT_ENCAP_DSCP + } + if state.OSeqMayWrap { + flags |= nl.XFRM_SA_XFLAG_OSEQ_MAY_WRAP + } + out := nl.NewRtAttr(nl.XFRMA_SA_EXTRA_FLAGS, nl.Uint32Attr(flags)) + req.AddData(out) + } + if state.Replay != nil { + out := nl.NewRtAttr(nl.XFRMA_REPLAY_VAL, writeReplay(state.Replay)) + req.AddData(out) + } if state.Ifid != 0 { ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid))) @@ -186,7 +351,6 @@ func (h *Handle) xfrmStateAllocSpi(state *XfrmState) (*XfrmState, error) { msg.Min = 0x100 msg.Max = 0xffffffff req.AddData(msg) - if state.Mark != nil { out := nl.NewRtAttr(nl.XFRMA_MARK, writeMark(state.Mark)) req.AddData(out) @@ -314,7 +478,6 @@ var familyError = fmt.Errorf("family error") func xfrmStateFromXfrmUsersaInfo(msg *nl.XfrmUsersaInfo) *XfrmState { var state XfrmState - state.Dst = msg.Id.Daddr.ToIP() state.Src = msg.Saddr.ToIP() state.Proto = Proto(msg.Id.Proto) @@ -324,20 +487,25 @@ func xfrmStateFromXfrmUsersaInfo(msg *nl.XfrmUsersaInfo) *XfrmState { state.ReplayWindow = int(msg.ReplayWindow) lftToLimits(&msg.Lft, &state.Limits) curToStats(&msg.Curlft, &msg.Stats, &state.Statistics) + state.Selector = &XfrmPolicy{ + Dst: msg.Sel.Daddr.ToIPNet(msg.Sel.PrefixlenD, msg.Sel.Family), + Src: msg.Sel.Saddr.ToIPNet(msg.Sel.PrefixlenS, msg.Sel.Family), + Proto: Proto(msg.Sel.Proto), + DstPort: int(nl.Swap16(msg.Sel.Dport)), + SrcPort: int(nl.Swap16(msg.Sel.Sport)), + Ifindex: int(msg.Sel.Ifindex), + } return &state } func parseXfrmState(m []byte, family int) (*XfrmState, error) { msg := nl.DeserializeXfrmUsersaInfo(m) - // This is mainly for the state dump if family != FAMILY_ALL && family != int(msg.Family) { return nil, familyError } - state := xfrmStateFromXfrmUsersaInfo(msg) - attrs, err := nl.ParseRouteAttr(m[nl.SizeofXfrmUsersaInfo:]) if err != nil { return nil, err @@ -385,6 +553,14 @@ func parseXfrmState(m []byte, family int) (*XfrmState, error) { state.Mark = new(XfrmMark) state.Mark.Value = mark.Value state.Mark.Mask = mark.Mask + case nl.XFRMA_SA_EXTRA_FLAGS: + flags := native.Uint32(attr.Value) + if (flags & nl.XFRM_SA_XFLAG_DONT_ENCAP_DSCP) != 0 { + state.DontEncapDSCP = true + } + if (flags & nl.XFRM_SA_XFLAG_OSEQ_MAY_WRAP) != 0 { + state.OSeqMayWrap = true + } case nl.XFRMA_SET_MARK: if state.OutputMark == nil { state.OutputMark = new(XfrmMark) @@ -400,6 +576,14 @@ func parseXfrmState(m []byte, family int) (*XfrmState, error) { } case nl.XFRMA_IF_ID: state.Ifid = int(native.Uint32(attr.Value)) + case nl.XFRMA_REPLAY_VAL: + if state.Replay == nil { + state.Replay = new(XfrmReplayState) + } + replay := nl.DeserializeXfrmReplayState(attr.Value[:]) + state.Replay.OSeq = replay.OSeq + state.Replay.Seq = replay.Seq + state.Replay.BitMap = replay.BitMap } } @@ -476,6 +660,9 @@ func xfrmUsersaInfoFromXfrmState(state *XfrmState) *nl.XfrmUsersaInfo { msg.Id.Spi = nl.Swap32(uint32(state.Spi)) msg.Reqid = uint32(state.Reqid) msg.ReplayWindow = uint8(state.ReplayWindow) - + msg.Sel = nl.XfrmSelector{} + if state.Selector != nil { + selFromPolicy(&msg.Sel, state.Selector) + } return msg } diff --git a/vendor/github.com/vishvananda/netlink/xfrm_unspecified.go b/vendor/github.com/vishvananda/netlink/xfrm_unspecified.go new file mode 100644 index 00000000..12fdd26d --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/xfrm_unspecified.go @@ -0,0 +1,7 @@ +//go:build !linux +// +build !linux + +package netlink + +type XfrmPolicy struct{} +type XfrmState struct{} diff --git a/vendor/github.com/vishvananda/netns/.golangci.yml b/vendor/github.com/vishvananda/netns/.golangci.yml new file mode 100644 index 00000000..600bef78 --- /dev/null +++ b/vendor/github.com/vishvananda/netns/.golangci.yml @@ -0,0 +1,2 @@ +run: + timeout: 5m diff --git a/vendor/github.com/vishvananda/netns/README.md b/vendor/github.com/vishvananda/netns/README.md index 1fdb2d3e..bdfedbe8 100644 --- a/vendor/github.com/vishvananda/netns/README.md +++ b/vendor/github.com/vishvananda/netns/README.md @@ -23,6 +23,7 @@ import ( "fmt" "net" "runtime" + "github.com/vishvananda/netns" ) @@ -48,14 +49,3 @@ func main() { } ``` - -## NOTE - -The library can be safely used only with Go >= 1.10 due to [golang/go#20676](https://github.com/golang/go/issues/20676). - -After locking a goroutine to its current OS thread with `runtime.LockOSThread()` -and changing its network namespace, any new subsequent goroutine won't be -scheduled on that thread while it's locked. Therefore, the new goroutine -will run in a different namespace leading to unexpected results. - -See [here](https://www.weave.works/blog/linux-namespaces-golang-followup) for more details. diff --git a/vendor/github.com/vishvananda/netns/doc.go b/vendor/github.com/vishvananda/netns/doc.go new file mode 100644 index 00000000..cd4093a4 --- /dev/null +++ b/vendor/github.com/vishvananda/netns/doc.go @@ -0,0 +1,9 @@ +// Package netns allows ultra-simple network namespace handling. NsHandles +// can be retrieved and set. Note that the current namespace is thread +// local so actions that set and reset namespaces should use LockOSThread +// to make sure the namespace doesn't change due to a goroutine switch. +// It is best to close NsHandles when you are done with them. This can be +// accomplished via a `defer ns.Close()` on the handle. Changing namespaces +// requires elevated privileges, so in most cases this code needs to be run +// as root. +package netns diff --git a/vendor/github.com/vishvananda/netns/netns_linux.go b/vendor/github.com/vishvananda/netns/netns_linux.go index 6be5c55c..2ed7c7e2 100644 --- a/vendor/github.com/vishvananda/netns/netns_linux.go +++ b/vendor/github.com/vishvananda/netns/netns_linux.go @@ -1,34 +1,31 @@ -//go:build linux && go1.10 -// +build linux,go1.10 - package netns import ( "fmt" - "io/ioutil" "os" "path" "path/filepath" "strconv" "strings" - "syscall" "golang.org/x/sys/unix" ) -// Deprecated: use syscall pkg instead (go >= 1.5 needed). +// Deprecated: use golang.org/x/sys/unix pkg instead. const ( - CLONE_NEWUTS = 0x04000000 /* New utsname group? */ - CLONE_NEWIPC = 0x08000000 /* New ipcs */ - CLONE_NEWUSER = 0x10000000 /* New user namespace */ - CLONE_NEWPID = 0x20000000 /* New pid namespace */ - CLONE_NEWNET = 0x40000000 /* New network namespace */ - CLONE_IO = 0x80000000 /* Get io context */ - bindMountPath = "/run/netns" /* Bind mount path for named netns */ + CLONE_NEWUTS = unix.CLONE_NEWUTS /* New utsname group? */ + CLONE_NEWIPC = unix.CLONE_NEWIPC /* New ipcs */ + CLONE_NEWUSER = unix.CLONE_NEWUSER /* New user namespace */ + CLONE_NEWPID = unix.CLONE_NEWPID /* New pid namespace */ + CLONE_NEWNET = unix.CLONE_NEWNET /* New network namespace */ + CLONE_IO = unix.CLONE_IO /* Get io context */ ) -// Setns sets namespace using syscall. Note that this should be a method -// in syscall but it has not been added. +const bindMountPath = "/run/netns" /* Bind mount path for named netns */ + +// Setns sets namespace using golang.org/x/sys/unix.Setns. +// +// Deprecated: Use golang.org/x/sys/unix.Setns instead. func Setns(ns NsHandle, nstype int) (err error) { return unix.Setns(int(ns), nstype) } @@ -36,19 +33,20 @@ func Setns(ns NsHandle, nstype int) (err error) { // Set sets the current network namespace to the namespace represented // by NsHandle. func Set(ns NsHandle) (err error) { - return Setns(ns, CLONE_NEWNET) + return unix.Setns(int(ns), unix.CLONE_NEWNET) } // New creates a new network namespace, sets it as current and returns // a handle to it. func New() (ns NsHandle, err error) { - if err := unix.Unshare(CLONE_NEWNET); err != nil { + if err := unix.Unshare(unix.CLONE_NEWNET); err != nil { return -1, err } return Get() } -// NewNamed creates a new named network namespace and returns a handle to it +// NewNamed creates a new named network namespace, sets it as current, +// and returns a handle to it func NewNamed(name string) (NsHandle, error) { if _, err := os.Stat(bindMountPath); os.IsNotExist(err) { err = os.MkdirAll(bindMountPath, 0755) @@ -66,13 +64,15 @@ func NewNamed(name string) (NsHandle, error) { f, err := os.OpenFile(namedPath, os.O_CREATE|os.O_EXCL, 0444) if err != nil { + newNs.Close() return None(), err } f.Close() - nsPath := fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), syscall.Gettid()) - err = syscall.Mount(nsPath, namedPath, "bind", syscall.MS_BIND, "") + nsPath := fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid()) + err = unix.Mount(nsPath, namedPath, "bind", unix.MS_BIND, "") if err != nil { + newNs.Close() return None(), err } @@ -83,7 +83,7 @@ func NewNamed(name string) (NsHandle, error) { func DeleteNamed(name string) error { namedPath := path.Join(bindMountPath, name) - err := syscall.Unmount(namedPath, syscall.MNT_DETACH) + err := unix.Unmount(namedPath, unix.MNT_DETACH) if err != nil { return err } @@ -109,7 +109,7 @@ func GetFromPath(path string) (NsHandle, error) { // GetFromName gets a handle to a named network namespace such as one // created by `ip netns add`. func GetFromName(name string) (NsHandle, error) { - return GetFromPath(fmt.Sprintf("/var/run/netns/%s", name)) + return GetFromPath(filepath.Join(bindMountPath, name)) } // GetFromPid gets a handle to the network namespace of a given pid. @@ -135,7 +135,7 @@ func GetFromDocker(id string) (NsHandle, error) { // borrowed from docker/utils/utils.go func findCgroupMountpoint(cgroupType string) (int, string, error) { - output, err := ioutil.ReadFile("/proc/mounts") + output, err := os.ReadFile("/proc/mounts") if err != nil { return -1, "", err } @@ -165,7 +165,7 @@ func findCgroupMountpoint(cgroupType string) (int, string, error) { // borrowed from docker/utils/utils.go // modified to get the docker pid instead of using /proc/self func getDockerCgroup(cgroupVer int, cgroupType string) (string, error) { - dockerpid, err := ioutil.ReadFile("/var/run/docker.pid") + dockerpid, err := os.ReadFile("/var/run/docker.pid") if err != nil { return "", err } @@ -177,7 +177,7 @@ func getDockerCgroup(cgroupVer int, cgroupType string) (string, error) { if err != nil { return "", err } - output, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid)) + output, err := os.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid)) if err != nil { return "", err } @@ -264,7 +264,7 @@ func getPidForContainer(id string) (int, error) { return pid, fmt.Errorf("Unable to find container: %v", id[:len(id)-1]) } - output, err := ioutil.ReadFile(filename) + output, err := os.ReadFile(filename) if err != nil { return pid, err } diff --git a/vendor/github.com/vishvananda/netns/netns_unspecified.go b/vendor/github.com/vishvananda/netns/netns_others.go similarity index 63% rename from vendor/github.com/vishvananda/netns/netns_unspecified.go rename to vendor/github.com/vishvananda/netns/netns_others.go index d06af62b..04898377 100644 --- a/vendor/github.com/vishvananda/netns/netns_unspecified.go +++ b/vendor/github.com/vishvananda/netns/netns_others.go @@ -1,3 +1,4 @@ +//go:build !linux // +build !linux package netns @@ -10,6 +11,14 @@ var ( ErrNotImplemented = errors.New("not implemented") ) +// Setns sets namespace using golang.org/x/sys/unix.Setns on Linux. It +// is not implemented on other platforms. +// +// Deprecated: Use golang.org/x/sys/unix.Setns instead. +func Setns(ns NsHandle, nstype int) (err error) { + return ErrNotImplemented +} + func Set(ns NsHandle) (err error) { return ErrNotImplemented } @@ -18,6 +27,14 @@ func New() (ns NsHandle, err error) { return -1, ErrNotImplemented } +func NewNamed(name string) (NsHandle, error) { + return -1, ErrNotImplemented +} + +func DeleteNamed(name string) error { + return ErrNotImplemented +} + func Get() (NsHandle, error) { return -1, ErrNotImplemented } diff --git a/vendor/github.com/vishvananda/netns/netns.go b/vendor/github.com/vishvananda/netns/nshandle_linux.go similarity index 75% rename from vendor/github.com/vishvananda/netns/netns.go rename to vendor/github.com/vishvananda/netns/nshandle_linux.go index 116befd5..1baffb66 100644 --- a/vendor/github.com/vishvananda/netns/netns.go +++ b/vendor/github.com/vishvananda/netns/nshandle_linux.go @@ -1,11 +1,3 @@ -// Package netns allows ultra-simple network namespace handling. NsHandles -// can be retrieved and set. Note that the current namespace is thread -// local so actions that set and reset namespaces should use LockOSThread -// to make sure the namespace doesn't change due to a goroutine switch. -// It is best to close NsHandles when you are done with them. This can be -// accomplished via a `defer ns.Close()` on the handle. Changing namespaces -// requires elevated privileges, so in most cases this code needs to be run -// as root. package netns import ( @@ -38,7 +30,7 @@ func (ns NsHandle) Equal(other NsHandle) bool { // String shows the file descriptor number and its dev and inode. func (ns NsHandle) String() string { if ns == -1 { - return "NS(None)" + return "NS(none)" } var s unix.Stat_t if err := unix.Fstat(int(ns), &s); err != nil { @@ -71,7 +63,7 @@ func (ns *NsHandle) Close() error { if err := unix.Close(int(*ns)); err != nil { return err } - (*ns) = -1 + *ns = -1 return nil } diff --git a/vendor/github.com/vishvananda/netns/nshandle_others.go b/vendor/github.com/vishvananda/netns/nshandle_others.go new file mode 100644 index 00000000..af727bc0 --- /dev/null +++ b/vendor/github.com/vishvananda/netns/nshandle_others.go @@ -0,0 +1,45 @@ +//go:build !linux +// +build !linux + +package netns + +// NsHandle is a handle to a network namespace. It can only be used on Linux, +// but provides stub methods on other platforms. +type NsHandle int + +// Equal determines if two network handles refer to the same network +// namespace. It is only implemented on Linux. +func (ns NsHandle) Equal(_ NsHandle) bool { + return false +} + +// String shows the file descriptor number and its dev and inode. +// It is only implemented on Linux, and returns "NS(none)" on other +// platforms. +func (ns NsHandle) String() string { + return "NS(none)" +} + +// UniqueId returns a string which uniquely identifies the namespace +// associated with the network handle. It is only implemented on Linux, +// and returns "NS(none)" on other platforms. +func (ns NsHandle) UniqueId() string { + return "NS(none)" +} + +// IsOpen returns true if Close() has not been called. It is only implemented +// on Linux and always returns false on other platforms. +func (ns NsHandle) IsOpen() bool { + return false +} + +// Close closes the NsHandle and resets its file descriptor to -1. +// It is only implemented on Linux. +func (ns *NsHandle) Close() error { + return nil +} + +// None gets an empty (closed) NsHandle. +func None() NsHandle { + return NsHandle(-1) +} diff --git a/vendor/modules.txt b/vendor/modules.txt index aca28bb6..429d8639 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -133,12 +133,12 @@ github.com/stretchr/testify/assert ## explicit; go 1.21 github.com/u-root/uio/rand github.com/u-root/uio/uio -# github.com/vishvananda/netlink v1.2.1-beta.2 +# github.com/vishvananda/netlink v1.2.1 ## explicit; go 1.12 github.com/vishvananda/netlink github.com/vishvananda/netlink/nl -# github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74 -## explicit; go 1.12 +# github.com/vishvananda/netns v0.0.4 +## explicit; go 1.17 github.com/vishvananda/netns # golang.org/x/crypto v0.26.0 ## explicit; go 1.20