diff --git a/Documentation/cmdref/cilium-agent.md b/Documentation/cmdref/cilium-agent.md index 0897e964f8bdd..1c1771121a281 100644 --- a/Documentation/cmdref/cilium-agent.md +++ b/Documentation/cmdref/cilium-agent.md @@ -161,7 +161,8 @@ cilium-agent [flags] --encrypt-interface string Transparent encryption interface --encrypt-node Enables encrypting traffic from non-Cilium pods and host networking (only supported with WireGuard, beta) --encryption-strict-mode-allow-remote-node-identities Allows unencrypted traffic from pods to remote node identities within the strict mode CIDR. This is required when tunneling is used or direct routing is used and the node CIDR and pod CIDR overlap. - --encryption-strict-mode-cidr string In strict-mode encryption, all unencrypted traffic coming from this CIDR and going to this same CIDR will be dropped + --encryption-strict-mode-node-cidrs strings In strict-mode encryption, all unencrypted traffic coming from one of those CIDRs and going one of those CIDRs will be dropped + --encryption-strict-mode-pod-cidrs strings In strict-mode encryption, all unencrypted traffic coming from one of those CIDRs and going one of those CIDRs will be dropped --endpoint-bpf-prog-watchdog-interval duration Interval to trigger endpoint BPF programs load check watchdog (default 30s) --endpoint-queue-size int Size of EventQueue per-endpoint (default 25) --endpoint-status strings Enable additional CiliumEndpoint status features (controllers,health,log,policy,state) @@ -333,6 +334,5 @@ cilium-agent [flags] ### SEE ALSO -* [cilium-agent completion](cilium-agent_completion.md) - Generate the autocompletion script for the specified shell -* [cilium-agent hive](cilium-agent_hive.md) - Inspect the hive - +* [cilium-agent completion](cilium-agent_completion.md) - Generate the autocompletion script for the specified shell +* [cilium-agent hive](cilium-agent_hive.md) - Inspect the hive diff --git a/Documentation/helm-values.rst b/Documentation/helm-values.rst index 
2f7ab38c71176..0ed03b0068359 100644 --- a/Documentation/helm-values.rst +++ b/Documentation/helm-values.rst @@ -989,21 +989,25 @@ - string - ``"cilium-ipsec-keys"`` * - :spelling:ignore:`encryption.strictMode` - - Configure the WireGuard Pod2Pod strict mode. + - Configure the WireGuard strict mode. - object - - ``{"allowRemoteNodeIdentities":false,"cidr":"","enabled":false}`` + - ``{"allowRemoteNodeIdentities":true,"enabled":false,"nodeCIDRList":[],"podCIDRList":[]}`` * - :spelling:ignore:`encryption.strictMode.allowRemoteNodeIdentities` - - Allow dynamic lookup of remote node identities. This is required when tunneling is used or direct routing is used and the node CIDR and pod CIDR overlap. + - Allow dynamic lookup of remote node identities. This is required when tunneling is used or direct routing is used and the node CIDR and pod CIDR overlap. This is also required when control-plane nodes are exempted from node-to-node encryption. - bool - - ``false`` - * - :spelling:ignore:`encryption.strictMode.cidr` - - CIDR for the WireGuard Pod2Pod strict mode. - - string - - ``""`` + - ``true`` * - :spelling:ignore:`encryption.strictMode.enabled` - - Enable WireGuard Pod2Pod strict mode. + - Enable WireGuard strict mode. - bool - ``false`` + * - :spelling:ignore:`encryption.strictMode.nodeCIDRList` + - nodeCIDRList for the WireGuard strict mode. + - list + - ``[]`` + * - :spelling:ignore:`encryption.strictMode.podCIDRList` + - podCIDRList for the WireGuard strict mode. + - list + - ``[]`` * - :spelling:ignore:`encryption.type` - Encryption method. Can be either ipsec or wireguard. 
- string diff --git a/bpf/bpf_alignchecker.c b/bpf/bpf_alignchecker.c index 1f69e9c733c42..b3e13732bbd8b 100644 --- a/bpf/bpf_alignchecker.c +++ b/bpf/bpf_alignchecker.c @@ -85,3 +85,4 @@ add_type(struct tunnel_key); add_type(struct tunnel_value); add_type(struct auth_key); add_type(struct auth_info); +add_type(struct strict_mode_policy); diff --git a/bpf/bpf_host.c b/bpf/bpf_host.c index 16f932469d282..80a481f196969 100644 --- a/bpf/bpf_host.c +++ b/bpf/bpf_host.c @@ -1400,11 +1400,17 @@ int cil_to_netdev(struct __ctx_buff *ctx __maybe_unused) return send_drop_notify_error(ctx, 0, ret, CTX_ACT_DROP, METRIC_EGRESS); -#if defined(ENCRYPTION_STRICT_MODE) + /* We disable the WireGuard strict mode if the tunnel mode is enabled, + * since we have a check earlier in the datapath before encapsulation. + * We chose this approach so that we don't have to decapsulate the + * packet to check if the packet's original destination is allowed to + * be sent unencrypted. + */ +#if defined(ENCRYPTION_STRICT_MODE) && (!defined(TUNNEL_MODE) || defined(ENABLE_NODE_ENCRYPTION)) if (!strict_allow(ctx)) return send_drop_notify_error(ctx, 0, DROP_UNENCRYPTED_TRAFFIC, CTX_ACT_DROP, METRIC_EGRESS); -#endif /* ENCRYPTION_STRICT_MODE */ +#endif /* ENCRYPTION_STRICT_MODE && (!TUNNEL_MODE || ENABLE_NODE_ENCRYPTION) */ #endif /* ENABLE_WIREGUARD */ #ifdef ENABLE_HEALTH_CHECK diff --git a/bpf/lib/maps.h b/bpf/lib/maps.h index 8790f56ab16c2..611e545380051 100644 --- a/bpf/lib/maps.h +++ b/bpf/lib/maps.h @@ -183,6 +183,22 @@ struct { __uint(max_entries, 1); } ENCRYPT_MAP __section_maps_btf; +struct strict_mode_policy { + __u8 allow; + __u8 pad1; + __be16 port1; + __be16 port2; +}; + +struct { + __uint(type, BPF_MAP_TYPE_LPM_TRIE); + __type(key, struct ipcache_key); + __type(value, struct strict_mode_policy); + __uint(pinning, LIBBPF_PIN_BY_NAME); + __uint(max_entries, STRICT_MAP_SIZE); + __uint(map_flags, BPF_F_NO_PREALLOC); +} STRICT_MODE_MAP __section_maps_btf; + struct node_key { __u16 pad1; 
__u8 pad2; diff --git a/bpf/lib/wireguard.h b/bpf/lib/wireguard.h index 9e19616646572..ed203f4dbd023 100644 --- a/bpf/lib/wireguard.h +++ b/bpf/lib/wireguard.h @@ -12,6 +12,8 @@ #include "tailcall.h" #include "common.h" #include "overloadable.h" +#include "maps.h" +#include "eps.h" static __always_inline int wg_maybe_redirect_to_encrypt(struct __ctx_buff *ctx) @@ -147,20 +149,43 @@ encrypt: __maybe_unused #ifdef ENCRYPTION_STRICT_MODE +static __always_inline __maybe_unused struct strict_mode_policy * +strict_lookup4(const void *map, __be32 addr, __u32 prefix, __u8 cluster_id) +{ + struct ipcache_key key = { + .lpm_key = { IPCACHE_PREFIX_LEN(prefix), {} }, + .cluster_id = cluster_id, + .family = ENDPOINT_KEY_IPV4, + .ip4 = addr, + }; + + key.ip4 &= GET_PREFIX(prefix); + return map_lookup_elem(map, &key); +} + /* strict_allow checks whether the packet is allowed to pass through the strict mode. */ static __always_inline bool strict_allow(struct __ctx_buff *ctx) { struct remote_endpoint_info __maybe_unused *dest_info, __maybe_unused *src_info; - bool __maybe_unused in_strict_cidr = false; + bool __maybe_unused src_in_cidr = false; + bool __maybe_unused dst_in_cidr = false; + struct strict_mode_policy __maybe_unused *entry = NULL; void *data, *data_end; #ifdef ENABLE_IPV4 struct iphdr *ip4; + struct tcphdr *tcph = NULL; + __u16 offset; #endif __u16 proto = 0; if (!validate_ethertype(ctx, &proto)) return true; +#ifdef ENABLE_NODE_ENCRYPTION + if ((ctx->mark & MARK_MAGIC_WG_ENCRYPTED) == MARK_MAGIC_WG_ENCRYPTED) + return true; +#endif /* ENABLE_NODE_ENCRYPTION */ + switch (proto) { #ifdef ENABLE_IPV4 case bpf_htons(ETH_P_IP): @@ -171,24 +196,39 @@ strict_allow(struct __ctx_buff *ctx) { * (1) When encapsulation is used and the destination is a remote pod. * (2) When the destination is a remote-node. 
*/ +#ifndef ENABLE_NODE_ENCRYPTION if (ip4->saddr == IPV4_GATEWAY || ip4->saddr == IPV4_ENCRYPT_IFACE) return true; +#endif /* ENABLE_NODE_ENCRYPTION */ + + if (ip4->protocol == IPPROTO_TCP) { + offset = sizeof(struct ethhdr) + sizeof(struct iphdr); + if ((data + offset + sizeof(struct tcphdr)) > data_end) + return true; + tcph = (struct tcphdr *)(data + offset); + } + + entry = strict_lookup4(&STRICT_MODE_MAP, ip4->daddr, V4_CACHE_KEY_LEN, 0); + if (entry && entry->allow == 0) + dst_in_cidr = true; + if (entry && tcph && (tcph->dest == entry->port1 || tcph->dest == entry->port2)) + return true; + + entry = strict_lookup4(&STRICT_MODE_MAP, ip4->saddr, V4_CACHE_KEY_LEN, 0); + if (entry && entry->allow == 0) + src_in_cidr = true; + if (entry && tcph && (tcph->source == entry->port1 || tcph->source == entry->port2)) + return true; - in_strict_cidr = ipv4_is_in_subnet(ip4->daddr, - STRICT_IPV4_NET, - STRICT_IPV4_NET_SIZE); - in_strict_cidr &= ipv4_is_in_subnet(ip4->saddr, - STRICT_IPV4_NET, - STRICT_IPV4_NET_SIZE); -#if defined(TUNNEL_MODE) || defined(STRICT_IPV4_OVERLAPPING_CIDR) - /* Allow pod to remote-node communication */ +#ifdef ALLOW_REMOTE_NODE_IDENTITIES + /* Allow X to remote-node communication */ dest_info = lookup_ip4_remote_endpoint(ip4->daddr, 0); if (dest_info && dest_info->sec_identity && identity_is_node(dest_info->sec_identity)) return true; -#endif /* TUNNEL_MODE || STRICT_IPV4_OVERLAPPING_CIDR */ - return !in_strict_cidr; +#endif /* ALLOW_REMOTE_NODE_IDENTITIES */ + return !(src_in_cidr && dst_in_cidr); #endif /* ENABLE_IPV4 */ default: return true; diff --git a/bpf/node_config.h b/bpf/node_config.h index 9bf2bf5413248..7f0c46d6775fe 100644 --- a/bpf/node_config.h +++ b/bpf/node_config.h @@ -148,6 +148,7 @@ DEFINE_IPV6(HOST_IP, 0xbe, 0xef, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0xa, 0x #define AUTH_MAP test_cilium_auth #define CONFIG_MAP test_cilium_runtime_config #define IPCACHE_MAP test_cilium_ipcache +#define STRICT_MODE_MAP 
test_cilium_strict_mode_map #define NODE_MAP test_cilium_node_map #define ENCRYPT_MAP test_cilium_encrypt_state #define L2_RESPONDER_MAP4 test_cilium_l2_responder_v4 @@ -187,6 +188,7 @@ DEFINE_IPV6(HOST_IP, 0xbe, 0xef, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0xa, 0x #define POLICY_MAP_SIZE 16384 #define AUTH_MAP_SIZE 512000 #define CONFIG_MAP_SIZE 256 +#define STRICT_MAP_SIZE 5 #define IPCACHE_MAP_SIZE 512000 #define NODE_MAP_SIZE 16384 #define EGRESS_POLICY_MAP_SIZE 16384 diff --git a/daemon/cmd/daemon.go b/daemon/cmd/daemon.go index 7701ecfd66dec..ead2b6f13e508 100644 --- a/daemon/cmd/daemon.go +++ b/daemon/cmd/daemon.go @@ -404,6 +404,10 @@ func newDaemon(ctx context.Context, cleaner *daemonCleanup, params *daemonParams } lbmap.Init(lbmapInitParams) + if err := setupStrictModeMap(params.LocalNodeStore); err != nil { + return nil, nil, fmt.Errorf("unable to setup strict map: %s", err) + } + params.NodeManager.Subscribe(params.Datapath.Node()) identity.IterateReservedIdentities(func(_ identity.NumericIdentity, _ *identity.Identity) { @@ -1000,7 +1004,7 @@ func newDaemon(ctx context.Context, cleaner *daemonCleanup, params *daemonParams // controller is to ensure that endpoints and host IPs entries are // reinserted to the bpf maps if they are ever removed from them. 
syncErrs := make(chan error, 1) - var syncHostIPsControllerGroup = controller.NewGroup("sync-host-ips") + syncHostIPsControllerGroup := controller.NewGroup("sync-host-ips") d.controllers.UpdateController( syncHostIPsController, controller.ControllerParams{ diff --git a/daemon/cmd/daemon_main.go b/daemon/cmd/daemon_main.go index 56a95eebe73aa..8f0ee2634233d 100644 --- a/daemon/cmd/daemon_main.go +++ b/daemon/cmd/daemon_main.go @@ -402,8 +402,11 @@ func InitGlobalFlags(cmd *cobra.Command, vp *viper.Viper) { flags.Bool(option.EnableEncryptionStrictMode, false, "Enable encryption strict mode") option.BindEnv(vp, option.EnableEncryptionStrictMode) - flags.String(option.EncryptionStrictModeCIDR, "", "In strict-mode encryption, all unencrypted traffic coming from this CIDR and going to this same CIDR will be dropped") - option.BindEnv(vp, option.EncryptionStrictModeCIDR) + flags.StringSlice(option.EncryptionStrictModeNodeCIDRs, []string{}, "In strict-mode encryption, all unencrypted traffic coming from one of those CIDRs and going one of those CIDRs will be dropped") + option.BindEnv(vp, option.EncryptionStrictModeNodeCIDRs) + + flags.StringSlice(option.EncryptionStrictModePodCIDRs, []string{}, "In strict-mode encryption, all unencrypted traffic coming from one of those CIDRs and going one of those CIDRs will be dropped") + option.BindEnv(vp, option.EncryptionStrictModePodCIDRs) flags.Bool(option.EncryptionStrictModeAllowRemoteNodeIdentities, false, "Allows unencrypted traffic from pods to remote node identities within the strict mode CIDR. 
This is required when tunneling is used or direct routing is used and the node CIDR and pod CIDR overlap.") option.BindEnv(vp, option.EncryptionStrictModeAllowRemoteNodeIdentities) diff --git a/daemon/cmd/datapath.go b/daemon/cmd/datapath.go index b4f6df66c32de..abaf648b5c7e1 100644 --- a/daemon/cmd/datapath.go +++ b/daemon/cmd/datapath.go @@ -4,6 +4,7 @@ package cmd import ( + "context" "fmt" "net" "net/netip" @@ -12,6 +13,7 @@ import ( "github.com/sirupsen/logrus" "github.com/vishvananda/netlink" + k8sLabels "k8s.io/apimachinery/pkg/labels" "github.com/cilium/cilium/pkg/cidr" "github.com/cilium/cilium/pkg/datapath/linux/linux_defaults" @@ -35,6 +37,7 @@ import ( "github.com/cilium/cilium/pkg/maps/neighborsmap" "github.com/cilium/cilium/pkg/maps/policymap" "github.com/cilium/cilium/pkg/maps/srv6map" + "github.com/cilium/cilium/pkg/maps/strictmap" "github.com/cilium/cilium/pkg/maps/tunnel" "github.com/cilium/cilium/pkg/maps/vtep" "github.com/cilium/cilium/pkg/maps/worldcidrsmap" @@ -80,7 +83,6 @@ func clearCiliumVeths() error { } return -1 }) - if err != nil { return fmt.Errorf("unable to retrieve host network interfaces: %s", err) } @@ -469,6 +471,64 @@ func (d *Daemon) initMaps() error { return nil } +// setupStrictModeMap creates the strict mode map and populates it: each node +// and pod CIDR is added with allow=0, 0.0.0.0/0 with allow=1, and on control +// plane nodes the node CIDRs also carry ports 2379/2380. It fails if the node +// IPv4 is in a strict CIDR while node encryption and remote node lookup are off. +func setupStrictModeMap(lns *node.LocalNodeStore) error { + if err := strictmap.Create(); err != nil { + return fmt.Errorf("initializing strict mode map: %w", err) + } + + // Copy before appending so the configured node CIDR slice is never written to. + strictCIDRs := append(append([]netip.Prefix(nil), option.Config.EncryptionStrictModeNodeCIDRs...), option.Config.EncryptionStrictModePodCIDRs...) + for _, prefix := range strictCIDRs { + + ipv4Interface, ok := netip.AddrFromSlice(node.GetIPv4().To4()) + if !ok { + return fmt.Errorf("unable to parse node IPv4 address %s", node.GetIPv4()) + } + if prefix.Contains(ipv4Interface) && !option.Config.NodeEncryptionEnabled() { + if !option.Config.EncryptionStrictModeAllowRemoteNodeIdentities { + return fmt.Errorf(`encryption strict mode is enabled but the node's IPv4 address is within the strict CIDR range. 
+ This will cause the node to drop all traffic. + Please either disable encryption or set --encryption-strict-mode-allow-remote-node-identities=true`) + } + } + + if err := strictmap.UpdateContext(prefix, 0, 0, 0, 0); err != nil { + return fmt.Errorf("updating strict mode map: %w", err) + } + } + + // Add the default match to the trie map. + // If this prefix is matched, then the packet is allowed to pass unencrypted as indicated by the "1" as a value. + if err := strictmap.UpdateContext(netip.MustParsePrefix("0.0.0.0/0"), 0, 1, 0, 0); err != nil { + return fmt.Errorf("updating strict mode map: %w", err) + } + + // Allow etcd ports only on control plane nodes + sel, err := k8sLabels.Parse("node-role.kubernetes.io/control-plane") + if err != nil { + return fmt.Errorf("unable to parse control plane label selector: %w", err) + } + + localNode, err := lns.Get(context.Background()) + if err != nil { + return fmt.Errorf("unable to get local node: %w", err) + } + + if sel.Matches(k8sLabels.Set(localNode.Labels)) { + for _, nodeCIDR := range option.Config.EncryptionStrictModeNodeCIDRs { + if err := strictmap.UpdateContext(nodeCIDR, 0, 0, 2379, 2380); err != nil { + return fmt.Errorf("updating strict mode map: %w", err) + } + } + } + + return nil +} + func setupVTEPMapping() error { for i, ep := range option.Config.VtepEndpoints { log.WithFields(logrus.Fields{ @@ -482,7 +542,6 @@ func setupVTEPMapping() error { } return nil - } func setupRouteToVtepCidr() error { diff --git a/install/kubernetes/cilium/README.md b/install/kubernetes/cilium/README.md index bdbca6edc8de6..21f7633a0db1b 100644 --- a/install/kubernetes/cilium/README.md +++ b/install/kubernetes/cilium/README.md @@ -297,10 +297,11 @@ contributors across the globe, there is almost always someone available to help. | encryption.mountPath | string | `"/etc/ipsec"` | Deprecated in favor of encryption.ipsec.mountPath. To be removed in 1.15. Path to mount the secret inside the Cilium pod. 
This option is only effective when encryption.type is set to ipsec. | | encryption.nodeEncryption | bool | `false` | Enable encryption for pure node to node traffic. This option is only effective when encryption.type is set to "wireguard". | | encryption.secretName | string | `"cilium-ipsec-keys"` | Deprecated in favor of encryption.ipsec.secretName. To be removed in 1.15. Name of the Kubernetes secret containing the encryption keys. This option is only effective when encryption.type is set to ipsec. | -| encryption.strictMode | object | `{"allowRemoteNodeIdentities":false,"cidr":"","enabled":false}` | Configure the WireGuard Pod2Pod strict mode. | -| encryption.strictMode.allowRemoteNodeIdentities | bool | `false` | Allow dynamic lookup of remote node identities. This is required when tunneling is used or direct routing is used and the node CIDR and pod CIDR overlap. | -| encryption.strictMode.cidr | string | `""` | CIDR for the WireGuard Pod2Pod strict mode. | -| encryption.strictMode.enabled | bool | `false` | Enable WireGuard Pod2Pod strict mode. | +| encryption.strictMode | object | `{"allowRemoteNodeIdentities":true,"enabled":false,"nodeCIDRList":[],"podCIDRList":[]}` | Configure the WireGuard strict mode. | +| encryption.strictMode.allowRemoteNodeIdentities | bool | `true` | Allow dynamic lookup of remote node identities. This is required when tunneling is used or direct routing is used and the node CIDR and pod CIDR overlap. This is also required when control-plane nodes are exempted from node-to-node encryption. | +| encryption.strictMode.enabled | bool | `false` | Enable WireGuard strict mode. | +| encryption.strictMode.nodeCIDRList | list | `[]` | nodeCIDRList for the WireGuard strict mode. | +| encryption.strictMode.podCIDRList | list | `[]` | podCIDRList for the WireGuard strict mode. | | encryption.type | string | `"ipsec"` | Encryption method. Can be either ipsec or wireguard. 
| | encryption.wireguard.persistentKeepalive | string | `"0s"` | Controls Wireguard PersistentKeepalive option. Set 0s to disable. | | encryption.wireguard.userspaceFallback | bool | `false` | Enables the fallback to the user-space implementation. | diff --git a/install/kubernetes/cilium/templates/cilium-configmap.yaml b/install/kubernetes/cilium/templates/cilium-configmap.yaml index 826b98ed64c44..b67e503924e75 100644 --- a/install/kubernetes/cilium/templates/cilium-configmap.yaml +++ b/install/kubernetes/cilium/templates/cilium-configmap.yaml @@ -606,7 +606,9 @@ data: {{- if .Values.encryption.strictMode.enabled }} enable-encryption-strict-mode: {{ .Values.encryption.strictMode.enabled | quote }} - encryption-strict-mode-cidr: {{ .Values.encryption.strictMode.cidr | quote }} + encryption-strict-mode-node-cidrs: {{ .Values.encryption.strictMode.nodeCIDRList | join " " | quote }} + + encryption-strict-mode-pod-cidrs: {{ .Values.encryption.strictMode.podCIDRList | join " " | quote }} encryption-strict-mode-allow-remote-node-identities: {{ .Values.encryption.strictMode.allowRemoteNodeIdentities | quote }} {{- end }} diff --git a/install/kubernetes/cilium/values.yaml b/install/kubernetes/cilium/values.yaml index b36ad34e70a71..712d981a79248 100644 --- a/install/kubernetes/cilium/values.yaml +++ b/install/kubernetes/cilium/values.yaml @@ -803,17 +803,21 @@ encryption: # This option is only effective when encryption.type is set to "wireguard". nodeEncryption: false - # -- Configure the WireGuard Pod2Pod strict mode. + # -- Configure the WireGuard strict mode. strictMode: - # -- Enable WireGuard Pod2Pod strict mode. + # -- Enable WireGuard strict mode. enabled: false - # -- CIDR for the WireGuard Pod2Pod strict mode. - cidr: "" + # -- podCIDRList for the WireGuard strict mode. + podCIDRList: [] + + # -- nodeCIDRList for the WireGuard strict mode. + nodeCIDRList: [] # -- Allow dynamic lookup of remote node identities. 
# This is required when tunneling is used or direct routing is used and the node CIDR and pod CIDR overlap. - allowRemoteNodeIdentities: false + # This is also required when control-plane nodes are exempted from node-to-node encryption. + allowRemoteNodeIdentities: true ipsec: # -- Name of the key file inside the Kubernetes secret configured via secretName. diff --git a/install/kubernetes/cilium/values.yaml.tmpl b/install/kubernetes/cilium/values.yaml.tmpl index 92a7ad7597b9a..2f55aa49a9fb0 100644 --- a/install/kubernetes/cilium/values.yaml.tmpl +++ b/install/kubernetes/cilium/values.yaml.tmpl @@ -800,17 +800,21 @@ encryption: # This option is only effective when encryption.type is set to "wireguard". nodeEncryption: false - # -- Configure the WireGuard Pod2Pod strict mode. + # -- Configure the WireGuard strict mode. strictMode: - # -- Enable WireGuard Pod2Pod strict mode. + # -- Enable WireGuard strict mode. enabled: false - # -- CIDR for the WireGuard Pod2Pod strict mode. - cidr: "" + # -- podCIDRList for the WireGuard strict mode. + podCIDRList: [] + + # -- nodeCIDRList for the WireGuard strict mode. + nodeCIDRList: [] # -- Allow dynamic lookup of remote node identities. # This is required when tunneling is used or direct routing is used and the node CIDR and pod CIDR overlap. - allowRemoteNodeIdentities: false + # This is also required when control-plane nodes are exempted from node-to-node encryption. + allowRemoteNodeIdentities: true ipsec: # -- Name of the key file inside the Kubernetes secret configured via secretName. 
diff --git a/pkg/datapath/linux/config/config.go b/pkg/datapath/linux/config/config.go index e89ef3454c4ac..b8ffe1f92c3a7 100644 --- a/pkg/datapath/linux/config/config.go +++ b/pkg/datapath/linux/config/config.go @@ -12,7 +12,6 @@ import ( "fmt" "io" "net" - "net/netip" "sort" "strconv" "strings" @@ -52,6 +51,7 @@ import ( "github.com/cilium/cilium/pkg/maps/recorder" "github.com/cilium/cilium/pkg/maps/signalmap" "github.com/cilium/cilium/pkg/maps/srv6map" + "github.com/cilium/cilium/pkg/maps/strictmap" "github.com/cilium/cilium/pkg/maps/tunnel" "github.com/cilium/cilium/pkg/maps/vtep" "github.com/cilium/cilium/pkg/maps/worldcidrsmap" @@ -182,6 +182,8 @@ func (h *HeaderfileWriter) WriteNodeConfig(w io.Writer, cfg *datapath.LocalNodeC cDefinesMap["CONFIG_MAP_SIZE"] = fmt.Sprintf("%d", configmap.MaxEntries) cDefinesMap["IPCACHE_MAP"] = ipcachemap.Name cDefinesMap["IPCACHE_MAP_SIZE"] = fmt.Sprintf("%d", ipcachemap.MaxEntries) + cDefinesMap["STRICT_MODE_MAP"] = strictmap.Name + cDefinesMap["STRICT_MAP_SIZE"] = fmt.Sprintf("%d", strictmap.MaxEntries) cDefinesMap["NODE_MAP"] = nodemap.MapName cDefinesMap["NODE_MAP_SIZE"] = fmt.Sprintf("%d", nodemap.MaxEntries) cDefinesMap["SRV6_VRF_MAP4"] = srv6map.VRFMapName4 @@ -304,24 +306,10 @@ func (h *HeaderfileWriter) WriteNodeConfig(w io.Writer, cfg *datapath.LocalNodeC if option.Config.EnableEncryptionStrictMode { cDefinesMap["ENCRYPTION_STRICT_MODE"] = "1" - // when parsing the user input we only accept ipv4 addresses - cDefinesMap["STRICT_IPV4_NET"] = fmt.Sprintf("%#x", byteorder.NetIPAddrToHost32(option.Config.EncryptionStrictModeCIDR.Addr())) - cDefinesMap["STRICT_IPV4_NET_SIZE"] = fmt.Sprintf("%d", option.Config.EncryptionStrictModeCIDR.Bits()) - cDefinesMap["IPV4_ENCRYPT_IFACE"] = fmt.Sprintf("%#x", byteorder.NetIPv4ToHost32(node.GetIPv4())) - ipv4Interface, ok := netip.AddrFromSlice(node.GetIPv4().To4()) - if !ok { - return fmt.Errorf("unable to parse node IPv4 address %s", node.GetIPv4()) - } - - if 
option.Config.EncryptionStrictModeCIDR.Contains(ipv4Interface) { - if !option.Config.EncryptionStrictModeAllowRemoteNodeIdentities { - return fmt.Errorf(`encryption strict mode is enabled but the node's IPv4 address is within the strict CIDR range. - This will cause the node to drop all traffic. - Please either disable encryption or set --encryption-strict-mode-allow-dynamic-lookup=true`) - } - cDefinesMap["STRICT_IPV4_OVERLAPPING_CIDR"] = "1" + if option.Config.EncryptionStrictModeAllowRemoteNodeIdentities { + cDefinesMap["ALLOW_REMOTE_NODE_IDENTITIES"] = "1" } } diff --git a/pkg/datapath/loader/cache.go b/pkg/datapath/loader/cache.go index 769e494bd2877..f1c9c79a9ff9c 100644 --- a/pkg/datapath/loader/cache.go +++ b/pkg/datapath/loader/cache.go @@ -51,6 +51,7 @@ var ignoredELFPrefixes = []string{ "cilium_runtime_config", // Global "cilium_signals", // Global "cilium_snat", // All SNAT maps + "cilium_strict", // Global "cilium_tail_call_buffer", // Global "cilium_tunnel", // Global "cilium_ipv4_frag_datagrams", // Global diff --git a/pkg/maps/strictmap/doc.go b/pkg/maps/strictmap/doc.go new file mode 100644 index 0000000000000..5942c0525a4ab --- /dev/null +++ b/pkg/maps/strictmap/doc.go @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Authors of Cilium + +// +groupName=maps +package strictmap diff --git a/pkg/maps/strictmap/strict.go b/pkg/maps/strictmap/strict.go new file mode 100644 index 0000000000000..17fbf1b78e5dc --- /dev/null +++ b/pkg/maps/strictmap/strict.go @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Authors of Cilium + +package strictmap + +import ( + "fmt" + "net" + "net/netip" + "sync" + "unsafe" + + "github.com/cilium/cilium/pkg/bpf" + "github.com/cilium/cilium/pkg/byteorder" + cmtypes "github.com/cilium/cilium/pkg/clustermesh/types" + "github.com/cilium/cilium/pkg/ebpf" + "github.com/cilium/cilium/pkg/option" + "github.com/cilium/cilium/pkg/types" +) + +const ( + // MaxEntries is the maximum number of 
keys that can be present in the + // StrictModeMap. + MaxEntries = 5 + + // Name is the canonical name for the StrictModeMap on the filesystem. + Name = "cilium_strict" +) + +// key implements the bpf.MapKey interface. +// +// Must be in sync with struct ipcache_key, the key type of STRICT_MODE_MAP in bpf/lib/maps.h. +type key struct { + Prefixlen uint32 `align:"lpm_key"` + Pad1 uint16 `align:"pad1"` + ClusterID uint8 `align:"cluster_id"` + Family uint8 `align:"family"` + // represents both IPv6 and IPv4 (in the lowest four bytes) + IP types.IPv6 `align:"$union0"` +} + +type value struct { + Allow uint8 `align:"allow"` + Pad1 uint8 `align:"pad1"` + Port1 uint16 `align:"port1"` + Port2 uint16 `align:"port2"` +} + +const ( + sizeofStrictKey = int(unsafe.Sizeof(key{})) + sizeofPrefixlen = int(unsafe.Sizeof(key{}.Prefixlen)) + sizeofIP = int(unsafe.Sizeof(key{}.IP)) + + staticPrefixBits = uint32(sizeofStrictKey-sizeofPrefixlen-sizeofIP) * 8 +) + +func (k key) String() string { + var ( + addr netip.Addr + ok bool + ) + + switch k.Family { + case bpf.EndpointKeyIPv4: + addr, ok = netip.AddrFromSlice(k.IP[:net.IPv4len]) + if !ok { + return "" + } + case bpf.EndpointKeyIPv6: + addr = netip.AddrFrom16(k.IP) + default: + return "" + } + + prefixLen := int(k.Prefixlen - staticPrefixBits) + clusterID := uint32(k.ClusterID) + + return cmtypes.PrefixClusterFrom(addr, prefixLen, cmtypes.WithClusterID(clusterID)).String() +} + +func (k *key) New() bpf.MapKey { return &key{} } + +// getPrefixLen determines the length that should be set inside the key so that +// the lookup prefix is correct in the BPF map key. The specified 'ipPrefixBits' +// indicates the number of bits in the IP that must match to match the entry in +// the BPF StrictModeMap. +func getPrefixLen(ipPrefixBits int) uint32 { + return staticPrefixBits + uint32(ipPrefixBits) +} + +// newKey returns a key based on the provided CIDR and ClusterID. 
+// The address family is automatically detected +func newKey(ip netip.Prefix, clusterID uint8) (*key, error) { + result := key{} + + ones := ip.Bits() + if ones == -1 { + return nil, fmt.Errorf("invalid IP address: %s", ip) + } + + if ip.Addr().Is4() { + ipv4 := ip.Addr().As4() + result.Family = bpf.EndpointKeyIPv4 + copy(result.IP[:], ipv4[:]) + } else if ip.Addr().Is6() { + ipv6 := ip.Addr().As16() + result.Family = bpf.EndpointKeyIPv6 + copy(result.IP[:], ipv6[:]) + } else { + return nil, fmt.Errorf("invalid IP address: %s", ip) + } + + result.Prefixlen = getPrefixLen(ones) + result.ClusterID = clusterID + + return &result, nil +} + +func (v *value) String() string { + return fmt.Sprintf("allowed=%d, AllowPorts=%d,%d", v.Allow, byteorder.NetworkToHost16(v.Port1), byteorder.NetworkToHost16(v.Port2)) +} + +func (v *value) New() bpf.MapValue { return &value{} } + +var ( + // The StrictModeMap is a mapping of all CIDRs in the cluster to whether + // strict encryption should be enforced. + // It is a singleton; there is only one such map per agent. + strict *bpf.Map + once = &sync.Once{} +) + +// Create will create a strict map +func Create() error { + once.Do(func() { + strict = bpf.NewMap( + Name, + ebpf.LPMTrie, + &key{}, + &value{}, + MaxEntries, + bpf.BPF_F_NO_PREALLOC, + ).WithCache(). 
+ WithEvents(option.Config.GetEventBufferConfig(Name)) + }) + + return strict.OpenOrCreate() +} + +// UpdateContext inserts or updates the strict mode map entry for the given prefix and cluster ID, storing the allow flag and the two ports in network byte order +func UpdateContext(ip netip.Prefix, clusterID uint8, allow uint8, port1 uint16, port2 uint16) error { + k, err := newKey(ip, clusterID) + if err != nil { + return err + } + + v := &value{ + Allow: allow, + Port1: byteorder.HostToNetwork16(port1), + Port2: byteorder.HostToNetwork16(port2), + } + + return strict.Update(k, v) +} diff --git a/pkg/option/config.go b/pkg/option/config.go index 63375f038d9fb..b6511b68d024e 100644 --- a/pkg/option/config.go +++ b/pkg/option/config.go @@ -754,8 +754,11 @@ const ( // EnableEncryptionStrictMode is the name of the option to enable strict encryption mode. EnableEncryptionStrictMode = "enable-encryption-strict-mode" - // EncryptionStrictModeCIDR is the CIDR in which the strict ecryption mode should be enforced. - EncryptionStrictModeCIDR = "encryption-strict-mode-cidr" + // EncryptionStrictModeNodeCIDRs are the node CIDRs in which the strict encryption mode should be enforced. + EncryptionStrictModeNodeCIDRs = "encryption-strict-mode-node-cidrs" + + // EncryptionStrictModePodCIDRs are the pod CIDRs in which the strict encryption mode should be enforced. + EncryptionStrictModePodCIDRs = "encryption-strict-mode-pod-cidrs" // EncryptionStrictModeAllowRemoteNodeIdentities allows dynamic lookup of remote node identities. 
// This is required when tunneling is used @@ -1702,8 +1705,11 @@ type DaemonConfig struct { // EnableEncryptionStrictMode enables strict mode for encryption EnableEncryptionStrictMode bool - // EncryptionStrictModeCIDR is the CIDR to use for strict mode - EncryptionStrictModeCIDR netip.Prefix + // EncryptionStrictModeNodeCIDRs are the CIDRs to use for strict mode + EncryptionStrictModeNodeCIDRs []netip.Prefix + + // EncryptionStrictModePodCIDRs are the CIDRs to use for strict mode + EncryptionStrictModePodCIDRs []netip.Prefix // EncryptionStrictModeAllowRemoteNodeIdentities allows dynamic lookup of node identities. // This is required when tunneling is used @@ -3218,14 +3224,30 @@ func (c *DaemonConfig) Populate(vp *viper.Viper) { log.Warnf("WireGuard encryption strict mode only support IPv4. IPv6 traffic is not protected and can be leaked.") } - strictCIDR := vp.GetString(EncryptionStrictModeCIDR) - c.EncryptionStrictModeCIDR, err = netip.ParsePrefix(strictCIDR) - if err != nil { - log.WithError(err).Fatalf("Cannot parse CIDR %s from --%s option", strictCIDR, EncryptionStrictModeCIDR) + strictCIDRs := vp.GetStringSlice(EncryptionStrictModeNodeCIDRs) + for _, strictCIDR := range strictCIDRs { + cdir, err := netip.ParsePrefix(strictCIDR) + if err != nil { + log.WithError(err).Fatalf("Cannot parse CIDR %s from --%s option", strictCIDR, EncryptionStrictModeNodeCIDRs) + } + if !cdir.Addr().Is4() { + log.Fatalf("%s must be an IPv4 CIDR", cdir) + } + + c.EncryptionStrictModeNodeCIDRs = append(c.EncryptionStrictModeNodeCIDRs, cdir) } - if !c.EncryptionStrictModeCIDR.Addr().Is4() { - log.Fatalf("%s must be an IPv4 CIDR", EncryptionStrictModeCIDR) + strictCIDRs = vp.GetStringSlice(EncryptionStrictModePodCIDRs) + for _, strictCIDR := range strictCIDRs { + cdir, err := netip.ParsePrefix(strictCIDR) + if err != nil { + log.WithError(err).Fatalf("Cannot parse CIDR %s from --%s option", strictCIDR, EncryptionStrictModeNodeCIDRs) + } + if !cdir.Addr().Is4() { + log.Fatalf("%s 
must be an IPv4 CIDR", cdir) + } + + c.EncryptionStrictModePodCIDRs = append(c.EncryptionStrictModePodCIDRs, cdir) } c.EncryptionStrictModeAllowRemoteNodeIdentities = vp.GetBool(EncryptionStrictModeAllowRemoteNodeIdentities)