From 333fca2470a517789f508a9d6612878846cb9f9f Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 5 Jan 2024 14:40:25 +0000 Subject: [PATCH 1/3] net: fix network unlock with iptables-nft When iptables-nft is used as backend for iptables, the rules for network locking are translated into the following nft rules: ``` $ iptables-restore-translate -f lock.txt add table ip filter add chain ip filter CRIU insert rule ip filter INPUT counter jump CRIU insert rule ip filter OUTPUT counter jump CRIU add rule ip filter CRIU mark 0xc114 counter accept add rule ip filter CRIU counter drop ``` These rules create the following chains: ``` table ip filter { # handle 1 chain CRIU { # handle 1 meta mark 0x0000c114 counter packets 16 bytes 890 accept # handle 6 counter packets 1 bytes 60 drop # handle 7 meta mark 0x0000c114 counter packets 0 bytes 0 accept # handle 8 counter packets 0 bytes 0 drop # handle 9 } chain INPUT { # handle 2 type filter hook input priority filter; policy accept; counter packets 8 bytes 445 jump CRIU # handle 3 counter packets 0 bytes 0 jump CRIU # handle 10 } chain OUTPUT { # handle 4 type filter hook output priority filter; policy accept; counter packets 9 bytes 505 jump CRIU # handle 5 counter packets 0 bytes 0 jump CRIU # handle 11 } } ``` In order to delete the CRIU chain, we need to first delete all four jump targets. Otherwise, `-X CRIU` would fail with the following error: iptables-restore v1.8.10 (nf_tables): line 5: CHAIN_DEL failed (Resource busy): chain CRIU Reported-by: Andrei Vagin Signed-off-by: Radostin Stoyanov --- criu/net.c | 50 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/criu/net.c b/criu/net.c index 7109e6876a..b34c379bab 100644 --- a/criu/net.c +++ b/criu/net.c @@ -3178,19 +3178,53 @@ static inline int nftables_network_unlock(void) #endif } +static int iptables_has_criu_jump_target(void) +{ + int fd, ret; + char *argv[4] = { "sh", "-c", "iptables -C INPUT -j CRIU", NULL }; + + fd = open("/dev/null", O_RDWR); + if (fd < 0) { + fd = -1; + pr_perror("failed to open /dev/null, using log fd"); + } + + ret = cr_system(fd, fd, fd, "sh", argv, CRS_CAN_FAIL); + close_safe(&fd); + return ret; +} + static int iptables_network_unlock_internal(void) { - char conf[] = "*filter\n" - ":CRIU - [0:0]\n" - "-D INPUT -j CRIU\n" - "-D OUTPUT -j CRIU\n" - "-X CRIU\n" - "COMMIT\n"; + char delete_jump_targets[] = "*filter\n" + ":CRIU - [0:0]\n" + "-D INPUT -j CRIU\n" + "-D OUTPUT -j CRIU\n" + "COMMIT\n"; + + char delete_criu_chain[] = "*filter\n" + ":CRIU - [0:0]\n" + "-X CRIU\n" + "COMMIT\n"; + int ret = 0; - ret |= iptables_restore(false, conf, sizeof(conf) - 1); + ret |= iptables_restore(false, delete_jump_targets, sizeof(delete_jump_targets) - 1); if (kdat.ipv6) - ret |= iptables_restore(true, conf, sizeof(conf) - 1); + ret |= iptables_restore(true, delete_jump_targets, sizeof(delete_jump_targets) - 1); + + /* For compatibility with iptables-nft backend, we need to make sure that all jump + * targets have been removed before deleting the CRIU chain. + */ + if (!iptables_has_criu_jump_target()) { + ret |= iptables_restore(false, delete_jump_targets, sizeof(delete_jump_targets) - 1); + if (kdat.ipv6) + ret |= iptables_restore(true, delete_jump_targets, sizeof(delete_jump_targets) - 1); + } + + ret |= iptables_restore(false, delete_criu_chain, sizeof(delete_criu_chain) - 1); + if (kdat.ipv6) + ret |= iptables_restore(true, delete_criu_chain, sizeof(delete_criu_chain) - 1); return ret; } From 120cd22a10d72c49dfa101890a6e2685f36d0333 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 5 Jan 2024 18:07:59 +0000 Subject: [PATCH 2/3] test/nfconntrack: use nft or iptables-legacy nft does not support xtables compat expressions https://git.netfilter.org/nftables/commit/?id=79195a8cc9e9d9cf2d17165bf07ac4cc9d55539f Signed-off-by: Radostin Stoyanov --- scripts/build/Dockerfile.alpine | 1 + test/zdtm/static/Makefile | 8 +++--- ...nntrack.c => socket-tcp-ipt-nfconntrack.c} | 0 .../static/socket-tcp-ipt-nfconntrack.desc | 6 +++++ test/zdtm/static/socket-tcp-nfconntrack.desc | 1 - test/zdtm/static/socket-tcp-nft-nfconntrack.c | 1 + .../static/socket-tcp-nft-nfconntrack.desc | 7 +++++ test/zdtm/static/socket-tcp.c | 27 ++++++++++++++++--- 8 files changed, 44 insertions(+), 7 deletions(-) rename test/zdtm/static/{socket-tcp-nfconntrack.c => socket-tcp-ipt-nfconntrack.c} (100%) create mode 100644 test/zdtm/static/socket-tcp-ipt-nfconntrack.desc delete mode 100644 test/zdtm/static/socket-tcp-nfconntrack.desc create mode 120000 test/zdtm/static/socket-tcp-nft-nfconntrack.c create mode 100644 test/zdtm/static/socket-tcp-nft-nfconntrack.desc diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index 593e190315..2c58c910e7 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -33,6 +33,7 @@ RUN make mrproper && date && make -j $(nproc) CC="$CC" && date RUN apk add \ ip6tables \ iptables \ + iptables-legacy \ nftables \ iproute2 \ tar \ diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 07d3bc6e21..fb856d55b4 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -85,7 +85,8 @@ TST_NOFILE := \ socket-tcp4v6 \ socket-tcp-local \ socket-tcp-reuseport \ - socket-tcp-nfconntrack \ + socket-tcp-ipt-nfconntrack \ + socket-tcp-nft-nfconntrack \ socket-tcp6-local \ socket-tcp4v6-local \ socket-tcpbuf \ @@ -277,7 +278,7 @@ pkg-config-check = $(shell sh -c '$(PKG_CONFIG) $(1) && echo y') ifeq ($(call pkg-config-check,libbpf),y) TST_NOFILE += \ bpf_hash \ - bpf_array + bpf_array endif ifneq ($(ARCH),arm) @@ -598,7 +599,8 @@ socket-tcpbuf6-local: CFLAGS += -D ZDTM_TCP_LOCAL -D ZDTM_IPV6 socket-tcp6-local: CFLAGS += -D ZDTM_TCP_LOCAL -D ZDTM_IPV6 socket-tcp4v6-local: CFLAGS += -D ZDTM_TCP_LOCAL -D ZDTM_IPV4V6 socket-tcp-local: CFLAGS += -D ZDTM_TCP_LOCAL -socket-tcp-nfconntrack: CFLAGS += -D ZDTM_TCP_LOCAL -DZDTM_CONNTRACK +socket-tcp-ipt-nfconntrack: CFLAGS += -D ZDTM_TCP_LOCAL -DZDTM_IPT_CONNTRACK +socket-tcp-nft-nfconntrack: CFLAGS += -D ZDTM_TCP_LOCAL -DZDTM_NFT_CONNTRACK socket_listen6: CFLAGS += -D ZDTM_IPV6 socket_listen4v6: CFLAGS += -D ZDTM_IPV4V6 socket-tcp6-closed: CFLAGS += -D ZDTM_IPV6 diff --git a/test/zdtm/static/socket-tcp-nfconntrack.c b/test/zdtm/static/socket-tcp-ipt-nfconntrack.c similarity index 100% rename from test/zdtm/static/socket-tcp-nfconntrack.c rename to test/zdtm/static/socket-tcp-ipt-nfconntrack.c diff --git a/test/zdtm/static/socket-tcp-ipt-nfconntrack.desc b/test/zdtm/static/socket-tcp-ipt-nfconntrack.desc new file mode 100644 index 0000000000..53dd822854 --- /dev/null +++ b/test/zdtm/static/socket-tcp-ipt-nfconntrack.desc @@ -0,0 +1,6 @@ +{ + 'feature': 'has_ipt_legacy', + 'flavor': 'h', + 'opts': '--tcp-established', + 'flags': 'suid' +} diff --git a/test/zdtm/static/socket-tcp-nfconntrack.desc b/test/zdtm/static/socket-tcp-nfconntrack.desc deleted file mode 100644 index add2513f81..0000000000 --- a/test/zdtm/static/socket-tcp-nfconntrack.desc +++ /dev/null @@ -1 +0,0 @@ -{'flavor': 'h', 'opts': '--tcp-established', 'flags': 'suid'} diff --git a/test/zdtm/static/socket-tcp-nft-nfconntrack.c b/test/zdtm/static/socket-tcp-nft-nfconntrack.c new file mode 120000 index 0000000000..8cb60dd03a --- /dev/null +++ b/test/zdtm/static/socket-tcp-nft-nfconntrack.c @@ -0,0 +1 @@ +socket-tcp.c \ No newline at end of file diff --git a/test/zdtm/static/socket-tcp-nft-nfconntrack.desc b/test/zdtm/static/socket-tcp-nft-nfconntrack.desc new file mode 100644 index 0000000000..38a4eb3897 --- /dev/null +++ b/test/zdtm/static/socket-tcp-nft-nfconntrack.desc @@ -0,0 +1,7 @@ +{ + 'flavor': 'h', + 'feature': 'network_lock_nftables', + 'opts': '--tcp-established', + 'dopts': '--network-lock nftables', + 'flags': 'suid' +} diff --git a/test/zdtm/static/socket-tcp.c b/test/zdtm/static/socket-tcp.c index f6ef473853..9830c7860a 100644 --- a/test/zdtm/static/socket-tcp.c +++ b/test/zdtm/static/socket-tcp.c @@ -67,17 +67,38 @@ int main(int argc, char **argv) int val; socklen_t optlen; -#ifdef ZDTM_CONNTRACK +#ifdef ZDTM_IPT_CONNTRACK if (unshare(CLONE_NEWNET)) { pr_perror("unshare"); return 1; } if (system("ip link set up dev lo")) return 1; - if (system("iptables -w -A INPUT -i lo -p tcp -m state --state NEW,ESTABLISHED -j ACCEPT")) + + if (system("iptables-legacy -w -A INPUT -i lo -p tcp -m state --state NEW,ESTABLISHED -j ACCEPT")) + return 1; + if (system("iptables-legacy -w -A INPUT -j DROP")) + return 1; + +#endif + +#ifdef ZDTM_NFT_CONNTRACK + if (unshare(CLONE_NEWNET)) { + pr_perror("unshare"); return 1; - if (system("iptables -w -A INPUT -j DROP")) + } + if (system("ip link set up dev lo")) + return 1; + + if (system("nft add table ip filter")) return 1; + if (system("nft add chain ip filter INPUT")) + return 1; + if (system("nft add rule ip filter INPUT iifname \"lo\" ip protocol tcp ct state new,established counter accept")) + return 1; + if (system("nft add rule ip filter INPUT counter drop")) + return 1; + #endif #ifdef ZDTM_TCP_LOCAL From 8b69f185f0bc137492caee6c6d586f57045450fe Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 7 Jan 2024 18:09:28 +0000 Subject: [PATCH 3/3] net: add error messages for restore of nftables Show appropriate error messages when restore of nftables fails. Signed-off-by: Radostin Stoyanov --- criu/net.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/criu/net.c b/criu/net.c index b34c379bab..0f7280bb50 100644 --- a/criu/net.c +++ b/criu/net.c @@ -2438,27 +2438,39 @@ static inline int do_restore_nftables(struct cr_img *img) off_t img_data_size; char *buf; - if ((img_data_size = img_raw_size(img)) < 0) + if ((img_data_size = img_raw_size(img)) < 0) { + pr_err("image size mismatch\n"); goto out; + } - if (read_img_str(img, &buf, img_data_size) < 0) + if (read_img_str(img, &buf, img_data_size) < 0) { + pr_err("Failed to read nftables data\n"); goto out; + } nft = nft_ctx_new(NFT_CTX_DEFAULT); - if (!nft) + if (!nft) { + pr_err("Failed to create nft context object\n"); goto buf_free_out; + } + + if (nft_ctx_buffer_output(nft) || nft_ctx_buffer_error(nft)) { + pr_err("Failed to enable std/err output buffering\n"); + goto nft_ctx_free_out; + } - if (nft_ctx_buffer_output(nft) || nft_ctx_buffer_error(nft) || #if defined(CONFIG_HAS_NFTABLES_LIB_API_0) - nft_run_cmd_from_buffer(nft, buf, strlen(buf))) + if (nft_run_cmd_from_buffer(nft, buf, strlen(buf))) #elif defined(CONFIG_HAS_NFTABLES_LIB_API_1) - nft_run_cmd_from_buffer(nft, buf)) + if (nft_run_cmd_from_buffer(nft, buf)) #else - { - BUILD_BUG_ON(1); - } + BUILD_BUG_ON(1); #endif + { + pr_err("nft command error:\n%s\n%s\n", + nft_ctx_get_error_buffer(nft), buf); goto nft_ctx_free_out; + } exit_code = 0;