Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(bpf): Monitor use of splice to avoid kernel bug on fast WAN redirecting #507

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions control/control_plane_core.go
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,12 @@ func (c *controlPlaneCore) setupSkPidMonitor() error {
}

func (c *controlPlaneCore) setupLocalTcpFastRedirect() (err error) {
tp, err := link.Tracepoint("syscalls", "sys_enter_splice", c.bpf.TracepointSyscallsSysEnterSplice, nil)
if err != nil {
return fmt.Errorf("Attach tracepoint:sys_enter_splice: %w", err)
}
c.deferFuncs = append(c.deferFuncs, tp.Close)

cgroupPath, err := detectCgroupPath()
if err != nil {
return
Expand Down
95 changes: 91 additions & 4 deletions control/kern/tproxy.c
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,13 @@ struct {
__uint(pinning, LIBBPF_PIN_BY_NAME);
} cookie_pid_map SEC(".maps");

/* Per-process-name allowlist for the local TCP fast-socket path.
 *
 * Key:   the task's comm string, stored in a TASK_COMM_LEN-byte buffer.
 * Value: one-byte flag. In this file, local_tcp_sockops stores one_key for
 *        a probed process that has no entry yet when its tracked connection
 *        changes state, and the sys_enter_splice tracepoint stores zero_key
 *        for a process that calls splice(2). local_tcp_sockops skips the
 *        fast-socket setup when the stored value is zero.
 *
 * NOTE(review): entries are keyed by comm only, so distinct processes that
 * share a comm share one entry -- confirm this is intended.
 */
struct {
__uint(type, BPF_MAP_TYPE_LRU_HASH);
__type(key, char[TASK_COMM_LEN]);
__type(value, __u8);
__uint(max_entries, MAX_COOKIE_PID_PNAME_MAPPING_NUM);
} fastsock_allowlist_map SEC(".maps");

struct udp_conn_state {
// pass

Expand Down Expand Up @@ -1845,12 +1852,74 @@ SEC("sockops")
int local_tcp_sockops(struct bpf_sock_ops *skops)
{
struct task_struct *task = (struct task_struct *)bpf_get_current_task();
__u32 pid = BPF_CORE_READ(task, pid);
__u32 pid = BPF_CORE_READ(task, tgid);

/* Only local TCP connection has non-zero pids. */
if (pid == 0)
return 0;

/* We only care about 3 kinds of events, skip others */
switch (skops->op) {
/* PASSIVE_ESTABLISHED_CB event is triggered when a new connection is
* established on a listening socket. In our case it's a dae TCP
* socket.
*/
case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
/* ACTIVE_ESTABLISHED_CB event is triggered when a new connection is
* established on a client process. In our case it's a local client
* process whose traffic has been redirected to dae.
*/
case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
/* STATE_CB event is triggered when a TCP status changes. It requires
* bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG) when
* connection is established. In our case it can only happen for a
* "probing" socket whose process is unknown to fastsock_allowlist_map.
* */
case BPF_SOCK_OPS_STATE_CB:
break;
default:
return 0;
}

char pname[16];

__builtin_memset(&pname, 0, sizeof(pname));
BPF_CORE_READ_STR_INTO(&pname, task, comm);

/* Let's handle BPF_SOCK_OPS_STATE_CB events here */
if (skops->op == BPF_SOCK_OPS_STATE_CB) {
/* TCP connection is closing, let's check if splice(2) is called */
if (skops->args[1] == BPF_TCP_CLOSE || skops->args[0] == BPF_TCP_ESTABLISHED) {
if (bpf_map_lookup_elem(&fastsock_allowlist_map, &pname)) {
/* Process has been recognized, return */
return 0;
}
/* Still no record, meaning process didn't call
* splice(2), add it to the allowlist. */
bpf_map_update_elem(&fastsock_allowlist_map, &pname, &one_key, BPF_ANY);
bpf_printk("fastsock_allowlist_map[%s] = 1", pname);
}
return 0;
}

/* Now it's BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB or
* BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, let's check if process is in the
* allowlist.
* */
__u8 *allow = bpf_map_lookup_elem(&fastsock_allowlist_map, &pname);

if (!allow) {
/* No entry, unknown process, let's probe it. */
bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
bpf_printk("track TCP socket session: \"%s\"\n", pname);
return 0;
} else if (!*allow) {
/* Entry found, but it's forbidden, abort. */
bpf_printk("fastsock not allowed: %s", pname);
return 0;
}

/* Okay this process is allowed to proceed with fast socket, let's add it to sockmap. */
struct tuples_key tuple = {};

tuple.l4proto = IPPROTO_TCP;
Expand Down Expand Up @@ -1955,12 +2024,30 @@ int sk_msg_fast_redirect(struct sk_msg_md *msg)
if (bpf_msg_redirect_hash(msg, &fast_sock, &rev_tuple, BPF_F_INGRESS) ==
SK_PASS)
bpf_printk("tcp fast redirect: %pI4:%lu -> %pI4:%lu",
&rev_tuple.sip.u6_addr32[3],
bpf_ntohs(rev_tuple.sport),
&rev_tuple.dip.u6_addr32[3],
bpf_ntohs(rev_tuple.dport));
bpf_ntohs(rev_tuple.dport),
&rev_tuple.sip.u6_addr32[3],
bpf_ntohs(rev_tuple.sport));

return SK_PASS;
}

/* Tracepoint handler attached to syscalls:sys_enter_splice.
 *
 * Reads the comm of the current task and records it in
 * fastsock_allowlist_map with the zero_key value, unless the map already
 * holds a zero value for that comm. The update is logged via bpf_printk.
 *
 * Always returns 0.
 */
SEC("tracepoint/syscalls/sys_enter_splice")
int tracepoint_syscalls_sys_enter_splice(void)
{
	char name_buf[16];
	struct task_struct *current_task =
		(struct task_struct *)bpf_get_current_task();

	/* Zero the whole buffer first so the bytes after the NUL terminator
	 * are deterministic; the full buffer is used as the map key.
	 */
	__builtin_memset(&name_buf, 0, sizeof(name_buf));
	BPF_CORE_READ_STR_INTO(&name_buf, current_task, comm);

	__u8 *flag = bpf_map_lookup_elem(&fastsock_allowlist_map, &name_buf);

	/* Already recorded with a zero value: nothing left to do. */
	if (flag && !*flag)
		return 0;

	/* Either no entry yet or a non-zero entry: overwrite with zero_key. */
	bpf_map_update_elem(&fastsock_allowlist_map, &name_buf, &zero_key, BPF_ANY);
	bpf_printk("fastsock_allowlist_map[%s] = 0", name_buf);
	return 0;
}

/* License string for this BPF object, placed in the "license" section. */
SEC("license") const char __license[] = "Dual BSD/GPL";
3 changes: 2 additions & 1 deletion docs/en/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,13 @@ CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_INFO_BTF=y
CONFIG_KPROBE_EVENTS=y
CONFIG_BPF_EVENTS=y
CONFIG_HAVE_SYSCALL_TRACEPOINTS=y
```

Check them using a command like:

```shell
(zcat /proc/config.gz || cat /boot/{config,config-$(uname -r)}) | grep -E 'CONFIG_(DEBUG_INFO|DEBUG_INFO_BTF|KPROBES|KPROBE_EVENTS|BPF|BPF_SYSCALL|BPF_JIT|BPF_STREAM_PARSER|NET_CLS_ACT|NET_SCH_INGRESS|NET_INGRESS|NET_EGRESS|NET_CLS_BPF|BPF_EVENTS|CGROUPS)=|# CONFIG_DEBUG_INFO_REDUCED is not set'
(zcat /proc/config.gz || cat /boot/{config,config-$(uname -r)}) | grep -E 'CONFIG_(DEBUG_INFO|DEBUG_INFO_BTF|KPROBES|KPROBE_EVENTS|BPF|BPF_SYSCALL|BPF_JIT|BPF_STREAM_PARSER|NET_CLS_ACT|NET_SCH_INGRESS|NET_INGRESS|NET_EGRESS|NET_CLS_BPF|BPF_EVENTS|CGROUPS|HAVE_SYSCALL_TRACEPOINTS)=|# CONFIG_DEBUG_INFO_REDUCED is not set'
```

> **Note**: `Armbian` users can follow the [**Upgrade Guide**](user-guide/kernel-upgrade.md) to upgrade the kernel to meet the kernel configuration requirement.
Expand Down
3 changes: 2 additions & 1 deletion docs/zh/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,13 @@ CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_INFO_BTF=y
CONFIG_KPROBE_EVENTS=y
CONFIG_BPF_EVENTS=y
CONFIG_HAVE_SYSCALL_TRACEPOINTS=y
```

你可以通过以下命令检查它们:

```shell
(zcat /proc/config.gz || cat /boot/{config,config-$(uname -r)}) | grep -E 'CONFIG_(DEBUG_INFO|DEBUG_INFO_BTF|KPROBES|KPROBE_EVENTS|BPF|BPF_SYSCALL|BPF_JIT|BPF_STREAM_PARSER|NET_CLS_ACT|NET_SCH_INGRESS|NET_INGRESS|NET_EGRESS|NET_CLS_BPF|BPF_EVENTS|CGROUPS)=|# CONFIG_DEBUG_INFO_REDUCED is not set'
(zcat /proc/config.gz || cat /boot/{config,config-$(uname -r)}) | grep -E 'CONFIG_(DEBUG_INFO|DEBUG_INFO_BTF|KPROBES|KPROBE_EVENTS|BPF|BPF_SYSCALL|BPF_JIT|BPF_STREAM_PARSER|NET_CLS_ACT|NET_SCH_INGRESS|NET_INGRESS|NET_EGRESS|NET_CLS_BPF|BPF_EVENTS|CGROUPS|HAVE_SYSCALL_TRACEPOINTS)=|# CONFIG_DEBUG_INFO_REDUCED is not set'
```

> **注意**: `Armbian` 用户可以参考 [**Upgrade Guide**](../en/user-guide/kernel-upgrade.md) 升级到支持的内核。
Expand Down
Loading