diff --git a/compat.c b/compat.c index 23e3e5b..7798f7a 100644 --- a/compat.c +++ b/compat.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -56,9 +57,9 @@ int bst_close_range(unsigned int from, unsigned int to, unsigned int flags) errno = ENOSYS; #endif - if (rc == -1 && errno == ENOSYS) { - /* The system call is not implemented. Fall back to the good old - fashioned method. + if (rc == -1 && (errno == ENOSYS || errno == EINVAL)) { + /* The system call is not implemented, or it doesn't support + CLOSE_RANGE_CLOEXEC. Fall back to the good old fashioned method. Note that this isn't particularly efficient. bst_close_range is itself called in a loop, which means traversing the list of fds @@ -88,13 +89,24 @@ int bst_close_range(unsigned int from, unsigned int to, unsigned int flags) continue; } - /* Note: close takes a signed int, while close_range takes unsigned - ints. I'm not too sure how negative file descriptors are handled - (and I don't care much to be honest) so I'll just hope that the - system call just reads out an unsigned integer kernel-side. */ - - if (close((int) fd) == -1) { - err(1, "bst_close_range: close %d", fd); + /* Note: close/fcntl takes a signed int, while close_range takes + unsigned ints. I'm not too sure how negative file descriptors + are handled (and I don't care much to be honest) so I'll just + hope that the system call just reads out an unsigned integer + kernel-side. */ + + if (flags & BST_CLOSE_RANGE_CLOEXEC) { + int fdflags = fcntl((int) fd, F_GETFD); + if (fdflags == -1) { + err(1, "bst_close_range: fcntl %d F_GETFD", fd); + } + if (fcntl((int) fd, F_SETFD, fdflags | FD_CLOEXEC) == -1) { + err(1, "bst_close_range: fcntl %d F_SETFD", fd); + } + } else { + if (close((int) fd) == -1) { + err(1, "bst_close_range: close %d", fd); + } } } diff --git a/compat.h b/compat.h index 843e5a1..fbd1159 100644 --- a/compat.h +++ b/compat.h @@ -11,6 +11,7 @@ /* From the kernel headers */ # define BST_CLOSE_RANGE_UNSHARE (1U << 1) +# define BST_CLOSE_RANGE_CLOEXEC (1U << 2) size_t strlcpy(char *restrict dst, const char *restrict src, size_t size); unsigned int parse_fd(char *optarg); diff --git a/enter.c b/enter.c index 1fd643f..b2fbaa3 100644 --- a/enter.c +++ b/enter.c @@ -515,6 +515,12 @@ int enter(struct entry_settings *opts) } } + for (const struct close_range *range = opts->close_fds; range < opts->close_fds + opts->nclose_fds; ++range) { + if (bst_close_range(range->from, range->to, BST_CLOSE_RANGE_CLOEXEC) == -1) { + err(1, "close_range %d %d", range->from, range->to); + } + } + /* * Only mount a a cgroup hierarchy over sys/fs/cgroup if: * 1) The user has not specified --no_cgroup_remount @@ -807,11 +813,6 @@ int enter(struct entry_settings *opts) } const char *init = opts->init + rootlen; - for (const struct close_range *range = opts->close_fds; range < opts->close_fds + opts->nclose_fds; ++range) { - if (bst_close_range(range->from, range->to, BST_CLOSE_RANGE_UNSHARE) == -1) { - err(1, "close_range %d %d", range->from, range->to); - } - } execve(init, argv, opts->envp); err(1, "execve %s", init); }