From e2d34ece3657814d8aa20eac193e3d8e2af06fe3 Mon Sep 17 00:00:00 2001 From: Dmitry Meyer Date: Fri, 18 Oct 2024 07:31:09 +0000 Subject: [PATCH] [dind] Move dind processes to a separate cgroup (#1859) Fixes: https://github.com/dstackai/dstack/issues/1854 --- docker/dind/start-dockerd | 86 ++++++++++++++++++++++++++++----------- 1 file changed, 62 insertions(+), 24 deletions(-) diff --git a/docker/dind/start-dockerd b/docker/dind/start-dockerd index be71d0804..406fb7884 100755 --- a/docker/dind/start-dockerd +++ b/docker/dind/start-dockerd @@ -1,32 +1,70 @@ #!/usr/bin/env bash set -euo pipefail -mkdir /mnt/_tmp -if ! mount -t tmpfs none /mnt/_tmp 2> /dev/null; then - echo 'docker privileged mode required' +check_privileged_mode_or_die() { + mkdir /mnt/_tmp + if ! mount -t tmpfs none /mnt/_tmp 2> /dev/null; then + echo 'docker privileged mode required' + rm -r /mnt/_tmp + exit 101 + fi + umount /mnt/_tmp rm -r /mnt/_tmp - exit 101 -fi -umount /mnt/_tmp -rm -r /mnt/_tmp +} -if ! supervisorctl status > /dev/null; then - supervisord -c /etc/supervisord.conf - action='started' -else - supervisorctl restart dockerd > /dev/null - action='restarted' -fi +start_restart_dockerd() { + if ! supervisorctl status > /dev/null; then + supervisord -c /etc/supervisord.conf + echo 'started' + else + supervisorctl restart dockerd > /dev/null + echo 'restarted' + fi +} -for _i in {1..10}; do - if supervisorctl tail dockerd | grep -q 'API listen on'; then - echo "dockerd ${action}" - exit 0 +move_processes_to_separate_cgroup() { + # Move processes to a separate cgroup to prevent the root cgroup from becoming + # threaded -- "Once you have a threaded controller you can not create cgroups + # below it that reference non-threaded controllers like the memory controller". + # "A domain cgroup is turned into a threaded domain when [...] threaded controllers + # are enabled in the “cgroup.subtree_control” file while there are processes + # in the cgroup." + # Fixes "cannot enter cgroupv2 "/sys/fs/cgroup/docker" with domain controllers -- + # it is in threaded mode" when starting containers with resource constraints, + # see https://github.com/dstackai/dstack/issues/1854 + # Based on https://github.com/moby/moby/blob/65cfcc2/hack/dind#L59 and + # https://github.com/earthly/earthly/blob/08b0d1f/buildkitd/dockerd-wrapper.sh#L63 + if [[ -f /sys/fs/cgroup/cgroup.controllers ]]; then + local group=/sys/fs/cgroup/dind + mkdir -p ${group} + xargs -rn1 < /sys/fs/cgroup/cgroup.procs > ${group}/cgroup.procs || true fi - sleep 1 -done +} + +wait_dockerd_started() { + for _i in {1..10}; do + if supervisorctl tail dockerd | grep -q 'API listen on'; then + return 0 + fi + sleep 1 + done + return 1 +} -supervisorctl stop dockerd > /dev/null -echo 'failed to start dockerd:' -supervisorctl tail dockerd -exit 102 +show_dockerd_log_and_die() { + supervisorctl stop dockerd > /dev/null + echo 'failed to start dockerd:' + supervisorctl tail dockerd + exit 102 +} + + +check_privileged_mode_or_die +event=$(start_restart_dockerd) +if ! wait_dockerd_started; then + show_dockerd_log_and_die +fi +if [[ ${event} = 'started' ]]; then + move_processes_to_separate_cgroup +fi +echo "dockerd ${event}"