From 3a7278b1b0b188daac8000d5dbcd6bf6657823e7 Mon Sep 17 00:00:00 2001
From: Jeremy Tuloup
Date: Tue, 14 Nov 2023 08:54:56 +0000
Subject: [PATCH 1/8] Update the `repo2docker` image

---
 ansible/tljh.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ansible/tljh.yml b/ansible/tljh.yml
index 5c46e01..43e661d 100644
--- a/ansible/tljh.yml
+++ b/ansible/tljh.yml
@@ -47,5 +47,6 @@
     - name: Pull the repo2docker Docker image
       docker_image:
         name: quay.io/jupyterhub/repo2docker
-        tag: main
+        tag: 2023.06.0-41.g57d229e
         source: pull
+        force_source: yes

From 0a76972d1d69f03ac9ecad14018bc6196d7851d7 Mon Sep 17 00:00:00 2001
From: Jeremy Tuloup
Date: Tue, 14 Nov 2023 09:00:23 +0000
Subject: [PATCH 2/8] Make the docker image configurable

---
 ansible/tljh.yml | 4 ++--
 ansible/vars/default.yml | 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/ansible/tljh.yml b/ansible/tljh.yml
index 43e661d..750e355 100644
--- a/ansible/tljh.yml
+++ b/ansible/tljh.yml
@@ -46,7 +46,7 @@
     # Pull the repo2docker image to build user images
     - name: Pull the repo2docker Docker image
      docker_image:
-        name: quay.io/jupyterhub/repo2docker
-        tag: 2023.06.0-41.g57d229e
+        name: {{ repo2docker_docker_name }}
+        tag: {{ repo2docker_docker_tag }}
         source: pull
         force_source: yes
diff --git a/ansible/vars/default.yml b/ansible/vars/default.yml
index 2d992cd..eb9fa4f 100644
--- a/ansible/vars/default.yml
+++ b/ansible/vars/default.yml
@@ -9,4 +9,7 @@ tljh_prefix: /opt/tljh
 tljh_bootstrap_pip_spec: git+https://github.com/jtpio/the-littlest-jupyterhub.git@skip-install#"egg=the_littlest_jupyterhub"
 tljh_installer_url: https://raw.githubusercontent.com/jtpio/the-littlest-jupyterhub/skip-install/bootstrap/bootstrap.py
 
+repo2docker_docker_name: quay.io/jupyterhub/repo2docker
+repo2docker_docker_tag: 2023.06.0-41.g57d229e
+
 ...

From 186673abdbf2befa58a265642f9e9f72f9d03ebe Mon Sep 17 00:00:00 2001
From: Jeremy Tuloup
Date: Wed, 15 Nov 2023 09:39:50 +0000
Subject: [PATCH 3/8] Update documentation

---
 docs/install/ansible.md | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/docs/install/ansible.md b/docs/install/ansible.md
index ce64c02..bd1bb5c 100644
--- a/docs/install/ansible.md
+++ b/docs/install/ansible.md
@@ -268,6 +268,21 @@ tagged as `v0.1`:
 tljh_plasma: git+https://github.com/plasmabio/plasma@v0.1#"egg=tljh-plasma&subdirectory=tljh-plasma"
 ```
 
+## Using a specific version of `repo2docker`
+
+`repo2docker` is used to build the user environments from the Git repositories.
+
+By default, the Ansible playbooks use a pinned version of the `jupyterhub/repo2docker` image, which is defined in the
+`ansible/vars/default.yml` file.
+ +Like other Ansible variables, it is possible to use a different version of `repo2docker` by overriding the variable +via the command line: + +```bash +# use an older version of the repo2docker image +ansible-playbook tljh.yml -i hosts -u ubuntu -e "repo2docker_docker_tag=2022.10.0-232.g0ea6f7d" +``` + ## List of available playbooks The Ansible playbooks are located in the `ansible/` directory: From e960cfc357a53f50b2608667f202975fa5417fb1 Mon Sep 17 00:00:00 2001 From: Jeremy Tuloup Date: Thu, 16 Nov 2023 16:39:23 +0000 Subject: [PATCH 4/8] Try switching to a Python entrypoint like repo2docker --- tljh-plasma/tljh_plasma/__init__.py | 4 +- .../entrypoint/repo2docker-entrypoint | 156 ++++++++++++++++++ 2 files changed, 158 insertions(+), 2 deletions(-) create mode 100755 tljh-plasma/tljh_plasma/entrypoint/repo2docker-entrypoint diff --git a/tljh-plasma/tljh_plasma/__init__.py b/tljh-plasma/tljh_plasma/__init__.py index ff067bb..a28c8ad 100644 --- a/tljh-plasma/tljh_plasma/__init__.py +++ b/tljh-plasma/tljh_plasma/__init__.py @@ -70,7 +70,7 @@ async def start(self, *args, **kwargs): # mount volumes self.volumes = { os.path.join( - os.path.dirname(__file__), "entrypoint", "entrypoint.sh" + os.path.dirname(__file__), "entrypoint", "repo2docker-entrypoint" ): "/usr/local/bin/repo2docker-entrypoint", self.shared_data_path: {"bind": "/srv/data", "mode": "ro"}, } @@ -117,7 +117,7 @@ def tljh_custom_jupyterhub_config(c, tljh_config_file=CONFIG_FILE): # increase the timeout to be able to pull larger Docker images c.PlasmaSpawner.start_timeout = 120 c.PlasmaSpawner.pull_policy = "Never" - c.PlasmaSpawner.remove = True + # c.PlasmaSpawner.remove = True c.PlasmaSpawner.default_url = "/lab" # TODO: change back to jupyterhub-singleuser c.PlasmaSpawner.cmd = ["/srv/conda/envs/notebook/bin/jupyterhub-singleuser"] diff --git a/tljh-plasma/tljh_plasma/entrypoint/repo2docker-entrypoint b/tljh-plasma/tljh_plasma/entrypoint/repo2docker-entrypoint new file mode 100755 index 0000000..d541426 --- /dev/null +++ b/tljh-plasma/tljh_plasma/entrypoint/repo2docker-entrypoint @@ -0,0 +1,156 @@ +#!/usr/local/bin/python3-login +# note: must run on Python >= 3.5, which mainly means no f-strings + +# goals: +# - load environment variables from a login shell (bash -l) +# - preserve signal handling of subprocess (kill -TERM and friends) +# - tee output to a log file + +# Adapted from https://github.com/jupyterhub/repo2docker/blob/c6f97e55c19b44d6579d1d54087155f3e3df5338/repo2docker/buildpacks/repo2docker-entrypoint + +import fcntl +import os +import select +import signal +import shutil +import subprocess +import sys +import tempfile + +# output chunk size to read +CHUNK_SIZE = 1024 + +# signals to be forwarded to the child +# everything catchable, excluding SIGCHLD +SIGNALS = set(signal.Signals) - {signal.SIGKILL, signal.SIGSTOP, signal.SIGCHLD} + + +def main(): + + # open log file to send output to; + # preferred location of log file is: + # 1. REPO_DIR env variable + # 2. current working directory: "." + # 3. default temp directory for the OS (e.g. 
/tmp for linux) + log_dirs = [".", tempfile.gettempdir()] + log_file = None + if "REPO_DIR" in os.environ: + log_dirs.insert(0, os.environ["REPO_DIR"]) + for d in log_dirs: + log_path = os.path.join(d, ".jupyter-server-log.txt") + try: + log_file = open(log_path, "ab") + except Exception: + continue + else: + # success + break + # raise Exception if log_file could not be set + if log_file is None: + raise Exception("Could not open '.jupyter-server-log.txt' log file " ) + + # handle user override + NB_GID = os.environ.get("NB_UID") + PATH = os.environ.get("PATH").replace("jovyan", os.environ.get("NB_USER")) + IMAGE_DIR = os.path.join(os.environ.get("HOME"), os.environ.get("USER_IMAGE")) + + # add a new group for the user + subprocess.run(["groupadd", "-g", NB_GID, "-o", os.environ.get("NB_GROUP", os.environ.get("NB_USER"))]) + + # add the user and set their home directory + subprocess.run(["useradd", "--home", os.environ.get("HOME"), "-u", os.environ.get("NB_UID"), "-g", NB_GID, "-G", "100", "-l", os.environ.get("NB_USER")]) + + # copy the content from the default docker image to the user home directory + for file in os.listdir("/home/jovyan"): + if not os.path.exists(os.path.join(IMAGE_DIR, file)): + shutil.copytree(os.path.join("/home/jovyan", file), os.path.join(IMAGE_DIR, file)) + + # remove the .cache if it exists, as it can be a couple hundreds MB big + if os.path.exists(os.path.join(IMAGE_DIR, ".cache")): + shutil.rmtree(os.path.join(IMAGE_DIR, ".cache")) + + # set the name of the environment as the topbar text indicator + TOPBAR_TEXT_SETTINGS_DIR = os.path.join(IMAGE_DIR, ".jupyter/lab/user-settings/jupyterlab-topbar-text") + os.makedirs(TOPBAR_TEXT_SETTINGS_DIR, exist_ok=True) + with open(os.path.join(TOPBAR_TEXT_SETTINGS_DIR, "plugin.jupyterlab-settings"), "w") as f: + f.write("{\"editable\": false, \"text\":\"" + os.environ.get("USER_IMAGE") + "\"}") + + # set the correct permissions for the user home subdirectory + subprocess.run(["chown", "-R", os.environ.get("NB_USER") + ":" + os.environ.get("NB_USER"), IMAGE_DIR]) + + # set the Jupyter paths environment variables to find potential configuration + # and data files from the user environment base images home directories + os.environ["JUPYTER_CONFIG_DIR"] = os.path.join(IMAGE_DIR, ".jupyter") + os.environ["JUPYTER_PATH"] = os.path.join(IMAGE_DIR, ".local/share/jupyter") + + # start the notebook server from the environment directory + os.chdir(IMAGE_DIR) + + # # build the command + # # like `exec "$@"` + # command = sys.argv[1:] + # # load entrypoint override from env + # r2d_entrypoint = os.environ.get("R2D_ENTRYPOINT") + # if r2d_entrypoint: + # command.insert(0, r2d_entrypoint) + + command = ["su", "-", os.environ.get("NB_USER"), "-m", "-c", '"$0" "$@"', "--", "$@"] + + # launch the subprocess + child = subprocess.Popen( + command, + bufsize=1, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) + + # hook up ~all signals so that every signal the parent gets, + # the children also get + + def relay_signal(sig, frame): + """Relay a signal to children""" + # DEBUG: show signal + child.send_signal(sig) + + for signum in SIGNALS: + signal.signal(signum, relay_signal) + + # tee output from child to both our stdout and the log file + def tee(chunk): + """Tee output from child to both our stdout and the log file""" + for f in [sys.stdout.buffer, log_file]: + f.write(chunk) + f.flush() + + # make stdout pipe non-blocking + # this means child.stdout.read(nbytes) + # will always return immediately, even if there's nothing to read + 
flags = fcntl.fcntl(child.stdout, fcntl.F_GETFL)
+    fcntl.fcntl(child.stdout, fcntl.F_SETFL, flags | os.O_NONBLOCK)
+    poller = select.poll()
+    poller.register(child.stdout)
+
+    # while child is running, constantly relay output
+    while child.poll() is None:
+        chunk = child.stdout.read(CHUNK_SIZE)
+        if chunk:
+            tee(chunk)
+        else:
+            # empty chunk means nothing to read
+            # wait for output on the pipe
+            # timeout is in milliseconds
+            poller.poll(1000)
+
+    # child has exited, continue relaying any remaining output
+    # At this point, read() will return an empty string when it's done
+    chunk = child.stdout.read()
+    while chunk:
+        tee(chunk)
+        chunk = child.stdout.read()
+
+    # make our returncode match the child's returncode
+    sys.exit(child.returncode)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

From dd677da95dd2fded4025403be6e9ee93e1b3be7f Mon Sep 17 00:00:00 2001
From: Jeremy Tuloup
Date: Mon, 20 Nov 2023 09:26:28 +0000
Subject: [PATCH 5/8] Explicitly opt-in as run_as_root

---
 tljh-plasma/tljh_plasma/__init__.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tljh-plasma/tljh_plasma/__init__.py b/tljh-plasma/tljh_plasma/__init__.py
index a28c8ad..106a0c9 100644
--- a/tljh-plasma/tljh_plasma/__init__.py
+++ b/tljh-plasma/tljh_plasma/__init__.py
@@ -70,7 +70,7 @@ async def start(self, *args, **kwargs):
         # mount volumes
         self.volumes = {
             os.path.join(
-                os.path.dirname(__file__), "entrypoint", "repo2docker-entrypoint"
+                os.path.dirname(__file__), "entrypoint", "entrypoint.sh"
             ): "/usr/local/bin/repo2docker-entrypoint",
             self.shared_data_path: {"bind": "/srv/data", "mode": "ro"},
         }
@@ -117,12 +117,14 @@ def tljh_custom_jupyterhub_config(c, tljh_config_file=CONFIG_FILE):
     # increase the timeout to be able to pull larger Docker images
     c.PlasmaSpawner.start_timeout = 120
     c.PlasmaSpawner.pull_policy = "Never"
-    # c.PlasmaSpawner.remove = True
+    c.PlasmaSpawner.remove = False
     c.PlasmaSpawner.default_url = "/lab"
     # TODO: change back to jupyterhub-singleuser
     c.PlasmaSpawner.cmd = ["/srv/conda/envs/notebook/bin/jupyterhub-singleuser"]
     # set the default cpu and memory limits
     c.PlasmaSpawner.args = ["--ResourceUseDisplay.track_cpu_percent=True"]
+    # explicitly opt-in to enable the custom entrypoint logic
+    c.PlasmaSpawner.run_as_root = True
 
     # prevent PID 1 running in the Docker container to stop when child processes are killed
     # see https://github.com/plasmabio/plasma/issues/191 for more info

From 0acb9e1d09510455fbe4fee01759e4b8151fe546 Mon Sep 17 00:00:00 2001
From: Jeremy Tuloup
Date: Mon, 20 Nov 2023 09:47:26 +0000
Subject: [PATCH 6/8] Omit - in su command

---
 tljh-plasma/tljh_plasma/__init__.py | 1 +
 .../tljh_plasma/entrypoint/entrypoint.sh | 2 +-
 .../entrypoint/repo2docker-entrypoint | 156 ------------------
 3 files changed, 2 insertions(+), 157 deletions(-)
 delete mode 100755 tljh-plasma/tljh_plasma/entrypoint/repo2docker-entrypoint

diff --git a/tljh-plasma/tljh_plasma/__init__.py b/tljh-plasma/tljh_plasma/__init__.py
index 106a0c9..681edb6 100644
--- a/tljh-plasma/tljh_plasma/__init__.py
+++ b/tljh-plasma/tljh_plasma/__init__.py
@@ -117,6 +117,7 @@ def tljh_custom_jupyterhub_config(c, tljh_config_file=CONFIG_FILE):
     # increase the timeout to be able to pull larger Docker images
     c.PlasmaSpawner.start_timeout = 120
     c.PlasmaSpawner.pull_policy = "Never"
+    # TODO: re-enable
     c.PlasmaSpawner.remove = False
     c.PlasmaSpawner.default_url = "/lab"
     # TODO: change back to jupyterhub-singleuser
diff --git a/tljh-plasma/tljh_plasma/entrypoint/entrypoint.sh 
b/tljh-plasma/tljh_plasma/entrypoint/entrypoint.sh index e9a66d8..0a91c1c 100755 --- a/tljh-plasma/tljh_plasma/entrypoint/entrypoint.sh +++ b/tljh-plasma/tljh_plasma/entrypoint/entrypoint.sh @@ -43,4 +43,4 @@ export JUPYTER_PATH=${IMAGE_DIR}/.local/share/jupyter cd ${IMAGE_DIR} # execute the notebook process as the given user -exec su - $NB_USER -m -c '"$0" "$@"' -- "$@" +exec su $NB_USER -m -c '"$0" "$@"' -- "$@" diff --git a/tljh-plasma/tljh_plasma/entrypoint/repo2docker-entrypoint b/tljh-plasma/tljh_plasma/entrypoint/repo2docker-entrypoint deleted file mode 100755 index d541426..0000000 --- a/tljh-plasma/tljh_plasma/entrypoint/repo2docker-entrypoint +++ /dev/null @@ -1,156 +0,0 @@ -#!/usr/local/bin/python3-login -# note: must run on Python >= 3.5, which mainly means no f-strings - -# goals: -# - load environment variables from a login shell (bash -l) -# - preserve signal handling of subprocess (kill -TERM and friends) -# - tee output to a log file - -# Adapted from https://github.com/jupyterhub/repo2docker/blob/c6f97e55c19b44d6579d1d54087155f3e3df5338/repo2docker/buildpacks/repo2docker-entrypoint - -import fcntl -import os -import select -import signal -import shutil -import subprocess -import sys -import tempfile - -# output chunk size to read -CHUNK_SIZE = 1024 - -# signals to be forwarded to the child -# everything catchable, excluding SIGCHLD -SIGNALS = set(signal.Signals) - {signal.SIGKILL, signal.SIGSTOP, signal.SIGCHLD} - - -def main(): - - # open log file to send output to; - # preferred location of log file is: - # 1. REPO_DIR env variable - # 2. current working directory: "." - # 3. default temp directory for the OS (e.g. /tmp for linux) - log_dirs = [".", tempfile.gettempdir()] - log_file = None - if "REPO_DIR" in os.environ: - log_dirs.insert(0, os.environ["REPO_DIR"]) - for d in log_dirs: - log_path = os.path.join(d, ".jupyter-server-log.txt") - try: - log_file = open(log_path, "ab") - except Exception: - continue - else: - # success - break - # raise Exception if log_file could not be set - if log_file is None: - raise Exception("Could not open '.jupyter-server-log.txt' log file " ) - - # handle user override - NB_GID = os.environ.get("NB_UID") - PATH = os.environ.get("PATH").replace("jovyan", os.environ.get("NB_USER")) - IMAGE_DIR = os.path.join(os.environ.get("HOME"), os.environ.get("USER_IMAGE")) - - # add a new group for the user - subprocess.run(["groupadd", "-g", NB_GID, "-o", os.environ.get("NB_GROUP", os.environ.get("NB_USER"))]) - - # add the user and set their home directory - subprocess.run(["useradd", "--home", os.environ.get("HOME"), "-u", os.environ.get("NB_UID"), "-g", NB_GID, "-G", "100", "-l", os.environ.get("NB_USER")]) - - # copy the content from the default docker image to the user home directory - for file in os.listdir("/home/jovyan"): - if not os.path.exists(os.path.join(IMAGE_DIR, file)): - shutil.copytree(os.path.join("/home/jovyan", file), os.path.join(IMAGE_DIR, file)) - - # remove the .cache if it exists, as it can be a couple hundreds MB big - if os.path.exists(os.path.join(IMAGE_DIR, ".cache")): - shutil.rmtree(os.path.join(IMAGE_DIR, ".cache")) - - # set the name of the environment as the topbar text indicator - TOPBAR_TEXT_SETTINGS_DIR = os.path.join(IMAGE_DIR, ".jupyter/lab/user-settings/jupyterlab-topbar-text") - os.makedirs(TOPBAR_TEXT_SETTINGS_DIR, exist_ok=True) - with open(os.path.join(TOPBAR_TEXT_SETTINGS_DIR, "plugin.jupyterlab-settings"), "w") as f: - f.write("{\"editable\": false, \"text\":\"" + os.environ.get("USER_IMAGE") + 
"\"}") - - # set the correct permissions for the user home subdirectory - subprocess.run(["chown", "-R", os.environ.get("NB_USER") + ":" + os.environ.get("NB_USER"), IMAGE_DIR]) - - # set the Jupyter paths environment variables to find potential configuration - # and data files from the user environment base images home directories - os.environ["JUPYTER_CONFIG_DIR"] = os.path.join(IMAGE_DIR, ".jupyter") - os.environ["JUPYTER_PATH"] = os.path.join(IMAGE_DIR, ".local/share/jupyter") - - # start the notebook server from the environment directory - os.chdir(IMAGE_DIR) - - # # build the command - # # like `exec "$@"` - # command = sys.argv[1:] - # # load entrypoint override from env - # r2d_entrypoint = os.environ.get("R2D_ENTRYPOINT") - # if r2d_entrypoint: - # command.insert(0, r2d_entrypoint) - - command = ["su", "-", os.environ.get("NB_USER"), "-m", "-c", '"$0" "$@"', "--", "$@"] - - # launch the subprocess - child = subprocess.Popen( - command, - bufsize=1, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - ) - - # hook up ~all signals so that every signal the parent gets, - # the children also get - - def relay_signal(sig, frame): - """Relay a signal to children""" - # DEBUG: show signal - child.send_signal(sig) - - for signum in SIGNALS: - signal.signal(signum, relay_signal) - - # tee output from child to both our stdout and the log file - def tee(chunk): - """Tee output from child to both our stdout and the log file""" - for f in [sys.stdout.buffer, log_file]: - f.write(chunk) - f.flush() - - # make stdout pipe non-blocking - # this means child.stdout.read(nbytes) - # will always return immediately, even if there's nothing to read - flags = fcntl.fcntl(child.stdout, fcntl.F_GETFL) - fcntl.fcntl(child.stdout, fcntl.F_SETFL, flags | os.O_NONBLOCK) - poller = select.poll() - poller.register(child.stdout) - - # while child is running, constantly relay output - while child.poll() is None: - chunk = child.stdout.read(CHUNK_SIZE) - if chunk: - tee(chunk) - else: - # empty chunk means nothing to read - # wait for output on the pipe - # timeout is in milliseconds - poller.poll(1000) - - # child has exited, continue relaying any remaining output - # At this point, read() will return an empty string when it's done - chunk = child.stdout.read() - while chunk: - tee(chunk) - chunk = child.stdout.read() - - # make our returncode match the child's returncode - sys.exit(child.returncode) - - -if __name__ == "__main__": - main() \ No newline at end of file From 020b2e24cefa32fb43b558ded6343a5c0b836bda Mon Sep 17 00:00:00 2001 From: Jeremy Tuloup Date: Tue, 21 Nov 2023 16:34:43 +0200 Subject: [PATCH 7/8] Re-enable container removal --- tljh-plasma/tljh_plasma/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tljh-plasma/tljh_plasma/__init__.py b/tljh-plasma/tljh_plasma/__init__.py index 681edb6..60f2ffc 100644 --- a/tljh-plasma/tljh_plasma/__init__.py +++ b/tljh-plasma/tljh_plasma/__init__.py @@ -117,8 +117,7 @@ def tljh_custom_jupyterhub_config(c, tljh_config_file=CONFIG_FILE): # increase the timeout to be able to pull larger Docker images c.PlasmaSpawner.start_timeout = 120 c.PlasmaSpawner.pull_policy = "Never" - # TODO: re-enable - c.PlasmaSpawner.remove = False + c.PlasmaSpawner.remove = True c.PlasmaSpawner.default_url = "/lab" # TODO: change back to jupyterhub-singleuser c.PlasmaSpawner.cmd = ["/srv/conda/envs/notebook/bin/jupyterhub-singleuser"] From 64bff01d3b2004c4e455cc4dcc2c7502b7f95a16 Mon Sep 17 00:00:00 2001 From: Jeremy Tuloup Date: Wed, 22 Nov 2023 
18:09:13 +0200 Subject: [PATCH 8/8] Fix missing quotes --- ansible/tljh.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/tljh.yml b/ansible/tljh.yml index 750e355..9592c85 100644 --- a/ansible/tljh.yml +++ b/ansible/tljh.yml @@ -46,7 +46,7 @@ # Pull the repo2docker image to build user images - name: Pull the repo2docker Docker image docker_image: - name: {{ repo2docker_docker_name }} - tag: {{ repo2docker_docker_tag }} + name: "{{ repo2docker_docker_name }}" + tag: "{{ repo2docker_docker_tag }}" source: pull force_source: yes
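
Taken together, the patches above make the repo2docker image used for building user environments fully configurable. As a usage sketch (not part of the patch series itself; the inventory, remote user, and tag value are illustrative and taken from the docs and defaults above), both variables introduced in `ansible/vars/default.yml` can be overridden in a single run:

```bash
# pin a specific repo2docker image and tag for building the user environments;
# force_source: yes in ansible/tljh.yml re-pulls the image even if a copy exists locally
ansible-playbook tljh.yml -i hosts -u ubuntu \
  -e "repo2docker_docker_name=quay.io/jupyterhub/repo2docker" \
  -e "repo2docker_docker_tag=2023.06.0-41.g57d229e"
```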