Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jupyterhub update to dev #716

Merged
merged 11 commits into from
Sep 4, 2024
26 changes: 17 additions & 9 deletions applications/jupyterhub/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,31 +1,39 @@
ARG CLOUDHARNESS_BASE
FROM $CLOUDHARNESS_BASE as base

FROM jupyterhub/k8s-hub:1.1.3
FROM quay.io/jupyterhub/k8s-hub:3.2.1
USER root


COPY --from=base libraries/models/requirements.txt /libraries/models/requirements.txt
RUN pip install -r /libraries/models/requirements.txt
RUN --mount=type=cache,target=/root/.cache python -m pip install --upgrade pip &&\
pip install -r /libraries/models/requirements.txt
COPY --from=base libraries/cloudharness-common/requirements.txt /libraries/cloudharness-common/requirements.txt
RUN pip install -r /libraries/cloudharness-common/requirements.txt
RUN --mount=type=cache,target=/root/.cache python -m pip install --upgrade pip &&\
pip install -r /libraries/cloudharness-common/requirements.txt
COPY --from=base libraries/client/cloudharness_cli/requirements.txt /libraries/client/cloudharness_cli/requirements.txt
RUN pip install -r /libraries/client/cloudharness_cli/requirements.txt
RUN --mount=type=cache,target=/root/.cache python -m pip install --upgrade pip &&\
pip install -r /libraries/client/cloudharness_cli/requirements.txt

COPY --from=base libraries/models /libraries/models
RUN pip install -e /libraries/models
RUN --mount=type=cache,target=/root/.cache python -m pip install --upgrade pip &&\
pip install -e /libraries/models

COPY --from=base libraries/cloudharness-common /libraries/cloudharness-common
COPY --from=base libraries/client/cloudharness_cli /libraries/client/cloudharness_cli

#

RUN pip install -e /libraries/cloudharness-common
RUN pip install -e /libraries/client/cloudharness_cli
RUN --mount=type=cache,target=/root/.cache python -m pip install --upgrade pip &&\
pip install -e /libraries/cloudharness-common
RUN --mount=type=cache,target=/root/.cache python -m pip install --upgrade pip &&\
pip install -e /libraries/client/cloudharness_cli


COPY src src
RUN pip install ./src/harness_jupyter
RUN pip install ./src/chauthenticator
RUN --mount=type=cache,target=/root/.cache python -m pip install --upgrade pip &&\
pip install ./src/harness_jupyter
RUN --mount=type=cache,target=/root/.cache python -m pip install --upgrade pip &&\
pip install ./src/chauthenticator

USER jovyan
11 changes: 11 additions & 0 deletions applications/jupyterhub/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,14 @@ To support the pre pulling of task images see (https://github.com/MetaCell/cloud
the template `templates/image-puller/_helpers-daemonset.tpl` has been changed (see line 167 and on)

TODO: remember to implement/revise this code after you have updated/changed the templates of JupyterHub

## How to update

The helm chart is based on the [zero-to-jupyterhub](https://github.com/jupyterhub/zero-to-jupyterhub-k8s/) helm chart.

1. Run update.sh [TAG] # Do not use latest!
2. Restore from the diff files with EDIT: CLOUDHARNESS. Use update.patch as a reference
3. 3. Update Dockerfile to use the same base image you see on values.yaml: hub/image

Customize notebook image: quay.io/jupyterhub/k8s-singleuser-sample:[TAG]

153 changes: 94 additions & 59 deletions applications/jupyterhub/deploy/resources/hub/jupyterhub_config.py
Original file line number Diff line number Diff line change
@@ -1,58 +1,38 @@
# load the config object (satisfies linters)
c = get_config() # noqa

import glob
import os
import re
import sys
import logging

from tornado.httpclient import AsyncHTTPClient
from kubernetes import client
from jupyterhub.utils import url_path_join
from kubernetes_asyncio import client
from tornado.httpclient import AsyncHTTPClient

#CLOUDHARNESS: EDIT START
import logging

try:
from harness_jupyter.jupyterhub import harness_hub
harness_hub() # activates harness hooks on jupyterhub
except Exception as e:
logging.error("could not import harness_jupyter", exc_info=True)

# CLOUDHARNESS: EDIT END

# Make sure that modules placed in the same directory as the jupyterhub config are added to the pythonpath
configuration_directory = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, configuration_directory)

from z2jh import (
get_config,
set_config_if_not_none,
get_name,
get_name_env,
get_secret_value,
set_config_if_not_none,
)


print('Base url is', c.JupyterHub.get('base_url', '/'))

# Configure JupyterHub to use the curl backend for making HTTP requests,
# rather than the pure-python implementations. The default one starts
# being too slow to make a large number of requests to the proxy API
# at the rate required.
AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")

c.JupyterHub.spawner_class = 'kubespawner.KubeSpawner'

# Connect to a proxy running in a different pod
c.ConfigurableHTTPProxy.api_url = 'http://{}:{}'.format(os.environ['PROXY_API_SERVICE_HOST'], int(os.environ['PROXY_API_SERVICE_PORT']))
c.ConfigurableHTTPProxy.should_start = False

# Do not shut down user pods when hub is restarted
c.JupyterHub.cleanup_servers = False

# Check that the proxy has routes appropriately setup
c.JupyterHub.last_activity_interval = 60

# Don't wait at all before redirecting a spawning user to the progress page
c.JupyterHub.tornado_settings = {
'slow_spawn_timeout': 0,
}


def camelCaseify(s):
"""convert snake_case to camelCase

Expand Down Expand Up @@ -173,6 +153,7 @@ def camelCaseify(s):
("events_enabled", "events"),
("extra_labels", None),
("extra_annotations", None),
# ("allow_privilege_escalation", None), # Managed manually below
("uid", None),
("fs_gid", None),
("service_account", "serviceAccountName"),
Expand Down Expand Up @@ -206,10 +187,19 @@ def camelCaseify(s):
if image:
tag = get_config("singleuser.image.tag")
if tag:
image = "{}:{}".format(image, tag)
image = f"{image}:{tag}"

c.KubeSpawner.image = image

# allow_privilege_escalation defaults to False in KubeSpawner 2+. Since its a
# property where None, False, and True all are valid values that users of the
# Helm chart may want to set, we can't use the set_config_if_not_none helper
# function as someone may want to override the default False value to None.
#
c.KubeSpawner.allow_privilege_escalation = get_config(
"singleuser.allowPrivilegeEscalation"
)

# Combine imagePullSecret.create (single), imagePullSecrets (list), and
# singleuser.image.pullSecrets (list).
image_pull_secrets = []
Expand Down Expand Up @@ -255,7 +245,7 @@ def camelCaseify(s):
pass
else:
raise ValueError(
"Unrecognized value for matchNodePurpose: %r" % match_node_purpose
f"Unrecognized value for matchNodePurpose: {match_node_purpose}"
)

# Combine the common tolerations for user pods with singleuser tolerations
Expand All @@ -271,7 +261,7 @@ def camelCaseify(s):
pvc_name_template = get_config("singleuser.storage.dynamic.pvcNameTemplate")
c.KubeSpawner.pvc_name_template = pvc_name_template
volume_name_template = get_config("singleuser.storage.dynamic.volumeNameTemplate")
c.KubeSpawner.storage_pvc_ensure = False
c.KubeSpawner.storage_pvc_ensure = True
set_config_if_not_none(
c.KubeSpawner, "storage_class", "singleuser.storage.dynamic.storageClass"
)
Expand Down Expand Up @@ -354,41 +344,62 @@ def camelCaseify(s):
)

c.JupyterHub.services = []
c.JupyterHub.load_roles = []

# jupyterhub-idle-culler's permissions are scoped to what it needs only, see
# https://github.com/jupyterhub/jupyterhub-idle-culler#permissions.
#
if get_config("cull.enabled", False):
jupyterhub_idle_culler_role = {
"name": "jupyterhub-idle-culler",
"scopes": [
"list:users",
"read:users:activity",
"read:servers",
"delete:servers",
# "admin:users", # dynamically added if --cull-users is passed
],
# assign the role to a jupyterhub service, so it gains these permissions
"services": ["jupyterhub-idle-culler"],
}

cull_cmd = ["python3", "-m", "jupyterhub_idle_culler"]
base_url = c.JupyterHub.get("base_url", "/")
cull_cmd.append("--url=http://localhost:8081" + url_path_join(base_url, "hub/api"))

cull_timeout = get_config("cull.timeout")
if cull_timeout:
cull_cmd.append("--timeout=%s" % cull_timeout)
cull_cmd.append(f"--timeout={cull_timeout}")

cull_every = get_config("cull.every")
if cull_every:
cull_cmd.append("--cull-every=%s" % cull_every)
cull_cmd.append(f"--cull-every={cull_every}")

cull_concurrency = get_config("cull.concurrency")
if cull_concurrency:
cull_cmd.append("--concurrency=%s" % cull_concurrency)
cull_cmd.append(f"--concurrency={cull_concurrency}")

if get_config("cull.users"):
cull_cmd.append("--cull-users")
jupyterhub_idle_culler_role["scopes"].append("admin:users")

if not get_config("cull.adminUsers"):
cull_cmd.append("--cull-admin-users=false")

if get_config("cull.removeNamedServers"):
cull_cmd.append("--remove-named-servers")

cull_max_age = get_config("cull.maxAge")
if cull_max_age:
cull_cmd.append("--max-age=%s" % cull_max_age)
cull_cmd.append(f"--max-age={cull_max_age}")

c.JupyterHub.services.append(
{
"name": "cull-idle",
"admin": True,
"name": "jupyterhub-idle-culler",
"command": cull_cmd,
}
)
c.JupyterHub.load_roles.append(jupyterhub_idle_culler_role)

for key, service in get_config("hub.services", {}).items():
# c.JupyterHub.services is a list of dicts, but
Expand All @@ -402,33 +413,52 @@ def camelCaseify(s):

c.JupyterHub.services.append(service)

for key, role in get_config("hub.loadRoles", {}).items():
# c.JupyterHub.load_roles is a list of dicts, but
# hub.loadRoles is a dict of dicts to make the config mergable
role.setdefault("name", key)

c.JupyterHub.load_roles.append(role)

# respect explicit null command (distinct from unspecified)
# this avoids relying on KubeSpawner.cmd's default being None
_unspecified = object()
specified_cmd = get_config("singleuser.cmd", _unspecified)
if specified_cmd is not _unspecified:
c.Spawner.cmd = specified_cmd

set_config_if_not_none(c.Spawner, "cmd", "singleuser.cmd")
set_config_if_not_none(c.Spawner, "default_url", "singleuser.defaultUrl")

cloud_metadata = get_config("singleuser.cloudMetadata", {})
cloud_metadata = get_config("singleuser.cloudMetadata")

if cloud_metadata.get("blockWithIptables") == True:
# Use iptables to block access to cloud metadata by default
network_tools_image_name = get_config("singleuser.networkTools.image.name")
network_tools_image_tag = get_config("singleuser.networkTools.image.tag")
network_tools_resources = get_config("singleuser.networkTools.resources")
ip = cloud_metadata["ip"]
ip_block_container = client.V1Container(
name="block-cloud-metadata",
image=f"{network_tools_image_name}:{network_tools_image_tag}",
command=[
"iptables",
"-A",
"--append",
"OUTPUT",
"-d",
cloud_metadata.get("ip", "169.254.169.254"),
"-j",
"--protocol",
"tcp",
"--destination",
ip,
"--destination-port",
"80",
"--jump",
"DROP",
],
security_context=client.V1SecurityContext(
privileged=True,
run_as_user=0,
capabilities=client.V1Capabilities(add=["NET_ADMIN"]),
),
resources=network_tools_resources,
)

c.KubeSpawner.init_containers.append(ip_block_container)
Expand All @@ -438,17 +468,6 @@ def camelCaseify(s):
c.JupyterHub.log_level = "DEBUG"
c.Spawner.debug = True

# load /usr/local/etc/jupyterhub/jupyterhub_config.d config files
config_dir = "/usr/local/etc/jupyterhub/jupyterhub_config.d"
if os.path.isdir(config_dir):
for file_path in sorted(glob.glob(f"{config_dir}/*.py")):
file_name = os.path.basename(file_path)
print(f"Loading {config_dir} config: {file_name}")
with open(file_path) as f:
file_content = f.read()
# compiling makes debugging easier: https://stackoverflow.com/a/437857
exec(compile(source=file_content, filename=file_name, mode="exec"))

# load potentially seeded secrets
#
# NOTE: ConfigurableHTTPProxy.auth_token is set through an environment variable
Expand All @@ -471,11 +490,24 @@ def camelCaseify(s):
cfg.pop("keys", None)
c[app].update(cfg)


# load /usr/local/etc/jupyterhub/jupyterhub_config.d config files
config_dir = "/usr/local/etc/jupyterhub/jupyterhub_config.d"
if os.path.isdir(config_dir):
for file_path in sorted(glob.glob(f"{config_dir}/*.py")):
file_name = os.path.basename(file_path)
print(f"Loading {config_dir} config: {file_name}")
with open(file_path) as f:
file_content = f.read()
# compiling makes debugging easier: https://stackoverflow.com/a/437857
exec(compile(source=file_content, filename=file_name, mode="exec"))

# execute hub.extraConfig entries
for key, config_py in sorted(get_config("hub.extraConfig", {}).items()):
print("Loading extra config: %s" % key)
print(f"Loading extra config: {key}")
exec(config_py)

# CLOUDHARNESS: EDIT START
# Allow switching authenticators easily
auth_type = get_config('hub.config.JupyterHub.authenticator_class')
email_domain = 'local'
Expand Down Expand Up @@ -504,6 +536,7 @@ def camelCaseify(s):
c.Authenticator.auto_login = True
c.OAuthenticator.client_id = client_id
c.OAuthenticator.client_secret = client_secret
c.OAuthenticator.allow_all = True

c.GenericOAuthenticator.login_service = "CH"
c.GenericOAuthenticator.username_key = "email"
Expand All @@ -525,4 +558,6 @@ def camelCaseify(s):
c.apps = get_config('apps')
c.registry = get_config('registry')
c.domain = get_config('root.domain')
c.namespace = get_config('root.namespace')
c.namespace = get_config('root.namespace')
# CLOUDHARNESS: EDIT END

Loading
Loading