From a2f5c19d29f5867349bae294c462533ef3a01d5b Mon Sep 17 00:00:00 2001 From: shane knapp Date: Fri, 29 Sep 2023 11:51:22 -0700 Subject: [PATCH 01/13] fix pagination, basically a complete rewrite --- scripts/delete-unused-users.py | 153 ++++++++++++++++++++++++--------- 1 file changed, 112 insertions(+), 41 deletions(-) diff --git a/scripts/delete-unused-users.py b/scripts/delete-unused-users.py index 1192750e5..5880fccf1 100755 --- a/scripts/delete-unused-users.py +++ b/scripts/delete-unused-users.py @@ -12,58 +12,129 @@ ok. """ import argparse -from jhub_client.api import JupyterHubAPI -from dateutil.parser import parse import asyncio from datetime import timedelta, datetime +from dateutil.parser import parse +from jhub_client.api import JupyterHubAPI +import logging +import os +import requests +import sys + +logging.basicConfig(stream=sys.stdout, level=logging.WARNING) +logger = logging.getLogger(__name__) + +token = os.environ["JUPYTERHUB_API_TOKEN"] +headers = { + "Accept": "application/jupyterhub-pagination+json", + "Authorization": f"Bearer {token}", +} + +def retrieve_users(hub_url): + """Returns generator of user models that should be deleted""" + url = hub_url.rstrip("/") + "/hub/api/users" + next_page = True + params = {} + + while next_page: + r = requests.get(url, headers=headers, params=params) + r.raise_for_status() + resp = r.json() + user_list = resp["items"] + for user in user_list: + # only yield users that should be deleted + if should_delete(user): + yield user + + pagination = resp["_pagination"] + next_page = pagination["next"] + if next_page: + params = { + "offset": next_page["offset"], + "limit": next_page["limit"], + } + +def should_delete(user): + """ + Returns a boolean if user is to be deleted. The critera are: + - was the user active in the past 24 hours? + - is there a current user server running? + """ + last_activity_str = user.get('last_activity', False) + if last_activity_str: + try: + last_activity = parse(user['last_activity']) + except: + logger.error(f"Unexpected value for user['last_activity']: {user['last_activity']}") + raise + if isinstance(last_activity, datetime): + was_active_last_day = datetime.now().astimezone() - last_activity < timedelta(hours=24) + else: + logger.error(f"For user {user['name']}, expected datetime.datetime class for last_activity but got {type(last_activity)} instead.") + raise + + logger.debug(f"User: {user['name']}") + logger.debug(f"Last login: {last_activity}") + logger.debug(f"24hrs since last login: {was_active_last_day}") + logger.debug(f"Running server: {user['server']}") + if was_active_last_day or user['server'] is not None: + logger.info(f"Not deleting {user['name']}") + return False + else: + logger.info(f"Flagged {user['name']} for deletion.") + return True -async def main(): +async def delete_user(hub, user, count): + """Delete a user from the hub ORM""" + username = user['name'] + print(f"{count}: deleting {username}") + if not args.dry_run: + await hub.delete_user(username) + else: + logger.warning("Skipped due to dry run.") + +async def main(args): + """ + Get users from a hub, check to see if they should be deleted from the ORM + and if so, delete them! + """ + hub = JupyterHubAPI(hub_url=args.hub_url) + count = 1 + for user in list(retrieve_users(args.hub_url)): + await delete_user(hub, user, count) + count += 1 + count -= 1 + print(f"Deleted {count} total users.") + +if __name__ == "__main__": argparser = argparse.ArgumentParser() argparser.add_argument( - 'hub_url', - help='Fully qualified URL to the JupyterHub' + '--hub_url', + help='Fully qualified URL to the JupyterHub', + required=True ) argparser.add_argument( '--dry_run', action='store_true', help='Dry run without deleting users' ) + argparser.add_argument( + '-v', + dest='verbose', + action='store_true', + help='Set info log level' + ) + argparser.add_argument( + '-d', + dest='debug', + action='store_true', + help='Set debug log level' + ) args = argparser.parse_args() - to_delete = [] - async with JupyterHubAPI(hub_url=args.hub_url) as hub: - users = await hub.list_users() - for user in users: - last_activity_str = user.get('last_activity', False) - if last_activity_str: - try: - last_activity = parse(user['last_activity']) - except: - print(user['last_activity']) - raise - if isinstance(last_activity, datetime): - was_active_last_day = datetime.now().astimezone() - last_activity < timedelta(hours=24) - else: - print(f"For user {user['name']}, expected datetime.datetime class for last_activity but got {type(last_activity)} instead.") - raise - - print(f"User: {user['name']}") - print(f"Last login: {last_activity}") - print(f"24hrs since last login: {was_active_last_day}") - print(f"Running server: {user['server']}") - if was_active_last_day or user['server'] is not None: - print(f"Not deleting {user['name']}") - else: - to_delete.append(user['name']) - print(f"Deleting {user['name']}") - print("") - - for i, username in enumerate(to_delete): - print(f'{i+1} of {len(to_delete)}: deleting {username}') - if not args.dry_run: - await hub.delete_user(username) - else: - print('Skipped due to dry run.') + if args.verbose: + logger.setLevel(logging.INFO) + elif args.debug: + logger.setLevel(logging.DEBUG) -if __name__ == '__main__': - asyncio.run(main()) + asyncio.run(main(args)) From 9c610d104c65da6a7f74fd468a30b6a25e3faab8 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Fri, 29 Sep 2023 19:51:47 -0700 Subject: [PATCH 02/13] basically just stolen code lol --- scripts/delete-unused-users.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/scripts/delete-unused-users.py b/scripts/delete-unused-users.py index 5880fccf1..46ff75a33 100755 --- a/scripts/delete-unused-users.py +++ b/scripts/delete-unused-users.py @@ -84,25 +84,29 @@ def should_delete(user): logger.info(f"Flagged {user['name']} for deletion.") return True -async def delete_user(hub, user, count): - """Delete a user from the hub ORM""" - username = user['name'] - print(f"{count}: deleting {username}") - if not args.dry_run: - await hub.delete_user(username) - else: - logger.warning("Skipped due to dry run.") +def delete_user(hub_url, name): + """Delete a given user by name via JupyterHub API""" + r = requests.delete( + hub_url.rstrip("/") + f"/hub/api/users/{name}", + headers=headers, + ) + r.raise_for_status() -async def main(args): +def main(args): """ Get users from a hub, check to see if they should be deleted from the ORM and if so, delete them! """ - hub = JupyterHubAPI(hub_url=args.hub_url) count = 1 for user in list(retrieve_users(args.hub_url)): - await delete_user(hub, user, count) - count += 1 + print(f"{count}: deleting {user['name']}") + if not args.dry_run: + delete_user(args.hub_url, user['name']) + count += 1 + else: + logger.warning(f"Skipped {user['name']} due to dry run.") + # await delete_user(hub, user, count) + count -= 1 print(f"Deleted {count} total users.") @@ -137,4 +141,4 @@ async def main(args): elif args.debug: logger.setLevel(logging.DEBUG) - asyncio.run(main(args)) + main(args) From 6b42240684e7181c0c837256c18568e8a91ef0d2 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Fri, 29 Sep 2023 19:53:05 -0700 Subject: [PATCH 03/13] give attribution --- scripts/delete-unused-users.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/delete-unused-users.py b/scripts/delete-unused-users.py index 46ff75a33..bbfe1214f 100755 --- a/scripts/delete-unused-users.py +++ b/scripts/delete-unused-users.py @@ -10,6 +10,9 @@ make sure they aren't active right now. This will require users to log in again the next time they use the hub, but that's probably ok. + +Core functionality from @minrk: +https://discourse.jupyter.org/t/is-there-a-way-to-bulk-delete-old-users/20866/3 """ import argparse import asyncio From 7bbbdab0f4380fda548fc8ccb2ae18d08838ccc3 Mon Sep 17 00:00:00 2001 From: Ryan Lovett Date: Tue, 3 Oct 2023 13:50:34 -0700 Subject: [PATCH 04/13] Delete old dev-r image/ directory. There are two images now in the images/ directory. --- deployments/dev-r/image/Dockerfile | 135 ------------------ deployments/dev-r/image/Rprofile.site | 23 --- deployments/dev-r/image/apt.txt | 37 ----- deployments/dev-r/image/class-libs.R | 20 --- deployments/dev-r/image/environment.yml | 19 --- .../dev-r/image/infra-requirements.txt | 31 ---- .../dev-r/image/install-mambaforge.bash | 41 ------ deployments/dev-r/image/install.R | 18 --- deployments/dev-r/image/rsession.conf | 2 - 9 files changed, 326 deletions(-) delete mode 100644 deployments/dev-r/image/Dockerfile delete mode 100644 deployments/dev-r/image/Rprofile.site delete mode 100644 deployments/dev-r/image/apt.txt delete mode 100644 deployments/dev-r/image/class-libs.R delete mode 100644 deployments/dev-r/image/environment.yml delete mode 100644 deployments/dev-r/image/infra-requirements.txt delete mode 100755 deployments/dev-r/image/install-mambaforge.bash delete mode 100755 deployments/dev-r/image/install.R delete mode 100644 deployments/dev-r/image/rsession.conf diff --git a/deployments/dev-r/image/Dockerfile b/deployments/dev-r/image/Dockerfile deleted file mode 100644 index d07ac7202..000000000 --- a/deployments/dev-r/image/Dockerfile +++ /dev/null @@ -1,135 +0,0 @@ -FROM buildpack-deps:jammy-scm as base - -# Set up common env variables -ENV TZ=America/Los_Angeles -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone - -ENV LC_ALL en_US.UTF-8 -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US.UTF-8 -ENV DEBIAN_FRONTEND=noninteractive -ENV NB_USER jovyan -ENV NB_UID 1000 -# These are used by the python, R, and final stages -ENV CONDA_DIR /opt/conda -ENV R_LIBS_USER /opt/r - -RUN apt-get -qq update --yes && \ - apt-get -qq install --yes locales && \ - echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && \ - locale-gen - -RUN adduser --disabled-password --gecos "Default Jupyter user" ${NB_USER} - -# Install all apt packages -COPY apt.txt /tmp/apt.txt -RUN apt-get -qq update --yes && \ - apt-get -qq install --yes --no-install-recommends \ - $(grep -v ^# /tmp/apt.txt) && \ - apt-get -qq purge && \ - apt-get -qq clean && \ - rm -rf /var/lib/apt/lists/* - -# Install R. -# These packages must be installed into the base stage since they are in system -# paths rather than /opt. -# Pre-built R packages from rspm are built against system libs in jammy. -ENV R_VERSION=4.3.1-1.2204.0 -ENV LITTLER_VERSION=0.3.18-2.2204.0 -RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 -RUN echo "deb https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/" > /etc/apt/sources.list.d/cran.list -RUN curl --silent --location --fail https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc > /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc -RUN apt-get update -qq --yes > /dev/null && \ - apt-get install --yes -qq \ - r-base-core=${R_VERSION} \ - r-base-dev=${R_VERSION} \ - littler=${LITTLER_VERSION} > /dev/null - -ENV RSTUDIO_URL https://download2.rstudio.org/server/jammy/amd64/rstudio-server-2023.06.0-421-amd64.deb -RUN curl --silent --location --fail ${RSTUDIO_URL} > /tmp/rstudio.deb && \ - apt install --no-install-recommends --yes /tmp/rstudio.deb && \ - rm /tmp/rstudio.deb - -ENV SHINY_SERVER_URL https://download3.rstudio.org/ubuntu-18.04/x86_64/shiny-server-1.5.20.1002-amd64.deb -RUN curl --silent --location --fail ${SHINY_SERVER_URL} > /tmp/shiny.deb && \ - apt install --no-install-recommends --yes /tmp/shiny.deb && \ - rm /tmp/shiny.deb - -# Set CRAN mirror to rspm before we install anything -COPY Rprofile.site /usr/lib/R/etc/Rprofile.site -# RStudio needs its own config -COPY rsession.conf /etc/rstudio/rsession.conf - -# R_LIBS_USER is set by default in /etc/R/Renviron, which RStudio loads. -# We uncomment the default, and set what we wanna - so it picks up -# the packages we install. Without this, RStudio doesn't see the packages -# that R does. -# Stolen from https://github.com/jupyterhub/repo2docker/blob/6a07a48b2df48168685bb0f993d2a12bd86e23bf/repo2docker/buildpacks/r.py -# To try fight https://community.rstudio.com/t/timedatectl-had-status-1/72060, -# which shows up sometimes when trying to install packages that want the TZ -# timedatectl expects systemd running, which isn't true in our containers -RUN sed -i -e '/^R_LIBS_USER=/s/^/#/' /etc/R/Renviron && \ - echo "R_LIBS_USER=${R_LIBS_USER}" >> /etc/R/Renviron && \ - echo "TZ=${TZ}" >> /etc/R/Renviron - -# ============================================================================= -# This stage exists to build /opt/r. -FROM base as opt-r - -# Create user owned R libs dir -# This lets users temporarily install packages -RUN install -d -o ${NB_USER} -g ${NB_USER} ${R_LIBS_USER} - -# Install R libraries as our user -USER ${NB_USER} - -COPY class-libs.R /tmp/class-libs.R -RUN mkdir -p /tmp/r-packages - -# Our install.R needs devtools which needs install2.r which needs docopt. -# install2.r is not reproducible, but our install.R script is. -RUN Rscript -e "install.packages('docopt')" -RUN /usr/lib/R/site-library/littler/examples/install2.r devtools - -# Install all our base R packages -COPY install.R /tmp/install.R -RUN /tmp/install.R && rm -rf /tmp/downloaded_packages - -# ============================================================================= -# This stage exists to build /opt/conda. -FROM base as opt-conda - -COPY install-mambaforge.bash /tmp/install-mambaforge.bash -RUN /tmp/install-mambaforge.bash - -# Install conda environment as our user -USER ${NB_USER} - -ENV PATH ${CONDA_DIR}/bin:$PATH - -COPY infra-requirements.txt /tmp/infra-requirements.txt -COPY environment.yml /tmp/environment.yml - -RUN mamba env update -p ${CONDA_DIR} -f /tmp/environment.yml && \ - mamba clean -afy - -# ============================================================================= -# This stage consumes base and import /opt/r and /opt/conda. -FROM base as final -COPY --from=opt-r /opt/r /opt/r -COPY --from=opt-conda /opt/conda /opt/conda - -# Install IR kernelspec. Requires python and R. -ENV PATH ${CONDA_DIR}/bin:${PATH}:${R_LIBS_USER}/bin -RUN R -e "IRkernel::installspec(user = FALSE, prefix='${CONDA_DIR}')" - -# clear out /tmp -USER root -RUN rm -rf /tmp/* - -USER ${NB_USER} -WORKDIR /home/${NB_USER} - -EXPOSE 8888 - -ENTRYPOINT ["tini", "--"] diff --git a/deployments/dev-r/image/Rprofile.site b/deployments/dev-r/image/Rprofile.site deleted file mode 100644 index 51f9daba8..000000000 --- a/deployments/dev-r/image/Rprofile.site +++ /dev/null @@ -1,23 +0,0 @@ -# Use RStudio's CRAN mirror to get binary packages. "latest" just means it has -# all available versions. We can specify version numbers in -# devtools::install_version. -options( - repos = c( - CRAN = "https://packagemanager.rstudio.com/all/__linux__/jammy/latest" - ) -) - -# RStudio sets the HTTPUserAgent option which causes it to download binaries, -# rather than source, from the Posit Public Package Manager. In order for the -# RStudio terminal, Jupyter terminal, Jupyter R notebook, or anything else to -# do the same, we must manually set HTTPUserAgent. This speeds up package -# installation. -# We fake rstudio version because `RStudio.Version()$version` is only available -# when we are in RStudio. -options( - HTTPUserAgent = sprintf( - "RStudio Server (%s); R (%s)", - "2022.7.1.554", - paste(getRversion(), R.version$platform, R.version$arch, R.version$os) - ) -) diff --git a/deployments/dev-r/image/apt.txt b/deployments/dev-r/image/apt.txt deleted file mode 100644 index da9f3e4e1..000000000 --- a/deployments/dev-r/image/apt.txt +++ /dev/null @@ -1,37 +0,0 @@ -# our entry point -tini - -# useful utilities -tar -vim -less - -# for nbconvert, knitr, etc. -texlive-xetex -texlive-fonts-recommended -texlive-plain-generic -# provides FandolSong-Regular.otf for issue #2714 -texlive-lang-chinese - -# for notebook-as-pdf -libx11-xcb1 -libxtst6 -libxrandr2 -libasound2 -libpangocairo-1.0-0 -libatk1.0-0 -libatk-bridge2.0-0 -libgtk-3-0 -libnss3 -libxss1 -fonts-noto-color-emoji - -# for rstudio -#psmisc -#sudo -#libapparmor1 -lsb-release -libclang-dev - -# Used by littler for IRkernel::installspec -libzmq5 diff --git a/deployments/dev-r/image/class-libs.R b/deployments/dev-r/image/class-libs.R deleted file mode 100644 index 1c9343537..000000000 --- a/deployments/dev-r/image/class-libs.R +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env Rscript - -class_libs_install_version <- function(class_name, class_libs) { - print(paste("Installing packages for", class_name)) - for (i in seq(1, length(class_libs), 2)) { - installed_packages <- rownames(installed.packages()) - package_name = class_libs[i] - version = class_libs[i+1] - # Only install packages if they haven't already been installed! - # devtools doesn't do that by default - if (!package_name %in% installed_packages) { - print(paste("Installing", package_name, version)) - devtools::install_version(package_name, version, quiet=TRUE) - } else { - # FIXME: This ignores version incompatibilities :'( - print(paste("Not installing", package_name, " as it is already installed")) - } - } - print(paste("Done installing packages for", class_name)) -} diff --git a/deployments/dev-r/image/environment.yml b/deployments/dev-r/image/environment.yml deleted file mode 100644 index 0009398a9..000000000 --- a/deployments/dev-r/image/environment.yml +++ /dev/null @@ -1,19 +0,0 @@ -dependencies: -- python=3.10.* -- pip=23.1.* -- nodejs=18.* - -- jupyter-server-proxy==4.0.0 -- jupyter-rsession-proxy==2.2.0 - -- syncthing==1.23.5 -- pyppeteer==1.0.2 - -# for nbconvert -- pandoc==3.1.3 - -# for jupyter-tree-download -- zip==3.0 -- pip: - - -r infra-requirements.txt - - jupyter-shiny-proxy==1.1 diff --git a/deployments/dev-r/image/infra-requirements.txt b/deployments/dev-r/image/infra-requirements.txt deleted file mode 100644 index afa026881..000000000 --- a/deployments/dev-r/image/infra-requirements.txt +++ /dev/null @@ -1,31 +0,0 @@ -# WARNING: Original source at scripts/infra-packages/requirements.txt -# PLEASE DO NOT EDIT ELSEWHERE -# After editing scripts/infra-packages/requirements.txt, please run -# scripts/infra-packages/sync.bash. - -# This file pins versions of notebook related python packages we want -# across all hubs. This makes sure we don't need to upgrade them -# everwhere one by one. - -# FIXME: Freeze this to get exact versions of all dependencies -notebook==6.4.12 -jupyterlab==3.4.5 -retrolab==0.3.21 -nbgitpuller==1.1.0 -jupyter-resource-usage==0.6.1 -# Matches version in images/hub/Dockerfile -jupyterhub==4.0.2 -appmode==0.8.0 -ipywidgets==7.7.2 -jupyter-tree-download==1.0.1 -git-credential-helpers==0.2 -# Enough people like this, let's load it in. -jupyter-contrib-nbextensions==0.5.1 -jupyter_nbextensions_configurator==0.4.1 -# Measure popularity of different packages in our hubs -# https://discourse.jupyter.org/t/request-for-implementation-instrument-libraries-actively-used-by-users-on-a-jupyterhub/7994?u=yuvipanda -popularity-contest==0.4.1 -# RISE is useful for presentations - see https://github.com/berkeley-dsep-infra/datahub/issues/2527 -RISE==5.7.1 -# syncthing for dropbox-like functionality -jupyter-syncthing-proxy==1.0.3 diff --git a/deployments/dev-r/image/install-mambaforge.bash b/deployments/dev-r/image/install-mambaforge.bash deleted file mode 100755 index 2609d7ad8..000000000 --- a/deployments/dev-r/image/install-mambaforge.bash +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -# -# Download and install a pinned version of mambaforge. -# - -set -ex - -cd $(dirname $0) -MAMBAFORGE_VERSION=23.1.0-1 - -URL="https://github.com/conda-forge/miniforge/releases/download/${MAMBAFORGE_VERSION}/Mambaforge-${MAMBAFORGE_VERSION}-Linux-x86_64.sh" -INSTALLER_PATH=/tmp/mambaforge-installer.sh - -wget --quiet $URL -O ${INSTALLER_PATH} -chmod +x ${INSTALLER_PATH} - -bash ${INSTALLER_PATH} -b -p ${CONDA_DIR} -export PATH="${CONDA_DIR}/bin:$PATH" - -# Do not attempt to auto update conda or dependencies -conda config --system --set auto_update_conda false -conda config --system --set show_channel_urls true - -# Empty the conda history file, which seems to result in some effective pinning -# of packages in the initial env, which we don't intend. This file must not be -# removed. -> ${CONDA_DIR}/conda-meta/history - -# Clean things out! -conda clean --all -f -y - -# Remove the big installer so we don't increase docker image size too much -rm ${INSTALLER_PATH} - -# Remove the pip cache created as part of installing mambaforge -rm -rf ${HOME}/.cache - -chown -R $NB_USER:$NB_USER ${CONDA_DIR} - -conda list -n root diff --git a/deployments/dev-r/image/install.R b/deployments/dev-r/image/install.R deleted file mode 100755 index 4888be50a..000000000 --- a/deployments/dev-r/image/install.R +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env r - -# Install devtools so we can install versioned packages -#install.packages("devtools") - -source("/tmp/class-libs.R") - -# R packages to be installed that aren't from apt -# Combination of informal requests & rocker image suggestions -# Some of these were already in datahub image -cran_packages = c( - "BiocManager", "1.30.21", - "IRkernel", "1.3.2", - "rmarkdown", "2.22", - "shiny", "1.7.4" -) - -class_libs_install_version("Base packages", cran_packages) diff --git a/deployments/dev-r/image/rsession.conf b/deployments/dev-r/image/rsession.conf deleted file mode 100644 index 1f82b6b54..000000000 --- a/deployments/dev-r/image/rsession.conf +++ /dev/null @@ -1,2 +0,0 @@ -# Use binary packages! -r-cran-repos=https://packagemanager.rstudio.com/all/__linux__/jammy/latest From 77d598a50e8d9e4dbafcaebb7e84a64d5e9a44b4 Mon Sep 17 00:00:00 2001 From: Ryan Lovett Date: Tue, 3 Oct 2023 14:43:23 -0700 Subject: [PATCH 05/13] Install infra-requirements.txt in images subdirs. This accounts for both datahub and dev-r, and un-special-cases datahub. --- .../dev-r/images/secondary/Dockerfile- | 135 ------------------ scripts/infra-packages/sync.bash | 11 +- 2 files changed, 8 insertions(+), 138 deletions(-) delete mode 100644 deployments/dev-r/images/secondary/Dockerfile- diff --git a/deployments/dev-r/images/secondary/Dockerfile- b/deployments/dev-r/images/secondary/Dockerfile- deleted file mode 100644 index d07ac7202..000000000 --- a/deployments/dev-r/images/secondary/Dockerfile- +++ /dev/null @@ -1,135 +0,0 @@ -FROM buildpack-deps:jammy-scm as base - -# Set up common env variables -ENV TZ=America/Los_Angeles -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone - -ENV LC_ALL en_US.UTF-8 -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US.UTF-8 -ENV DEBIAN_FRONTEND=noninteractive -ENV NB_USER jovyan -ENV NB_UID 1000 -# These are used by the python, R, and final stages -ENV CONDA_DIR /opt/conda -ENV R_LIBS_USER /opt/r - -RUN apt-get -qq update --yes && \ - apt-get -qq install --yes locales && \ - echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && \ - locale-gen - -RUN adduser --disabled-password --gecos "Default Jupyter user" ${NB_USER} - -# Install all apt packages -COPY apt.txt /tmp/apt.txt -RUN apt-get -qq update --yes && \ - apt-get -qq install --yes --no-install-recommends \ - $(grep -v ^# /tmp/apt.txt) && \ - apt-get -qq purge && \ - apt-get -qq clean && \ - rm -rf /var/lib/apt/lists/* - -# Install R. -# These packages must be installed into the base stage since they are in system -# paths rather than /opt. -# Pre-built R packages from rspm are built against system libs in jammy. -ENV R_VERSION=4.3.1-1.2204.0 -ENV LITTLER_VERSION=0.3.18-2.2204.0 -RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 -RUN echo "deb https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/" > /etc/apt/sources.list.d/cran.list -RUN curl --silent --location --fail https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc > /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc -RUN apt-get update -qq --yes > /dev/null && \ - apt-get install --yes -qq \ - r-base-core=${R_VERSION} \ - r-base-dev=${R_VERSION} \ - littler=${LITTLER_VERSION} > /dev/null - -ENV RSTUDIO_URL https://download2.rstudio.org/server/jammy/amd64/rstudio-server-2023.06.0-421-amd64.deb -RUN curl --silent --location --fail ${RSTUDIO_URL} > /tmp/rstudio.deb && \ - apt install --no-install-recommends --yes /tmp/rstudio.deb && \ - rm /tmp/rstudio.deb - -ENV SHINY_SERVER_URL https://download3.rstudio.org/ubuntu-18.04/x86_64/shiny-server-1.5.20.1002-amd64.deb -RUN curl --silent --location --fail ${SHINY_SERVER_URL} > /tmp/shiny.deb && \ - apt install --no-install-recommends --yes /tmp/shiny.deb && \ - rm /tmp/shiny.deb - -# Set CRAN mirror to rspm before we install anything -COPY Rprofile.site /usr/lib/R/etc/Rprofile.site -# RStudio needs its own config -COPY rsession.conf /etc/rstudio/rsession.conf - -# R_LIBS_USER is set by default in /etc/R/Renviron, which RStudio loads. -# We uncomment the default, and set what we wanna - so it picks up -# the packages we install. Without this, RStudio doesn't see the packages -# that R does. -# Stolen from https://github.com/jupyterhub/repo2docker/blob/6a07a48b2df48168685bb0f993d2a12bd86e23bf/repo2docker/buildpacks/r.py -# To try fight https://community.rstudio.com/t/timedatectl-had-status-1/72060, -# which shows up sometimes when trying to install packages that want the TZ -# timedatectl expects systemd running, which isn't true in our containers -RUN sed -i -e '/^R_LIBS_USER=/s/^/#/' /etc/R/Renviron && \ - echo "R_LIBS_USER=${R_LIBS_USER}" >> /etc/R/Renviron && \ - echo "TZ=${TZ}" >> /etc/R/Renviron - -# ============================================================================= -# This stage exists to build /opt/r. -FROM base as opt-r - -# Create user owned R libs dir -# This lets users temporarily install packages -RUN install -d -o ${NB_USER} -g ${NB_USER} ${R_LIBS_USER} - -# Install R libraries as our user -USER ${NB_USER} - -COPY class-libs.R /tmp/class-libs.R -RUN mkdir -p /tmp/r-packages - -# Our install.R needs devtools which needs install2.r which needs docopt. -# install2.r is not reproducible, but our install.R script is. -RUN Rscript -e "install.packages('docopt')" -RUN /usr/lib/R/site-library/littler/examples/install2.r devtools - -# Install all our base R packages -COPY install.R /tmp/install.R -RUN /tmp/install.R && rm -rf /tmp/downloaded_packages - -# ============================================================================= -# This stage exists to build /opt/conda. -FROM base as opt-conda - -COPY install-mambaforge.bash /tmp/install-mambaforge.bash -RUN /tmp/install-mambaforge.bash - -# Install conda environment as our user -USER ${NB_USER} - -ENV PATH ${CONDA_DIR}/bin:$PATH - -COPY infra-requirements.txt /tmp/infra-requirements.txt -COPY environment.yml /tmp/environment.yml - -RUN mamba env update -p ${CONDA_DIR} -f /tmp/environment.yml && \ - mamba clean -afy - -# ============================================================================= -# This stage consumes base and import /opt/r and /opt/conda. -FROM base as final -COPY --from=opt-r /opt/r /opt/r -COPY --from=opt-conda /opt/conda /opt/conda - -# Install IR kernelspec. Requires python and R. -ENV PATH ${CONDA_DIR}/bin:${PATH}:${R_LIBS_USER}/bin -RUN R -e "IRkernel::installspec(user = FALSE, prefix='${CONDA_DIR}')" - -# clear out /tmp -USER root -RUN rm -rf /tmp/* - -USER ${NB_USER} -WORKDIR /home/${NB_USER} - -EXPOSE 8888 - -ENTRYPOINT ["tini", "--"] diff --git a/scripts/infra-packages/sync.bash b/scripts/infra-packages/sync.bash index ada568010..6c49f6edd 100755 --- a/scripts/infra-packages/sync.bash +++ b/scripts/infra-packages/sync.bash @@ -4,7 +4,12 @@ set -euxo pipefail SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" ROOT_DIR="${SCRIPT_DIR}/../.." -find "${ROOT_DIR}/deployments" -type d -name 'image' -exec cp "${SCRIPT_DIR}/requirements.txt" {}/infra-requirements.txt \; +# install file in deployment directories named "image" +find "${ROOT_DIR}/deployments" -type d -name 'image' \ + -exec echo cp "${SCRIPT_DIR}/requirements.txt" {}/infra-requirements.txt \; -# FIXME: Don't specialcase datahub! -cp ${SCRIPT_DIR}/requirements.txt ${ROOT_DIR}/deployments/datahub/images/default/infra-requirements.txt \ No newline at end of file +# install file in subdirectories of deployment directories named "images" +for d in $(find "${ROOT_DIR}/deployments" -type d -name images); do + find $d -not -name images -maxdepth 1 -type d \ + -exec echo cp "${SCRIPT_DIR}/requirements.txt" {}/infra-requirements.txt \; +done From f253795036b8f9312246e1855a97e2561654aea2 Mon Sep 17 00:00:00 2001 From: Ryan Lovett Date: Tue, 3 Oct 2023 14:45:35 -0700 Subject: [PATCH 06/13] Don't include debug echo. --- scripts/infra-packages/sync.bash | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/infra-packages/sync.bash b/scripts/infra-packages/sync.bash index 6c49f6edd..26f1b812a 100755 --- a/scripts/infra-packages/sync.bash +++ b/scripts/infra-packages/sync.bash @@ -6,10 +6,10 @@ ROOT_DIR="${SCRIPT_DIR}/../.." # install file in deployment directories named "image" find "${ROOT_DIR}/deployments" -type d -name 'image' \ - -exec echo cp "${SCRIPT_DIR}/requirements.txt" {}/infra-requirements.txt \; + -exec cp "${SCRIPT_DIR}/requirements.txt" {}/infra-requirements.txt \; # install file in subdirectories of deployment directories named "images" for d in $(find "${ROOT_DIR}/deployments" -type d -name images); do find $d -not -name images -maxdepth 1 -type d \ - -exec echo cp "${SCRIPT_DIR}/requirements.txt" {}/infra-requirements.txt \; + -exec cp "${SCRIPT_DIR}/requirements.txt" {}/infra-requirements.txt \; done From a3399df5662bb2e79532788b09a2fc220e73b80d Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 4 Oct 2023 14:24:18 -0700 Subject: [PATCH 07/13] address comments, quick bugfix w/indentation --- scripts/delete-unused-users.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/scripts/delete-unused-users.py b/scripts/delete-unused-users.py index bbfe1214f..fbfe41b9a 100755 --- a/scripts/delete-unused-users.py +++ b/scripts/delete-unused-users.py @@ -15,15 +15,15 @@ https://discourse.jupyter.org/t/is-there-a-way-to-bulk-delete-old-users/20866/3 """ import argparse -import asyncio from datetime import timedelta, datetime -from dateutil.parser import parse -from jhub_client.api import JupyterHubAPI import logging import os import requests import sys +from dateutil.parser import parse +from jhub_client.api import JupyterHubAPI + logging.basicConfig(stream=sys.stdout, level=logging.WARNING) logger = logging.getLogger(__name__) @@ -103,10 +103,10 @@ def main(args): count = 1 for user in list(retrieve_users(args.hub_url)): print(f"{count}: deleting {user['name']}") + count += 1 if not args.dry_run: delete_user(args.hub_url, user['name']) - count += 1 - else: + else: logger.warning(f"Skipped {user['name']} due to dry run.") # await delete_user(hub, user, count) @@ -127,12 +127,14 @@ def main(args): ) argparser.add_argument( '-v', + '--verbose', dest='verbose', action='store_true', help='Set info log level' ) argparser.add_argument( '-d', + '--debug', dest='debug', action='store_true', help='Set debug log level' From c2de00c84c13eba72cf324c24bc25d291c4b893a Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 4 Oct 2023 14:25:53 -0700 Subject: [PATCH 08/13] at the end of the run, state exactly where these users were deleted from --- scripts/delete-unused-users.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/delete-unused-users.py b/scripts/delete-unused-users.py index fbfe41b9a..243ad5260 100755 --- a/scripts/delete-unused-users.py +++ b/scripts/delete-unused-users.py @@ -111,7 +111,7 @@ def main(args): # await delete_user(hub, user, count) count -= 1 - print(f"Deleted {count} total users.") + print(f"Deleted {count} total users from the ORM.") if __name__ == "__main__": argparser = argparse.ArgumentParser() From df941bdd176dcfc85d8dd42c618f68a8e7c4a696 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 4 Oct 2023 14:36:18 -0700 Subject: [PATCH 09/13] update docs --- docs/admins/howto/remove-users-orm.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/admins/howto/remove-users-orm.rst b/docs/admins/howto/remove-users-orm.rst index 538f9b6bc..ab11565dc 100644 --- a/docs/admins/howto/remove-users-orm.rst +++ b/docs/admins/howto/remove-users-orm.rst @@ -25,6 +25,4 @@ You can run the script on your own device. The script depends on the `jhub_clien #. You will need to acquire a JupyterHub API token with administrative rights. A hub admin can go to {hub_url}/hub/token to create a new one. #. Set the environment variable `JUPYTERHUB_API_TOKEN` to the token. -#. Run `python scripts/delete-unused-users.py {hub_url}` - -The script currently does not paginate properly, meaning that it operates on the first 200 users provided by the hub. If there are less then 200 active users it is sufficient to keep running the script in a loop until all inactive users are removed. If there are more than 200 active users this procedure will be inadequate. (the script needs to be fixed!) +#. Run `python scripts/delete-unused-users.py --hub_url {hub_url}` From 015ae85f845860e49d09e1254a4ed582483b0e5b Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 4 Oct 2023 14:37:42 -0700 Subject: [PATCH 10/13] add short form arg for hub url --- scripts/delete-unused-users.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/delete-unused-users.py b/scripts/delete-unused-users.py index 243ad5260..9c23445b9 100755 --- a/scripts/delete-unused-users.py +++ b/scripts/delete-unused-users.py @@ -116,6 +116,7 @@ def main(args): if __name__ == "__main__": argparser = argparse.ArgumentParser() argparser.add_argument( + '-h', '--hub_url', help='Fully qualified URL to the JupyterHub', required=True From 57b8f8eb06fe5ffbd3d64f6fdab934c6e1780136 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 4 Oct 2023 15:07:49 -0700 Subject: [PATCH 11/13] remove an old commented out line --- scripts/delete-unused-users.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/delete-unused-users.py b/scripts/delete-unused-users.py index 9c23445b9..15f46faff 100755 --- a/scripts/delete-unused-users.py +++ b/scripts/delete-unused-users.py @@ -108,7 +108,6 @@ def main(args): delete_user(args.hub_url, user['name']) else: logger.warning(f"Skipped {user['name']} due to dry run.") - # await delete_user(hub, user, count) count -= 1 print(f"Deleted {count} total users from the ORM.") From d78c17ecaf8e3d65e9088df8b1807519237e5cd7 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 4 Oct 2023 16:54:31 -0700 Subject: [PATCH 12/13] bump data101 memory --- deployments/data101/config/common.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deployments/data101/config/common.yaml b/deployments/data101/config/common.yaml index 1de2f50c3..c0936d096 100644 --- a/deployments/data101/config/common.yaml +++ b/deployments/data101/config/common.yaml @@ -189,8 +189,8 @@ jupyterhub: - name: mongodb mountPath: /data/db memory: - guarantee: 2G - limit: 2G + guarantee: 4G + limit: 4G custom: group_profiles: From 0751f4911c2af3632e9c6d113344b6338a10aa51 Mon Sep 17 00:00:00 2001 From: Jonathan Felder Date: Wed, 4 Oct 2023 17:18:21 -0700 Subject: [PATCH 13/13] [DH-130] increasing data101 again from 4G to 5wqG per user to more loosely pack --- deployments/data101/config/common.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deployments/data101/config/common.yaml b/deployments/data101/config/common.yaml index c0936d096..92c590f34 100644 --- a/deployments/data101/config/common.yaml +++ b/deployments/data101/config/common.yaml @@ -189,8 +189,8 @@ jupyterhub: - name: mongodb mountPath: /data/db memory: - guarantee: 4G - limit: 4G + guarantee: 5G + limit: 5G custom: group_profiles: