diff --git a/deployments/data101/config/common.yaml b/deployments/data101/config/common.yaml index 1de2f50c3..92c590f34 100644 --- a/deployments/data101/config/common.yaml +++ b/deployments/data101/config/common.yaml @@ -189,8 +189,8 @@ jupyterhub: - name: mongodb mountPath: /data/db memory: - guarantee: 2G - limit: 2G + guarantee: 5G + limit: 5G custom: group_profiles: diff --git a/deployments/dev-r/image/Dockerfile b/deployments/dev-r/image/Dockerfile deleted file mode 100644 index d07ac7202..000000000 --- a/deployments/dev-r/image/Dockerfile +++ /dev/null @@ -1,135 +0,0 @@ -FROM buildpack-deps:jammy-scm as base - -# Set up common env variables -ENV TZ=America/Los_Angeles -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone - -ENV LC_ALL en_US.UTF-8 -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US.UTF-8 -ENV DEBIAN_FRONTEND=noninteractive -ENV NB_USER jovyan -ENV NB_UID 1000 -# These are used by the python, R, and final stages -ENV CONDA_DIR /opt/conda -ENV R_LIBS_USER /opt/r - -RUN apt-get -qq update --yes && \ - apt-get -qq install --yes locales && \ - echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && \ - locale-gen - -RUN adduser --disabled-password --gecos "Default Jupyter user" ${NB_USER} - -# Install all apt packages -COPY apt.txt /tmp/apt.txt -RUN apt-get -qq update --yes && \ - apt-get -qq install --yes --no-install-recommends \ - $(grep -v ^# /tmp/apt.txt) && \ - apt-get -qq purge && \ - apt-get -qq clean && \ - rm -rf /var/lib/apt/lists/* - -# Install R. -# These packages must be installed into the base stage since they are in system -# paths rather than /opt. -# Pre-built R packages from rspm are built against system libs in jammy. -ENV R_VERSION=4.3.1-1.2204.0 -ENV LITTLER_VERSION=0.3.18-2.2204.0 -RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 -RUN echo "deb https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/" > /etc/apt/sources.list.d/cran.list -RUN curl --silent --location --fail https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc > /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc -RUN apt-get update -qq --yes > /dev/null && \ - apt-get install --yes -qq \ - r-base-core=${R_VERSION} \ - r-base-dev=${R_VERSION} \ - littler=${LITTLER_VERSION} > /dev/null - -ENV RSTUDIO_URL https://download2.rstudio.org/server/jammy/amd64/rstudio-server-2023.06.0-421-amd64.deb -RUN curl --silent --location --fail ${RSTUDIO_URL} > /tmp/rstudio.deb && \ - apt install --no-install-recommends --yes /tmp/rstudio.deb && \ - rm /tmp/rstudio.deb - -ENV SHINY_SERVER_URL https://download3.rstudio.org/ubuntu-18.04/x86_64/shiny-server-1.5.20.1002-amd64.deb -RUN curl --silent --location --fail ${SHINY_SERVER_URL} > /tmp/shiny.deb && \ - apt install --no-install-recommends --yes /tmp/shiny.deb && \ - rm /tmp/shiny.deb - -# Set CRAN mirror to rspm before we install anything -COPY Rprofile.site /usr/lib/R/etc/Rprofile.site -# RStudio needs its own config -COPY rsession.conf /etc/rstudio/rsession.conf - -# R_LIBS_USER is set by default in /etc/R/Renviron, which RStudio loads. -# We uncomment the default, and set what we wanna - so it picks up -# the packages we install. Without this, RStudio doesn't see the packages -# that R does. 
-# Stolen from https://github.com/jupyterhub/repo2docker/blob/6a07a48b2df48168685bb0f993d2a12bd86e23bf/repo2docker/buildpacks/r.py -# To try fight https://community.rstudio.com/t/timedatectl-had-status-1/72060, -# which shows up sometimes when trying to install packages that want the TZ -# timedatectl expects systemd running, which isn't true in our containers -RUN sed -i -e '/^R_LIBS_USER=/s/^/#/' /etc/R/Renviron && \ - echo "R_LIBS_USER=${R_LIBS_USER}" >> /etc/R/Renviron && \ - echo "TZ=${TZ}" >> /etc/R/Renviron - -# ============================================================================= -# This stage exists to build /opt/r. -FROM base as opt-r - -# Create user owned R libs dir -# This lets users temporarily install packages -RUN install -d -o ${NB_USER} -g ${NB_USER} ${R_LIBS_USER} - -# Install R libraries as our user -USER ${NB_USER} - -COPY class-libs.R /tmp/class-libs.R -RUN mkdir -p /tmp/r-packages - -# Our install.R needs devtools which needs install2.r which needs docopt. -# install2.r is not reproducible, but our install.R script is. -RUN Rscript -e "install.packages('docopt')" -RUN /usr/lib/R/site-library/littler/examples/install2.r devtools - -# Install all our base R packages -COPY install.R /tmp/install.R -RUN /tmp/install.R && rm -rf /tmp/downloaded_packages - -# ============================================================================= -# This stage exists to build /opt/conda. -FROM base as opt-conda - -COPY install-mambaforge.bash /tmp/install-mambaforge.bash -RUN /tmp/install-mambaforge.bash - -# Install conda environment as our user -USER ${NB_USER} - -ENV PATH ${CONDA_DIR}/bin:$PATH - -COPY infra-requirements.txt /tmp/infra-requirements.txt -COPY environment.yml /tmp/environment.yml - -RUN mamba env update -p ${CONDA_DIR} -f /tmp/environment.yml && \ - mamba clean -afy - -# ============================================================================= -# This stage consumes base and import /opt/r and /opt/conda. -FROM base as final -COPY --from=opt-r /opt/r /opt/r -COPY --from=opt-conda /opt/conda /opt/conda - -# Install IR kernelspec. Requires python and R. -ENV PATH ${CONDA_DIR}/bin:${PATH}:${R_LIBS_USER}/bin -RUN R -e "IRkernel::installspec(user = FALSE, prefix='${CONDA_DIR}')" - -# clear out /tmp -USER root -RUN rm -rf /tmp/* - -USER ${NB_USER} -WORKDIR /home/${NB_USER} - -EXPOSE 8888 - -ENTRYPOINT ["tini", "--"] diff --git a/deployments/dev-r/image/Rprofile.site b/deployments/dev-r/image/Rprofile.site deleted file mode 100644 index 51f9daba8..000000000 --- a/deployments/dev-r/image/Rprofile.site +++ /dev/null @@ -1,23 +0,0 @@ -# Use RStudio's CRAN mirror to get binary packages. "latest" just means it has -# all available versions. We can specify version numbers in -# devtools::install_version. -options( - repos = c( - CRAN = "https://packagemanager.rstudio.com/all/__linux__/jammy/latest" - ) -) - -# RStudio sets the HTTPUserAgent option which causes it to download binaries, -# rather than source, from the Posit Public Package Manager. In order for the -# RStudio terminal, Jupyter terminal, Jupyter R notebook, or anything else to -# do the same, we must manually set HTTPUserAgent. This speeds up package -# installation. -# We fake rstudio version because `RStudio.Version()$version` is only available -# when we are in RStudio. 
-options( - HTTPUserAgent = sprintf( - "RStudio Server (%s); R (%s)", - "2022.7.1.554", - paste(getRversion(), R.version$platform, R.version$arch, R.version$os) - ) -) diff --git a/deployments/dev-r/image/apt.txt b/deployments/dev-r/image/apt.txt deleted file mode 100644 index da9f3e4e1..000000000 --- a/deployments/dev-r/image/apt.txt +++ /dev/null @@ -1,37 +0,0 @@ -# our entry point -tini - -# useful utilities -tar -vim -less - -# for nbconvert, knitr, etc. -texlive-xetex -texlive-fonts-recommended -texlive-plain-generic -# provides FandolSong-Regular.otf for issue #2714 -texlive-lang-chinese - -# for notebook-as-pdf -libx11-xcb1 -libxtst6 -libxrandr2 -libasound2 -libpangocairo-1.0-0 -libatk1.0-0 -libatk-bridge2.0-0 -libgtk-3-0 -libnss3 -libxss1 -fonts-noto-color-emoji - -# for rstudio -#psmisc -#sudo -#libapparmor1 -lsb-release -libclang-dev - -# Used by littler for IRkernel::installspec -libzmq5 diff --git a/deployments/dev-r/image/class-libs.R b/deployments/dev-r/image/class-libs.R deleted file mode 100644 index 1c9343537..000000000 --- a/deployments/dev-r/image/class-libs.R +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env Rscript - -class_libs_install_version <- function(class_name, class_libs) { - print(paste("Installing packages for", class_name)) - for (i in seq(1, length(class_libs), 2)) { - installed_packages <- rownames(installed.packages()) - package_name = class_libs[i] - version = class_libs[i+1] - # Only install packages if they haven't already been installed! - # devtools doesn't do that by default - if (!package_name %in% installed_packages) { - print(paste("Installing", package_name, version)) - devtools::install_version(package_name, version, quiet=TRUE) - } else { - # FIXME: This ignores version incompatibilities :'( - print(paste("Not installing", package_name, " as it is already installed")) - } - } - print(paste("Done installing packages for", class_name)) -} diff --git a/deployments/dev-r/image/environment.yml b/deployments/dev-r/image/environment.yml deleted file mode 100644 index 0009398a9..000000000 --- a/deployments/dev-r/image/environment.yml +++ /dev/null @@ -1,19 +0,0 @@ -dependencies: -- python=3.10.* -- pip=23.1.* -- nodejs=18.* - -- jupyter-server-proxy==4.0.0 -- jupyter-rsession-proxy==2.2.0 - -- syncthing==1.23.5 -- pyppeteer==1.0.2 - -# for nbconvert -- pandoc==3.1.3 - -# for jupyter-tree-download -- zip==3.0 -- pip: - - -r infra-requirements.txt - - jupyter-shiny-proxy==1.1 diff --git a/deployments/dev-r/image/infra-requirements.txt b/deployments/dev-r/image/infra-requirements.txt deleted file mode 100644 index afa026881..000000000 --- a/deployments/dev-r/image/infra-requirements.txt +++ /dev/null @@ -1,31 +0,0 @@ -# WARNING: Original source at scripts/infra-packages/requirements.txt -# PLEASE DO NOT EDIT ELSEWHERE -# After editing scripts/infra-packages/requirements.txt, please run -# scripts/infra-packages/sync.bash. - -# This file pins versions of notebook related python packages we want -# across all hubs. This makes sure we don't need to upgrade them -# everwhere one by one. - -# FIXME: Freeze this to get exact versions of all dependencies -notebook==6.4.12 -jupyterlab==3.4.5 -retrolab==0.3.21 -nbgitpuller==1.1.0 -jupyter-resource-usage==0.6.1 -# Matches version in images/hub/Dockerfile -jupyterhub==4.0.2 -appmode==0.8.0 -ipywidgets==7.7.2 -jupyter-tree-download==1.0.1 -git-credential-helpers==0.2 -# Enough people like this, let's load it in. 
-jupyter-contrib-nbextensions==0.5.1 -jupyter_nbextensions_configurator==0.4.1 -# Measure popularity of different packages in our hubs -# https://discourse.jupyter.org/t/request-for-implementation-instrument-libraries-actively-used-by-users-on-a-jupyterhub/7994?u=yuvipanda -popularity-contest==0.4.1 -# RISE is useful for presentations - see https://github.com/berkeley-dsep-infra/datahub/issues/2527 -RISE==5.7.1 -# syncthing for dropbox-like functionality -jupyter-syncthing-proxy==1.0.3 diff --git a/deployments/dev-r/image/install-mambaforge.bash b/deployments/dev-r/image/install-mambaforge.bash deleted file mode 100755 index 2609d7ad8..000000000 --- a/deployments/dev-r/image/install-mambaforge.bash +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -# -# Download and install a pinned version of mambaforge. -# - -set -ex - -cd $(dirname $0) -MAMBAFORGE_VERSION=23.1.0-1 - -URL="https://github.com/conda-forge/miniforge/releases/download/${MAMBAFORGE_VERSION}/Mambaforge-${MAMBAFORGE_VERSION}-Linux-x86_64.sh" -INSTALLER_PATH=/tmp/mambaforge-installer.sh - -wget --quiet $URL -O ${INSTALLER_PATH} -chmod +x ${INSTALLER_PATH} - -bash ${INSTALLER_PATH} -b -p ${CONDA_DIR} -export PATH="${CONDA_DIR}/bin:$PATH" - -# Do not attempt to auto update conda or dependencies -conda config --system --set auto_update_conda false -conda config --system --set show_channel_urls true - -# Empty the conda history file, which seems to result in some effective pinning -# of packages in the initial env, which we don't intend. This file must not be -# removed. -> ${CONDA_DIR}/conda-meta/history - -# Clean things out! -conda clean --all -f -y - -# Remove the big installer so we don't increase docker image size too much -rm ${INSTALLER_PATH} - -# Remove the pip cache created as part of installing mambaforge -rm -rf ${HOME}/.cache - -chown -R $NB_USER:$NB_USER ${CONDA_DIR} - -conda list -n root diff --git a/deployments/dev-r/image/install.R b/deployments/dev-r/image/install.R deleted file mode 100755 index 4888be50a..000000000 --- a/deployments/dev-r/image/install.R +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env r - -# Install devtools so we can install versioned packages -#install.packages("devtools") - -source("/tmp/class-libs.R") - -# R packages to be installed that aren't from apt -# Combination of informal requests & rocker image suggestions -# Some of these were already in datahub image -cran_packages = c( - "BiocManager", "1.30.21", - "IRkernel", "1.3.2", - "rmarkdown", "2.22", - "shiny", "1.7.4" -) - -class_libs_install_version("Base packages", cran_packages) diff --git a/deployments/dev-r/image/rsession.conf b/deployments/dev-r/image/rsession.conf deleted file mode 100644 index 1f82b6b54..000000000 --- a/deployments/dev-r/image/rsession.conf +++ /dev/null @@ -1,2 +0,0 @@ -# Use binary packages! 
-r-cran-repos=https://packagemanager.rstudio.com/all/__linux__/jammy/latest diff --git a/deployments/dev-r/images/secondary/Dockerfile- b/deployments/dev-r/images/secondary/Dockerfile- deleted file mode 100644 index d07ac7202..000000000 --- a/deployments/dev-r/images/secondary/Dockerfile- +++ /dev/null @@ -1,135 +0,0 @@ -FROM buildpack-deps:jammy-scm as base - -# Set up common env variables -ENV TZ=America/Los_Angeles -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone - -ENV LC_ALL en_US.UTF-8 -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US.UTF-8 -ENV DEBIAN_FRONTEND=noninteractive -ENV NB_USER jovyan -ENV NB_UID 1000 -# These are used by the python, R, and final stages -ENV CONDA_DIR /opt/conda -ENV R_LIBS_USER /opt/r - -RUN apt-get -qq update --yes && \ - apt-get -qq install --yes locales && \ - echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && \ - locale-gen - -RUN adduser --disabled-password --gecos "Default Jupyter user" ${NB_USER} - -# Install all apt packages -COPY apt.txt /tmp/apt.txt -RUN apt-get -qq update --yes && \ - apt-get -qq install --yes --no-install-recommends \ - $(grep -v ^# /tmp/apt.txt) && \ - apt-get -qq purge && \ - apt-get -qq clean && \ - rm -rf /var/lib/apt/lists/* - -# Install R. -# These packages must be installed into the base stage since they are in system -# paths rather than /opt. -# Pre-built R packages from rspm are built against system libs in jammy. -ENV R_VERSION=4.3.1-1.2204.0 -ENV LITTLER_VERSION=0.3.18-2.2204.0 -RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 -RUN echo "deb https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/" > /etc/apt/sources.list.d/cran.list -RUN curl --silent --location --fail https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc > /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc -RUN apt-get update -qq --yes > /dev/null && \ - apt-get install --yes -qq \ - r-base-core=${R_VERSION} \ - r-base-dev=${R_VERSION} \ - littler=${LITTLER_VERSION} > /dev/null - -ENV RSTUDIO_URL https://download2.rstudio.org/server/jammy/amd64/rstudio-server-2023.06.0-421-amd64.deb -RUN curl --silent --location --fail ${RSTUDIO_URL} > /tmp/rstudio.deb && \ - apt install --no-install-recommends --yes /tmp/rstudio.deb && \ - rm /tmp/rstudio.deb - -ENV SHINY_SERVER_URL https://download3.rstudio.org/ubuntu-18.04/x86_64/shiny-server-1.5.20.1002-amd64.deb -RUN curl --silent --location --fail ${SHINY_SERVER_URL} > /tmp/shiny.deb && \ - apt install --no-install-recommends --yes /tmp/shiny.deb && \ - rm /tmp/shiny.deb - -# Set CRAN mirror to rspm before we install anything -COPY Rprofile.site /usr/lib/R/etc/Rprofile.site -# RStudio needs its own config -COPY rsession.conf /etc/rstudio/rsession.conf - -# R_LIBS_USER is set by default in /etc/R/Renviron, which RStudio loads. -# We uncomment the default, and set what we wanna - so it picks up -# the packages we install. Without this, RStudio doesn't see the packages -# that R does. 
-# Stolen from https://github.com/jupyterhub/repo2docker/blob/6a07a48b2df48168685bb0f993d2a12bd86e23bf/repo2docker/buildpacks/r.py -# To try fight https://community.rstudio.com/t/timedatectl-had-status-1/72060, -# which shows up sometimes when trying to install packages that want the TZ -# timedatectl expects systemd running, which isn't true in our containers -RUN sed -i -e '/^R_LIBS_USER=/s/^/#/' /etc/R/Renviron && \ - echo "R_LIBS_USER=${R_LIBS_USER}" >> /etc/R/Renviron && \ - echo "TZ=${TZ}" >> /etc/R/Renviron - -# ============================================================================= -# This stage exists to build /opt/r. -FROM base as opt-r - -# Create user owned R libs dir -# This lets users temporarily install packages -RUN install -d -o ${NB_USER} -g ${NB_USER} ${R_LIBS_USER} - -# Install R libraries as our user -USER ${NB_USER} - -COPY class-libs.R /tmp/class-libs.R -RUN mkdir -p /tmp/r-packages - -# Our install.R needs devtools which needs install2.r which needs docopt. -# install2.r is not reproducible, but our install.R script is. -RUN Rscript -e "install.packages('docopt')" -RUN /usr/lib/R/site-library/littler/examples/install2.r devtools - -# Install all our base R packages -COPY install.R /tmp/install.R -RUN /tmp/install.R && rm -rf /tmp/downloaded_packages - -# ============================================================================= -# This stage exists to build /opt/conda. -FROM base as opt-conda - -COPY install-mambaforge.bash /tmp/install-mambaforge.bash -RUN /tmp/install-mambaforge.bash - -# Install conda environment as our user -USER ${NB_USER} - -ENV PATH ${CONDA_DIR}/bin:$PATH - -COPY infra-requirements.txt /tmp/infra-requirements.txt -COPY environment.yml /tmp/environment.yml - -RUN mamba env update -p ${CONDA_DIR} -f /tmp/environment.yml && \ - mamba clean -afy - -# ============================================================================= -# This stage consumes base and import /opt/r and /opt/conda. -FROM base as final -COPY --from=opt-r /opt/r /opt/r -COPY --from=opt-conda /opt/conda /opt/conda - -# Install IR kernelspec. Requires python and R. -ENV PATH ${CONDA_DIR}/bin:${PATH}:${R_LIBS_USER}/bin -RUN R -e "IRkernel::installspec(user = FALSE, prefix='${CONDA_DIR}')" - -# clear out /tmp -USER root -RUN rm -rf /tmp/* - -USER ${NB_USER} -WORKDIR /home/${NB_USER} - -EXPOSE 8888 - -ENTRYPOINT ["tini", "--"] diff --git a/docs/admins/howto/remove-users-orm.rst b/docs/admins/howto/remove-users-orm.rst index 538f9b6bc..ab11565dc 100644 --- a/docs/admins/howto/remove-users-orm.rst +++ b/docs/admins/howto/remove-users-orm.rst @@ -25,6 +25,4 @@ You can run the script on your own device. The script depends on the `jhub_clien #. You will need to acquire a JupyterHub API token with administrative rights. A hub admin can go to {hub_url}/hub/token to create a new one. #. Set the environment variable `JUPYTERHUB_API_TOKEN` to the token. -#. Run `python scripts/delete-unused-users.py {hub_url}` - -The script currently does not paginate properly, meaning that it operates on the first 200 users provided by the hub. If there are less then 200 active users it is sufficient to keep running the script in a loop until all inactive users are removed. If there are more than 200 active users this procedure will be inadequate. (the script needs to be fixed!) +#. 
Run `python scripts/delete-unused-users.py --hub_url {hub_url}`
diff --git a/scripts/delete-unused-users.py b/scripts/delete-unused-users.py
index 1192750e5..15f46faff 100755
--- a/scripts/delete-unused-users.py
+++ b/scripts/delete-unused-users.py
@@ -10,60 +10,140 @@ make sure they aren't active right now.
 This will require users to log in again
 the next time they use the hub, but that's probably ok.
+
+Core functionality from @minrk:
+https://discourse.jupyter.org/t/is-there-a-way-to-bulk-delete-old-users/20866/3
 """
 import argparse
-from jhub_client.api import JupyterHubAPI
-from dateutil.parser import parse
-import asyncio
 from datetime import timedelta, datetime
+import logging
+import os
+import requests
+import sys
+
+from dateutil.parser import parse
+from jhub_client.api import JupyterHubAPI
+
+logging.basicConfig(stream=sys.stdout, level=logging.WARNING)
+logger = logging.getLogger(__name__)
+
+token = os.environ["JUPYTERHUB_API_TOKEN"]
+headers = {
+    "Accept": "application/jupyterhub-pagination+json",
+    "Authorization": f"Bearer {token}",
+}
+
+def retrieve_users(hub_url):
+    """Returns a generator of user models that should be deleted"""
+    url = hub_url.rstrip("/") + "/hub/api/users"
+    next_page = True
+    params = {}
+
+    while next_page:
+        r = requests.get(url, headers=headers, params=params)
+        r.raise_for_status()
+        resp = r.json()
+        user_list = resp["items"]
+        for user in user_list:
+            # only yield users that should be deleted
+            if should_delete(user):
+                yield user
+
+        pagination = resp["_pagination"]
+        next_page = pagination["next"]
+        if next_page:
+            params = {
+                "offset": next_page["offset"],
+                "limit": next_page["limit"],
+            }
+
+def should_delete(user):
+    """
+    Returns True if a user should be deleted. The criteria are:
+    - was the user active in the past 24 hours?
+    - is there a current user server running?
+    """
+    last_activity = None
+    was_active_last_day = False
+    last_activity_str = user.get('last_activity', None)
+    if last_activity_str:
+        try:
+            last_activity = parse(last_activity_str)
+        except Exception:
+            logger.error(f"Unexpected value for user['last_activity']: {last_activity_str}")
+            raise
+        if not isinstance(last_activity, datetime):
+            raise TypeError(f"For user {user['name']}, expected datetime for last_activity but got {type(last_activity)} instead.")
+        was_active_last_day = datetime.now().astimezone() - last_activity < timedelta(hours=24)
 
-async def main():
+    logger.debug(f"User: {user['name']}")
+    logger.debug(f"Last login: {last_activity}")
+    logger.debug(f"24hrs since last login: {was_active_last_day}")
+    logger.debug(f"Running server: {user['server']}")
+    if was_active_last_day or user['server'] is not None:
+        logger.info(f"Not deleting {user['name']}")
+        return False
+    else:
+        logger.info(f"Flagged {user['name']} for deletion.")
+        return True
+
+def delete_user(hub_url, name):
+    """Delete a given user by name via the JupyterHub API"""
+    r = requests.delete(
+        hub_url.rstrip("/") + f"/hub/api/users/{name}",
+        headers=headers,
+    )
+    r.raise_for_status()
+
+def main(args):
+    """
+    Get users from a hub, check to see if they should be deleted from the ORM,
+    and if so, delete them!
+    """
+    count = 1
+    for user in list(retrieve_users(args.hub_url)):
+        print(f"{count}: deleting {user['name']}")
+        count += 1
+        if not args.dry_run:
+            delete_user(args.hub_url, user['name'])
+        else:
+            logger.warning(f"Skipped {user['name']} due to dry run.")
+
+    count -= 1
+    print(f"Deleted {count} total users from the ORM.")
+
+if __name__ == "__main__":
     argparser = argparse.ArgumentParser()
     argparser.add_argument(
-        'hub_url',
-        help='Fully qualified URL to the JupyterHub'
+        '-H',
+        '--hub_url',
+        help='Fully qualified URL to the JupyterHub',
+        required=True
     )
     argparser.add_argument(
         '--dry_run',
         action='store_true',
         help='Dry run without deleting users'
     )
+    argparser.add_argument(
+        '-v',
+        '--verbose',
+        dest='verbose',
+        action='store_true',
+        help='Set info log level'
+    )
+    argparser.add_argument(
+        '-d',
+        '--debug',
+        dest='debug',
+        action='store_true',
+        help='Set debug log level'
+    )
     args = argparser.parse_args()
 
-    to_delete = []
-    async with JupyterHubAPI(hub_url=args.hub_url) as hub:
-        users = await hub.list_users()
-        for user in users:
-            last_activity_str = user.get('last_activity', False)
-            if last_activity_str:
-                try:
-                    last_activity = parse(user['last_activity'])
-                except:
-                    print(user['last_activity'])
-                    raise
-                if isinstance(last_activity, datetime):
-                    was_active_last_day = datetime.now().astimezone() - last_activity < timedelta(hours=24)
-                else:
-                    print(f"For user {user['name']}, expected datetime.datetime class for last_activity but got {type(last_activity)} instead.")
-                    raise
-
-            print(f"User: {user['name']}")
-            print(f"Last login: {last_activity}")
-            print(f"24hrs since last login: {was_active_last_day}")
-            print(f"Running server: {user['server']}")
-            if was_active_last_day or user['server'] is not None:
-                print(f"Not deleting {user['name']}")
-            else:
-                to_delete.append(user['name'])
-                print(f"Deleting {user['name']}")
-            print("")
-
-    for i, username in enumerate(to_delete):
-        print(f'{i+1} of {len(to_delete)}: deleting {username}')
-        if not args.dry_run:
-            await hub.delete_user(username)
-        else:
-            print('Skipped due to dry run.')
-
-if __name__ == '__main__':
-    asyncio.run(main())
+    if args.verbose:
+        logger.setLevel(logging.INFO)
+    elif args.debug:
+        logger.setLevel(logging.DEBUG)
+
+    main(args)
diff --git a/scripts/infra-packages/sync.bash b/scripts/infra-packages/sync.bash
index ada568010..26f1b812a 100755
--- a/scripts/infra-packages/sync.bash
+++ b/scripts/infra-packages/sync.bash
@@ -4,7 +4,12 @@ set -euxo pipefail
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 ROOT_DIR="${SCRIPT_DIR}/../.."
 
-find "${ROOT_DIR}/deployments" -type d -name 'image' -exec cp "${SCRIPT_DIR}/requirements.txt" {}/infra-requirements.txt \;
+# install file in deployment directories named "image"
+find "${ROOT_DIR}/deployments" -type d -name 'image' \
+    -exec cp "${SCRIPT_DIR}/requirements.txt" {}/infra-requirements.txt \;
 
-# FIXME: Don't specialcase datahub!
-cp ${SCRIPT_DIR}/requirements.txt ${ROOT_DIR}/deployments/datahub/images/default/infra-requirements.txt
\ No newline at end of file
+# install file in subdirectories of deployment directories named "images"
+for d in $(find "${ROOT_DIR}/deployments" -type d -name images); do
+    find "$d" -maxdepth 1 -type d -not -name images \
+        -exec cp "${SCRIPT_DIR}/requirements.txt" {}/infra-requirements.txt \;
+done
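
For reference, the rewritten scripts/delete-unused-users.py is safest to exercise with a dry run first. The hub URL below is a placeholder; --dry_run reports the ORM users that would be removed without deleting anything, and -v raises the log level so the per-user skip/delete decisions are printed:

    export JUPYTERHUB_API_TOKEN=<token minted at {hub_url}/hub/token>
    python scripts/delete-unused-users.py --hub_url https://hub.example.org --dry_run -v
    python scripts/delete-unused-users.py --hub_url https://hub.example.org -v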