From b2bf1bff937b29f692ecb8577d82e89bd34c7dbe Mon Sep 17 00:00:00 2001 From: Moritz Schlarb Date: Thu, 5 Oct 2023 09:03:15 +0200 Subject: [PATCH 01/11] Delete unused import JupyterHubAPI --- scripts/delete-unused-users.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/delete-unused-users.py b/scripts/delete-unused-users.py index 15f46faff..a7898028f 100755 --- a/scripts/delete-unused-users.py +++ b/scripts/delete-unused-users.py @@ -22,7 +22,6 @@ import sys from dateutil.parser import parse -from jhub_client.api import JupyterHubAPI logging.basicConfig(stream=sys.stdout, level=logging.WARNING) logger = logging.getLogger(__name__) From a782546c1fcc0bce31be5810423c53fdce083bca Mon Sep 17 00:00:00 2001 From: Moritz Schlarb Date: Thu, 5 Oct 2023 09:11:30 +0200 Subject: [PATCH 02/11] Change --hub_url short option string to -H Otherwise, I get: Traceback (most recent call last): File "/tmp/datahub/scripts/./delete-unused-users.py", line 116, in argparser.add_argument( File "/usr/lib/python3.11/argparse.py", line 1480, in add_argument return self._add_action(action) ^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.11/argparse.py", line 1862, in _add_action self._optionals._add_action(action) File "/usr/lib/python3.11/argparse.py", line 1682, in _add_action action = super(_ArgumentGroup, self)._add_action(action) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.11/argparse.py", line 1494, in _add_action self._check_conflict(action) File "/usr/lib/python3.11/argparse.py", line 1631, in _check_conflict conflict_handler(action, confl_optionals) File "/usr/lib/python3.11/argparse.py", line 1640, in _handle_conflict_error raise ArgumentError(action, message % conflict_string) argparse.ArgumentError: argument -h/--hub_url: conflicting option string: -h --- scripts/delete-unused-users.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/delete-unused-users.py b/scripts/delete-unused-users.py index a7898028f..276db06fb 100755 
--- a/scripts/delete-unused-users.py +++ b/scripts/delete-unused-users.py @@ -114,7 +114,7 @@ def main(args): if __name__ == "__main__": argparser = argparse.ArgumentParser() argparser.add_argument( - '-h', + '-H', '--hub_url', help='Fully qualified URL to the JupyterHub', required=True From e790e881e60bf80ef8e2d8bfcabea62763141109 Mon Sep 17 00:00:00 2001 From: Moritz Schlarb Date: Thu, 5 Oct 2023 09:32:27 +0200 Subject: [PATCH 03/11] add argument for inactive_since to specify other timedeltas --- scripts/delete-unused-users.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/scripts/delete-unused-users.py b/scripts/delete-unused-users.py index 276db06fb..1beed9fe9 100755 --- a/scripts/delete-unused-users.py +++ b/scripts/delete-unused-users.py @@ -32,7 +32,12 @@ "Authorization": f"Bearer {token}", } -def retrieve_users(hub_url): +def parse_timedelta(arg): + """Parse timedelta value from literal string constructor values""" + key, value = arg.split('=') + return timedelta(**{key: int(value)}) + +def retrieve_users(hub_url, inactive_since): """Returns generator of user models that should be deleted""" url = hub_url.rstrip("/") + "/hub/api/users" next_page = True @@ -45,7 +50,7 @@ def retrieve_users(hub_url): user_list = resp["items"] for user in user_list: # only yield users that should be deleted - if should_delete(user): + if should_delete(user, inactive_since): yield user pagination = resp["_pagination"] @@ -56,10 +61,10 @@ def retrieve_users(hub_url): "limit": next_page["limit"], } -def should_delete(user): +def should_delete(user, inactive_since): """ Returns a boolean if user is to be deleted. The critera are: - - was the user active in the past 24 hours? + - was the user active in the past inactive_since period? - is there a current user server running? 
""" last_activity_str = user.get('last_activity', False) @@ -70,16 +75,16 @@ def should_delete(user): logger.error(f"Unexpected value for user['last_activity']: {user['last_activity']}") raise if isinstance(last_activity, datetime): - was_active_last_day = datetime.now().astimezone() - last_activity < timedelta(hours=24) + was_active_recently = datetime.now().astimezone() - last_activity < inactive_since else: logger.error(f"For user {user['name']}, expected datetime.datetime class for last_activity but got {type(last_activity)} instead.") raise logger.debug(f"User: {user['name']}") logger.debug(f"Last login: {last_activity}") - logger.debug(f"24hrs since last login: {was_active_last_day}") + logger.debug(f"Recent activity: {was_active_recently}") logger.debug(f"Running server: {user['server']}") - if was_active_last_day or user['server'] is not None: + if was_active_recently or user['server'] is not None: logger.info(f"Not deleting {user['name']}") return False else: @@ -100,7 +105,7 @@ def main(args): and if so, delete them! 
""" count = 1 - for user in list(retrieve_users(args.hub_url)): + for user in list(retrieve_users(args.hub_url, args.inactive_since)): print(f"{count}: deleting {user['name']}") count += 1 if not args.dry_run: @@ -124,6 +129,13 @@ def main(args): action='store_true', help='Dry run without deleting users' ) + argparser.add_argument( + '--inactive_since', + default='hours=24', + type=parse_timedelta, + help='Period of inactivity after which users are considered for deletion (literal string constructor values for timedelta objects)' + # https://docs.python.org/3/library/datetime.html#timedelta-objects + ) argparser.add_argument( '-v', '--verbose', @@ -144,5 +156,6 @@ def main(args): logger.setLevel(logging.INFO) elif args.debug: logger.setLevel(logging.DEBUG) + logger.debug(args) main(args) From cea580b7e54c00956ff9cd61b5c574bae49eed93 Mon Sep 17 00:00:00 2001 From: Moritz Schlarb Date: Thu, 5 Oct 2023 09:39:46 +0200 Subject: [PATCH 04/11] more sophisticated timedelta parsing --- scripts/delete-unused-users.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/scripts/delete-unused-users.py b/scripts/delete-unused-users.py index 1beed9fe9..f5285d311 100755 --- a/scripts/delete-unused-users.py +++ b/scripts/delete-unused-users.py @@ -32,10 +32,25 @@ "Authorization": f"Bearer {token}", } -def parse_timedelta(arg): - """Parse timedelta value from literal string constructor values""" - key, value = arg.split('=') - return timedelta(**{key: int(value)}) +def parse_timedelta(args): + """ + Parse timedelta value from literal string constructor values + + Trying to support all possible values like described in + https://docs.python.org/3/library/datetime.html#datetime.timedelta + """ + result = {} + for arg in args.split(','): + key, value = arg.split('=') + try: + value = int(value) + except ValueError: + try: + value = float(value) + except ValueError as e: + raise argparse.ArgumentError from e + result[key] = value + return 
timedelta(**result) def retrieve_users(hub_url, inactive_since): """Returns generator of user models that should be deleted""" From 1a64e0a30e57fb46a3dbd58f12e5126522a68f1e Mon Sep 17 00:00:00 2001 From: Ryan Lovett Date: Thu, 5 Oct 2023 12:45:58 -0700 Subject: [PATCH 05/11] Add jupyterlab-myst. Bump some other packages. --- deployments/dev-r/images/secondary/environment.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/deployments/dev-r/images/secondary/environment.yml b/deployments/dev-r/images/secondary/environment.yml index 3a1441db5..9f5511cd2 100644 --- a/deployments/dev-r/images/secondary/environment.yml +++ b/deployments/dev-r/images/secondary/environment.yml @@ -1,9 +1,10 @@ dependencies: - python=3.10 - pip=23.2.* -- jupyter-server-proxy==4.0.0 +- jupyter-server-proxy==4.1.0 - jupyter-rsession-proxy==2.2.0 -- syncthing==1.23.5 +- jupyterlab-myst==2.0.2 +- syncthing==1.25.0 - pyppeteer==1.0.2 # for nbconvert From e33bce9cc0239c334d40b5217245ad07dade79ef Mon Sep 17 00:00:00 2001 From: Balaji Alwar Date: Thu, 5 Oct 2023 13:28:23 -0700 Subject: [PATCH 06/11] Add xgboost and randomForest to R hub image --- .../datahub/images/default/r-packages/2023-fall-mba-247.r | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/deployments/datahub/images/default/r-packages/2023-fall-mba-247.r b/deployments/datahub/images/default/r-packages/2023-fall-mba-247.r index 3bc83935b..31b6da47c 100644 --- a/deployments/datahub/images/default/r-packages/2023-fall-mba-247.r +++ b/deployments/datahub/images/default/r-packages/2023-fall-mba-247.r @@ -12,6 +12,8 @@ class_libs = c( "SnowballC", "0.7.1", "wordcloud", "2.6", "pROC", "1.18.4", - "rpart.plot", "3.1.1" + "rpart.plot", "3.1.1", + "randomForest","4.7-1.1", + "xgboost","1.7.5.1" ) class_libs_install_version(class_name, class_libs) From c76922b51aa5f848f5ba9f0d74a999a9af47e8a8 Mon Sep 17 00:00:00 2001 From: Ryan Lovett Date: Thu, 5 Oct 2023 13:29:09 -0700 Subject: [PATCH 07/11] Bump
secondary image tag. --- deployments/dev-r/config/common.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployments/dev-r/config/common.yaml b/deployments/dev-r/config/common.yaml index 0f29855cc..9d88af9c5 100644 --- a/deployments/dev-r/config/common.yaml +++ b/deployments/dev-r/config/common.yaml @@ -58,7 +58,7 @@ jupyterhub: - display_name: "repo2docker image" description: "A newer repo2docker-based image with similar components as the primary." kubespawner_override: - image: us-central1-docker.pkg.dev/ucb-datahub-2018/user-images/dev-r-secondary:5c55c90 + image: us-central1-docker.pkg.dev/ucb-datahub-2018/user-images/dev-r-secondary:1a64e0a - display_name: "1524699: DataHub Infrastructure" slug: "1524699" description: "Regular image with per-course subpath." From b66e85df3de8b6288dc33c7707e7a39b8c18804d Mon Sep 17 00:00:00 2001 From: shane knapp Date: Fri, 6 Oct 2023 10:30:04 -0700 Subject: [PATCH 08/11] add nbconvert[webpdf] library --- deployments/datahub/images/default/environment.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/deployments/datahub/images/default/environment.yml b/deployments/datahub/images/default/environment.yml index b098a4549..8f1f63a05 100644 --- a/deployments/datahub/images/default/environment.yml +++ b/deployments/datahub/images/default/environment.yml @@ -93,3 +93,5 @@ dependencies: - pycountry-convert==0.7.2 - otter-grader==4.2.0 - gh-scoped-creds==4.1 + # error converting to PDF https://github.com/berkeley-dsep-infra/datahub/issues/5062 + - nbconvert[webpdf] From ae3b032af439b8f18dddf67b3ef0d95486d99d01 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Fri, 6 Oct 2023 13:26:40 -0700 Subject: [PATCH 09/11] removing nb2pdf --- deployments/datahub/images/default/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployments/datahub/images/default/requirements.txt b/deployments/datahub/images/default/requirements.txt index 0eb1ff72f..e6d591917 100644 --- 
a/deployments/datahub/images/default/requirements.txt +++ b/deployments/datahub/images/default/requirements.txt @@ -17,7 +17,7 @@ nose==1.3.7 # # modules beautifulsoup4==4.9.3 -nb2pdf==0.6.2 +# nb2pdf==0.6.2 commented out by sknapp 06.10.2023 to unblock https://github.com/berkeley-dsep-infra/datahub/issues/5062 # # ls 88-3; neuro lxml==4.9.1 From a9707d6fdd9df2901c1f7406659c6bae0b1b88a6 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Fri, 6 Oct 2023 14:28:59 -0700 Subject: [PATCH 10/11] dep hell is for suckers... guess im a sucker --- deployments/datahub/images/default/environment.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/deployments/datahub/images/default/environment.yml b/deployments/datahub/images/default/environment.yml index 8f1f63a05..329e44fad 100644 --- a/deployments/datahub/images/default/environment.yml +++ b/deployments/datahub/images/default/environment.yml @@ -88,6 +88,9 @@ dependencies: # MUSIC 30, https://github.com/berkeley-dsep-infra/datahub/issues/5047 - music21==8.3.0 +# error converting to PDF https://github.com/berkeley-dsep-infra/datahub/issues/5062 +- pyppeteer==1.0.2 + - pip: # Econ 148, Spring 2023 https://github.com/berkeley-dsep-infra/datahub/issues/4093 - pycountry-convert==0.7.2 From d55fef0179c52e094727c551087c64a94e460167 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Fri, 6 Oct 2023 16:36:35 -0700 Subject: [PATCH 11/11] this never ends --- deployments/datahub/images/default/Dockerfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/deployments/datahub/images/default/Dockerfile b/deployments/datahub/images/default/Dockerfile index 6cb1939ad..667ea3d61 100644 --- a/deployments/datahub/images/default/Dockerfile +++ b/deployments/datahub/images/default/Dockerfile @@ -144,6 +144,10 @@ RUN pip install --no-cache -r /tmp/requirements.txt ENV PYPPETEER_HOME ${CONDA_DIR} RUN pyppeteer-install +# install chromium browser for playwright +# https://github.com/berkeley-dsep-infra/datahub/issues/5062 +RUN playwright install 
chromium + # Install IR kernelspec RUN r -e "IRkernel::installspec(user = FALSE, prefix='${CONDA_DIR}')"