diff --git a/deployments/datahub/images/default/Dockerfile b/deployments/datahub/images/default/Dockerfile
index 6cb1939ad..667ea3d61 100644
--- a/deployments/datahub/images/default/Dockerfile
+++ b/deployments/datahub/images/default/Dockerfile
@@ -144,6 +144,10 @@ RUN pip install --no-cache -r /tmp/requirements.txt
 
 ENV PYPPETEER_HOME ${CONDA_DIR}
 RUN pyppeteer-install
 
+# install chromium browser for playwright
+# https://github.com/berkeley-dsep-infra/datahub/issues/5062
+RUN playwright install chromium
+
 # Install IR kernelspec
 RUN r -e "IRkernel::installspec(user = FALSE, prefix='${CONDA_DIR}')"
diff --git a/deployments/datahub/images/default/environment.yml b/deployments/datahub/images/default/environment.yml
index b098a4549..329e44fad 100644
--- a/deployments/datahub/images/default/environment.yml
+++ b/deployments/datahub/images/default/environment.yml
@@ -88,8 +88,13 @@ dependencies:
 # MUSIC 30, https://github.com/berkeley-dsep-infra/datahub/issues/5047
 - music21==8.3.0
 
+# error converting to PDF https://github.com/berkeley-dsep-infra/datahub/issues/5062
+- pyppeteer==1.0.2
+
 - pip:
   # Econ 148, Spring 2023 https://github.com/berkeley-dsep-infra/datahub/issues/4093
   - pycountry-convert==0.7.2
   - otter-grader==4.2.0
   - gh-scoped-creds==4.1
+  # error converting to PDF https://github.com/berkeley-dsep-infra/datahub/issues/5062
+  - nbconvert[webpdf]
diff --git a/deployments/datahub/images/default/r-packages/2023-fall-mba-247.r b/deployments/datahub/images/default/r-packages/2023-fall-mba-247.r
index 3bc83935b..31b6da47c 100644
--- a/deployments/datahub/images/default/r-packages/2023-fall-mba-247.r
+++ b/deployments/datahub/images/default/r-packages/2023-fall-mba-247.r
@@ -12,6 +12,8 @@ class_libs = c(
   "SnowballC", "0.7.1",
   "wordcloud", "2.6",
   "pROC", "1.18.4",
-  "rpart.plot", "3.1.1"
+  "rpart.plot", "3.1.1",
+  "randomForest", "4.7-1.1",
+  "xgboost", "1.7.5.1"
 )
 class_libs_install_version(class_name, class_libs)
diff --git a/deployments/datahub/images/default/requirements.txt b/deployments/datahub/images/default/requirements.txt
index 0eb1ff72f..e6d591917 100644
--- a/deployments/datahub/images/default/requirements.txt
+++ b/deployments/datahub/images/default/requirements.txt
@@ -17,7 +17,7 @@ nose==1.3.7
 #
 # modules
 beautifulsoup4==4.9.3
-nb2pdf==0.6.2
+# nb2pdf==0.6.2 commented out by sknapp 06.10.2023 to unblock https://github.com/berkeley-dsep-infra/datahub/issues/5062
 #
 # ls 88-3; neuro
 lxml==4.9.1
diff --git a/deployments/dev-r/config/common.yaml b/deployments/dev-r/config/common.yaml
index 0f29855cc..9d88af9c5 100644
--- a/deployments/dev-r/config/common.yaml
+++ b/deployments/dev-r/config/common.yaml
@@ -58,7 +58,7 @@ jupyterhub:
         - display_name: "repo2docker image"
           description: "A newer repo2docker-based image with similar components as the primary."
          kubespawner_override:
-            image: us-central1-docker.pkg.dev/ucb-datahub-2018/user-images/dev-r-secondary:5c55c90
+            image: us-central1-docker.pkg.dev/ucb-datahub-2018/user-images/dev-r-secondary:1a64e0a
         - display_name: "1524699: DataHub Infrastructure"
           slug: "1524699"
           description: "Regular image with per-course subpath."
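Taken together, the Dockerfile and environment.yml changes above wire up nbconvert's webpdf exporter for issue 5062: nbconvert[webpdf] supplies the exporter, while pyppeteer and "playwright install chromium" provide the headless Chromium it drives. A minimal smoke test for the rebuilt image might look like the sketch below; the notebook filename is a hypothetical placeholder, and WebPDFExporter is nbconvert's standard webpdf entry point.

    # smoke-test sketch (not part of the diff): render a notebook to PDF via headless Chromium
    from nbconvert import WebPDFExporter  # installed by nbconvert[webpdf]

    exporter = WebPDFExporter()
    # from_filename returns (output_bytes, resources); example.ipynb is a placeholder
    pdf_bytes, _resources = exporter.from_filename("example.ipynb")
    with open("example.pdf", "wb") as f:
        f.write(pdf_bytes)

The same path can be exercised from the command line with "jupyter nbconvert --to webpdf example.ipynb".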
diff --git a/deployments/dev-r/images/secondary/environment.yml b/deployments/dev-r/images/secondary/environment.yml
index 3a1441db5..9f5511cd2 100644
--- a/deployments/dev-r/images/secondary/environment.yml
+++ b/deployments/dev-r/images/secondary/environment.yml
@@ -1,9 +1,10 @@
 dependencies:
 - python=3.10
 - pip=23.2.*
-- jupyter-server-proxy==4.0.0
+- jupyter-server-proxy==4.1.0
 - jupyter-rsession-proxy==2.2.0
-- syncthing==1.23.5
+- jupyterlab-myst==2.0.2
+- syncthing==1.25.0
 - pyppeteer==1.0.2
 
 # for nbconvert
diff --git a/scripts/delete-unused-users.py b/scripts/delete-unused-users.py
index 15f46faff..f5285d311 100755
--- a/scripts/delete-unused-users.py
+++ b/scripts/delete-unused-users.py
@@ -22,7 +22,6 @@ import sys
 
 from dateutil.parser import parse
-from jhub_client.api import JupyterHubAPI
 
 logging.basicConfig(stream=sys.stdout, level=logging.WARNING)
 logger = logging.getLogger(__name__)
 
@@ -33,7 +32,27 @@
     "Authorization": f"Bearer {token}",
 }
 
-def retrieve_users(hub_url):
+def parse_timedelta(args):
+    """
+    Parse timedelta value from literal string constructor values
+
+    Trying to support all possible values like described in
+    https://docs.python.org/3/library/datetime.html#datetime.timedelta
+    """
+    result = {}
+    for arg in args.split(','):
+        key, value = arg.split('=')
+        try:
+            value = int(value)
+        except ValueError:
+            try:
+                value = float(value)
+            except ValueError as e:
+                raise argparse.ArgumentTypeError(f"could not parse {arg!r} as a timedelta field") from e
+        result[key] = value
+    return timedelta(**result)
+
+def retrieve_users(hub_url, inactive_since):
     """Returns generator of user models that should be deleted"""
     url = hub_url.rstrip("/") + "/hub/api/users"
     next_page = True
@@ -46,7 +65,7 @@ def retrieve_users(hub_url):
         user_list = resp["items"]
         for user in user_list:
             # only yield users that should be deleted
-            if should_delete(user):
+            if should_delete(user, inactive_since):
                 yield user
 
         pagination = resp["_pagination"]
@@ -57,10 +76,10 @@
                 "limit": next_page["limit"],
             }
 
-def should_delete(user):
+def should_delete(user, inactive_since):
     """
     Returns a boolean if user is to be deleted. The critera are:
-    - was the user active in the past 24 hours?
+    - was the user active in the past inactive_since period?
     - is there a current user server running?
     """
     last_activity_str = user.get('last_activity', False)
@@ -71,16 +90,16 @@
         logger.error(f"Unexpected value for user['last_activity']: {user['last_activity']}")
         raise
     if isinstance(last_activity, datetime):
-        was_active_last_day = datetime.now().astimezone() - last_activity < timedelta(hours=24)
+        was_active_recently = datetime.now().astimezone() - last_activity < inactive_since
     else:
         logger.error(f"For user {user['name']}, expected datetime.datetime class for last_activity but got {type(last_activity)} instead.")
         raise
 
     logger.debug(f"User: {user['name']}")
     logger.debug(f"Last login: {last_activity}")
-    logger.debug(f"24hrs since last login: {was_active_last_day}")
+    logger.debug(f"Recent activity: {was_active_recently}")
     logger.debug(f"Running server: {user['server']}")
-    if was_active_last_day or user['server'] is not None:
+    if was_active_recently or user['server'] is not None:
         logger.info(f"Not deleting {user['name']}")
         return False
     else:
@@ -101,7 +120,7 @@ def main(args):
     and if so, delete them!
""" count = 1 - for user in list(retrieve_users(args.hub_url)): + for user in list(retrieve_users(args.hub_url, args.inactive_since)): print(f"{count}: deleting {user['name']}") count += 1 if not args.dry_run: @@ -115,7 +134,7 @@ def main(args): if __name__ == "__main__": argparser = argparse.ArgumentParser() argparser.add_argument( - '-h', + '-H', '--hub_url', help='Fully qualified URL to the JupyterHub', required=True @@ -125,6 +144,13 @@ def main(args): action='store_true', help='Dry run without deleting users' ) + argparser.add_argument( + '--inactive_since', + default='hours=24', + type=parse_timedelta, + help='Period of inactivity after which users are considered for deletion (literal string constructor values for timedelta objects)' + # https://docs.python.org/3/library/datetime.html#timedelta-objects + ) argparser.add_argument( '-v', '--verbose', @@ -145,5 +171,6 @@ def main(args): logger.setLevel(logging.INFO) elif args.debug: logger.setLevel(logging.DEBUG) + logger.debug(args) main(args)