Skip to content

Commit

Permalink
Merge pull request #5114 from berkeley-dsep-infra/staging
Browse files Browse the repository at this point in the history
merging 5104/5106/5107/5108/5109/5111/5113 to prod
  • Loading branch information
shaneknapp authored Oct 7, 2023
2 parents c6b23e6 + 9ea65a7 commit 9a96d0c
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 15 deletions.
4 changes: 4 additions & 0 deletions deployments/datahub/images/default/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,10 @@ RUN pip install --no-cache -r /tmp/requirements.txt
ENV PYPPETEER_HOME ${CONDA_DIR}
RUN pyppeteer-install

# install chromium browser for playwright
# https://github.com/berkeley-dsep-infra/datahub/issues/5062
RUN playwright install chromium

# Install IR kernelspec
RUN r -e "IRkernel::installspec(user = FALSE, prefix='${CONDA_DIR}')"

Expand Down
5 changes: 5 additions & 0 deletions deployments/datahub/images/default/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,13 @@ dependencies:
# MUSIC 30, https://github.com/berkeley-dsep-infra/datahub/issues/5047
- music21==8.3.0

# error converting to PDF https://github.com/berkeley-dsep-infra/datahub/issues/5062
- pyppeteer==1.0.2

- pip:
# Econ 148, Spring 2023 https://github.com/berkeley-dsep-infra/datahub/issues/4093
- pycountry-convert==0.7.2
- otter-grader==4.2.0
- gh-scoped-creds==4.1
# error converting to PDF https://github.com/berkeley-dsep-infra/datahub/issues/5062
- nbconvert[webpdf]
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ class_libs = c(
"SnowballC", "0.7.1",
"wordcloud", "2.6",
"pROC", "1.18.4",
"rpart.plot", "3.1.1"
"rpart.plot", "3.1.1",
"randomForest","4.7-1.1",
"xgboost","1.7.5.1"
)
class_libs_install_version(class_name, class_libs)
2 changes: 1 addition & 1 deletion deployments/datahub/images/default/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ nose==1.3.7
#
# modules
beautifulsoup4==4.9.3
nb2pdf==0.6.2
# nb2pdf==0.6.2 commented out by sknapp 06.10.2023 to unblock https://github.com/berkeley-dsep-infra/datahub/issues/5062
#
# ls 88-3; neuro
lxml==4.9.1
Expand Down
2 changes: 1 addition & 1 deletion deployments/dev-r/config/common.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jupyterhub:
- display_name: "repo2docker image"
description: "A newer repo2docker-based image with similar components as the primary."
kubespawner_override:
image: us-central1-docker.pkg.dev/ucb-datahub-2018/user-images/dev-r-secondary:5c55c90
image: us-central1-docker.pkg.dev/ucb-datahub-2018/user-images/dev-r-secondary:1a64e0a
- display_name: "1524699: DataHub Infrastructure"
slug: "1524699"
description: "Regular image with per-course subpath."
Expand Down
5 changes: 3 additions & 2 deletions deployments/dev-r/images/secondary/environment.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
dependencies:
- python=3.10
- pip=23.2.*
- jupyter-server-proxy==4.0.0
- jupyter-server-proxy==4.1.0
- jupyter-rsession-proxy==2.2.0
- syncthing==1.23.5
- jupyterlab-myst==2.0.2
- syncthing==1.25.0
- pyppeteer==1.0.2

# for nbconvert
Expand Down
47 changes: 37 additions & 10 deletions scripts/delete-unused-users.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import sys

from dateutil.parser import parse
from jhub_client.api import JupyterHubAPI

logging.basicConfig(stream=sys.stdout, level=logging.WARNING)
logger = logging.getLogger(__name__)
Expand All @@ -33,7 +32,27 @@
"Authorization": f"Bearer {token}",
}

def retrieve_users(hub_url):
def parse_timedelta(args):
    """
    Parse a timedelta from a comma-separated list of ``key=value`` pairs.

    The keys are the keyword arguments accepted by the timedelta
    constructor (e.g. ``days``, ``hours``, ``minutes``), as described in
    https://docs.python.org/3/library/datetime.html#datetime.timedelta

    Examples: ``hours=24``, ``days=1,hours=12``, ``hours=1.5``.

    Intended for use as an argparse ``type=`` callable, so every kind of
    malformed input is reported as ``argparse.ArgumentTypeError``, which
    argparse turns into a readable usage error.

    Args:
        args: the raw command-line string, e.g. ``"days=7,hours=12"``.

    Returns:
        The ``timedelta`` built from the parsed keyword arguments.

    Raises:
        argparse.ArgumentTypeError: if a component is not ``key=value``,
            a value is not numeric, or timedelta rejects the arguments.
    """
    result = {}
    for arg in args.split(','):
        # partition() never raises, so a missing '=' can be reported clearly
        # instead of failing on tuple unpacking.
        key, sep, value = arg.partition('=')
        key = key.strip()
        value = value.strip()
        if not sep or not key:
            raise argparse.ArgumentTypeError(
                f"invalid timedelta component {arg!r}: expected key=value"
            )
        # Prefer int so whole numbers stay exact; fall back to float.
        try:
            number = int(value)
        except ValueError:
            try:
                number = float(value)
            except ValueError as e:
                # ArgumentError needs (argument, message) and cannot be raised
                # bare; ArgumentTypeError is the exception meant for type= callables.
                raise argparse.ArgumentTypeError(
                    f"invalid numeric value {value!r} for {key!r}"
                ) from e
        result[key] = number
    try:
        return timedelta(**result)
    except (TypeError, ValueError, OverflowError) as e:
        # Unknown keyword (TypeError) or out-of-range / non-finite value.
        raise argparse.ArgumentTypeError(
            f"invalid timedelta arguments {args!r}: {e}"
        ) from e

def retrieve_users(hub_url, inactive_since):
"""Returns generator of user models that should be deleted"""
url = hub_url.rstrip("/") + "/hub/api/users"
next_page = True
Expand All @@ -46,7 +65,7 @@ def retrieve_users(hub_url):
user_list = resp["items"]
for user in user_list:
# only yield users that should be deleted
if should_delete(user):
if should_delete(user, inactive_since):
yield user

pagination = resp["_pagination"]
Expand All @@ -57,10 +76,10 @@ def retrieve_users(hub_url):
"limit": next_page["limit"],
}

def should_delete(user):
def should_delete(user, inactive_since):
"""
Returns a boolean if user is to be deleted. The criteria are:
- was the user active in the past 24 hours?
- was the user active in the past inactive_since period?
- is there a current user server running?
"""
last_activity_str = user.get('last_activity', False)
Expand All @@ -71,16 +90,16 @@ def should_delete(user):
logger.error(f"Unexpected value for user['last_activity']: {user['last_activity']}")
raise
if isinstance(last_activity, datetime):
was_active_last_day = datetime.now().astimezone() - last_activity < timedelta(hours=24)
was_active_recently = datetime.now().astimezone() - last_activity < inactive_since
else:
logger.error(f"For user {user['name']}, expected datetime.datetime class for last_activity but got {type(last_activity)} instead.")
raise

logger.debug(f"User: {user['name']}")
logger.debug(f"Last login: {last_activity}")
logger.debug(f"24hrs since last login: {was_active_last_day}")
logger.debug(f"Recent activity: {was_active_recently}")
logger.debug(f"Running server: {user['server']}")
if was_active_last_day or user['server'] is not None:
if was_active_recently or user['server'] is not None:
logger.info(f"Not deleting {user['name']}")
return False
else:
Expand All @@ -101,7 +120,7 @@ def main(args):
and if so, delete them!
"""
count = 1
for user in list(retrieve_users(args.hub_url)):
for user in list(retrieve_users(args.hub_url, args.inactive_since)):
print(f"{count}: deleting {user['name']}")
count += 1
if not args.dry_run:
Expand All @@ -115,7 +134,7 @@ def main(args):
if __name__ == "__main__":
argparser = argparse.ArgumentParser()
argparser.add_argument(
'-h',
'-H',
'--hub_url',
help='Fully qualified URL to the JupyterHub',
required=True
Expand All @@ -125,6 +144,13 @@ def main(args):
action='store_true',
help='Dry run without deleting users'
)
argparser.add_argument(
'--inactive_since',
default='hours=24',
type=parse_timedelta,
help='Period of inactivity after which users are considered for deletion (literal string constructor values for timedelta objects)'
# https://docs.python.org/3/library/datetime.html#timedelta-objects
)
argparser.add_argument(
'-v',
'--verbose',
Expand All @@ -145,5 +171,6 @@ def main(args):
logger.setLevel(logging.INFO)
elif args.debug:
logger.setLevel(logging.DEBUG)
logger.debug(args)

main(args)

0 comments on commit 9a96d0c

Please sign in to comment.