From a2f5c19d29f5867349bae294c462533ef3a01d5b Mon Sep 17 00:00:00 2001 From: shane knapp Date: Fri, 29 Sep 2023 11:51:22 -0700 Subject: [PATCH] fix pagination, basically a complete rewrite --- scripts/delete-unused-users.py | 153 ++++++++++++++++++++++++--------- 1 file changed, 112 insertions(+), 41 deletions(-) diff --git a/scripts/delete-unused-users.py b/scripts/delete-unused-users.py index 1192750e5..5880fccf1 100755 --- a/scripts/delete-unused-users.py +++ b/scripts/delete-unused-users.py @@ -12,58 +12,129 @@ ok. """ import argparse -from jhub_client.api import JupyterHubAPI -from dateutil.parser import parse import asyncio from datetime import timedelta, datetime +from dateutil.parser import parse +from jhub_client.api import JupyterHubAPI +import logging +import os +import requests +import sys + +logging.basicConfig(stream=sys.stdout, level=logging.WARNING) +logger = logging.getLogger(__name__) + +token = os.environ["JUPYTERHUB_API_TOKEN"] +headers = { + "Accept": "application/jupyterhub-pagination+json", + "Authorization": f"Bearer {token}", +} + +def retrieve_users(hub_url): + """Returns generator of user models that should be deleted""" + url = hub_url.rstrip("/") + "/hub/api/users" + next_page = True + params = {} + + while next_page: + r = requests.get(url, headers=headers, params=params) + r.raise_for_status() + resp = r.json() + user_list = resp["items"] + for user in user_list: + # only yield users that should be deleted + if should_delete(user): + yield user + + pagination = resp["_pagination"] + next_page = pagination["next"] + if next_page: + params = { + "offset": next_page["offset"], + "limit": next_page["limit"], + } + +def should_delete(user): + """ + Returns a boolean if user is to be deleted. The criteria are: + - was the user active in the past 24 hours? + - is there a current user server running? 
+ """ + last_activity_str = user.get('last_activity', False) + if last_activity_str: + try: + last_activity = parse(user['last_activity']) + except: + logger.error(f"Unexpected value for user['last_activity']: {user['last_activity']}") + raise + if isinstance(last_activity, datetime): + was_active_last_day = datetime.now().astimezone() - last_activity < timedelta(hours=24) + else: + logger.error(f"For user {user['name']}, expected datetime.datetime class for last_activity but got {type(last_activity)} instead.") + raise + + logger.debug(f"User: {user['name']}") + logger.debug(f"Last login: {last_activity}") + logger.debug(f"24hrs since last login: {was_active_last_day}") + logger.debug(f"Running server: {user['server']}") + if was_active_last_day or user['server'] is not None: + logger.info(f"Not deleting {user['name']}") + return False + else: + logger.info(f"Flagged {user['name']} for deletion.") + return True -async def main(): +async def delete_user(hub, user, count): + """Delete a user from the hub ORM""" + username = user['name'] + print(f"{count}: deleting {username}") + if not args.dry_run: + await hub.delete_user(username) + else: + logger.warning("Skipped due to dry run.") + +async def main(args): + """ + Get users from a hub, check to see if they should be deleted from the ORM + and if so, delete them! 
+ """ + hub = JupyterHubAPI(hub_url=args.hub_url) + count = 1 + for user in list(retrieve_users(args.hub_url)): + await delete_user(hub, user, count) + count += 1 + count -= 1 + print(f"Deleted {count} total users.") + +if __name__ == "__main__": argparser = argparse.ArgumentParser() argparser.add_argument( - 'hub_url', - help='Fully qualified URL to the JupyterHub' + '--hub_url', + help='Fully qualified URL to the JupyterHub', + required=True ) argparser.add_argument( '--dry_run', action='store_true', help='Dry run without deleting users' ) + argparser.add_argument( + '-v', + dest='verbose', + action='store_true', + help='Set info log level' + ) + argparser.add_argument( + '-d', + dest='debug', + action='store_true', + help='Set debug log level' + ) args = argparser.parse_args() - to_delete = [] - async with JupyterHubAPI(hub_url=args.hub_url) as hub: - users = await hub.list_users() - for user in users: - last_activity_str = user.get('last_activity', False) - if last_activity_str: - try: - last_activity = parse(user['last_activity']) - except: - print(user['last_activity']) - raise - if isinstance(last_activity, datetime): - was_active_last_day = datetime.now().astimezone() - last_activity < timedelta(hours=24) - else: - print(f"For user {user['name']}, expected datetime.datetime class for last_activity but got {type(last_activity)} instead.") - raise - - print(f"User: {user['name']}") - print(f"Last login: {last_activity}") - print(f"24hrs since last login: {was_active_last_day}") - print(f"Running server: {user['server']}") - if was_active_last_day or user['server'] is not None: - print(f"Not deleting {user['name']}") - else: - to_delete.append(user['name']) - print(f"Deleting {user['name']}") - print("") - - for i, username in enumerate(to_delete): - print(f'{i+1} of {len(to_delete)}: deleting {username}') - if not args.dry_run: - await hub.delete_user(username) - else: - print('Skipped due to dry run.') + if args.verbose: + logger.setLevel(logging.INFO) + 
elif args.debug: + logger.setLevel(logging.DEBUG) -if __name__ == '__main__': - asyncio.run(main()) + asyncio.run(main(args))